From 5d0a770c129c00e3942263b429f8efa4c42efba9 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Thu, 13 Apr 2017 16:25:08 -0700 Subject: deps: ICU 59.1 bump * No feature changes. * Bug fixes. * Details: http://site.icu-project.org/download/59 Fixes: https://github.com/nodejs/node/issues/12077 PR-URL: https://github.com/nodejs/node/pull/12486 Refs: https://github.com/nodejs/node/issues/7844 Reviewed-By: James M Snell Reviewed-By: Michael Dawson Reviewed-By: Refael Ackermann --- deps/icu-small/source/common/appendable.cpp | 4 +- deps/icu-small/source/common/bmpset.cpp | 4 +- deps/icu-small/source/common/bmpset.h | 4 +- deps/icu-small/source/common/brkeng.cpp | 2 +- deps/icu-small/source/common/brkeng.h | 2 +- deps/icu-small/source/common/brkiter.cpp | 2 +- deps/icu-small/source/common/bytestream.cpp | 2 +- deps/icu-small/source/common/bytestrie.cpp | 4 +- deps/icu-small/source/common/bytestriebuilder.cpp | 4 +- deps/icu-small/source/common/bytestrieiterator.cpp | 4 +- deps/icu-small/source/common/caniter.cpp | 6 +- deps/icu-small/source/common/chariter.cpp | 2 +- deps/icu-small/source/common/charstr.cpp | 5 +- deps/icu-small/source/common/charstr.h | 2 +- deps/icu-small/source/common/cmemory.c | 162 - deps/icu-small/source/common/cmemory.cpp | 162 + deps/icu-small/source/common/cmemory.h | 2 +- deps/icu-small/source/common/cpputils.h | 4 +- deps/icu-small/source/common/cstr.cpp | 2 +- deps/icu-small/source/common/cstr.h | 2 +- deps/icu-small/source/common/cstring.c | 341 - deps/icu-small/source/common/cstring.cpp | 341 + deps/icu-small/source/common/cstring.h | 2 +- deps/icu-small/source/common/cwchar.c | 54 - deps/icu-small/source/common/cwchar.cpp | 54 + deps/icu-small/source/common/cwchar.h | 4 +- deps/icu-small/source/common/dictbe.cpp | 21 +- deps/icu-small/source/common/dictbe.h | 2 +- deps/icu-small/source/common/dictionarydata.cpp | 2 +- deps/icu-small/source/common/dictionarydata.h | 2 +- deps/icu-small/source/common/dtintrv.cpp | 2 +- 
deps/icu-small/source/common/edits.cpp | 346 + deps/icu-small/source/common/errorcode.cpp | 4 +- deps/icu-small/source/common/filteredbrk.cpp | 4 +- .../source/common/filterednormalizer2.cpp | 4 +- deps/icu-small/source/common/hash.h | 2 +- deps/icu-small/source/common/icudataver.c | 31 - deps/icu-small/source/common/icudataver.cpp | 31 + deps/icu-small/source/common/icuplug.cpp | 2 +- deps/icu-small/source/common/icuplugimp.h | 2 +- deps/icu-small/source/common/listformatter.cpp | 128 +- .../source/common/loadednormalizer2impl.cpp | 3 +- deps/icu-small/source/common/localsvc.h | 2 +- deps/icu-small/source/common/locavailable.cpp | 4 +- deps/icu-small/source/common/locbased.cpp | 2 +- deps/icu-small/source/common/locbased.h | 2 +- deps/icu-small/source/common/locdispnames.cpp | 4 +- deps/icu-small/source/common/locdspnm.cpp | 3 +- deps/icu-small/source/common/locid.cpp | 3 +- deps/icu-small/source/common/loclikely.cpp | 5 +- deps/icu-small/source/common/locmap.c | 1174 --- deps/icu-small/source/common/locmap.cpp | 1288 +++ deps/icu-small/source/common/locmap.h | 5 +- deps/icu-small/source/common/locresdata.cpp | 4 +- deps/icu-small/source/common/locutil.cpp | 2 +- deps/icu-small/source/common/locutil.h | 2 +- deps/icu-small/source/common/messageimpl.h | 4 +- deps/icu-small/source/common/messagepattern.cpp | 4 +- deps/icu-small/source/common/msvcres.h | 2 +- deps/icu-small/source/common/mutex.h | 2 +- deps/icu-small/source/common/norm2_nfc_data.h | 2 +- deps/icu-small/source/common/norm2allmodes.h | 2 +- deps/icu-small/source/common/normalizer2.cpp | 4 +- deps/icu-small/source/common/normalizer2impl.cpp | 4 +- deps/icu-small/source/common/normalizer2impl.h | 6 +- deps/icu-small/source/common/normlzr.cpp | 12 +- deps/icu-small/source/common/parsepos.cpp | 2 +- deps/icu-small/source/common/patternprops.cpp | 4 +- deps/icu-small/source/common/patternprops.h | 4 +- deps/icu-small/source/common/pluralmap.cpp | 2 +- deps/icu-small/source/common/pluralmap.h | 2 +- 
deps/icu-small/source/common/propname.cpp | 2 +- deps/icu-small/source/common/propname.h | 2 +- deps/icu-small/source/common/propname_data.h | 2 +- deps/icu-small/source/common/propsvec.c | 527 -- deps/icu-small/source/common/propsvec.cpp | 529 ++ deps/icu-small/source/common/propsvec.h | 4 +- deps/icu-small/source/common/punycode.cpp | 4 +- deps/icu-small/source/common/punycode.h | 4 +- deps/icu-small/source/common/putil.cpp | 286 +- deps/icu-small/source/common/putilimp.h | 10 +- deps/icu-small/source/common/rbbi.cpp | 2 +- deps/icu-small/source/common/rbbidata.cpp | 2 +- deps/icu-small/source/common/rbbidata.h | 4 +- deps/icu-small/source/common/rbbinode.cpp | 2 +- deps/icu-small/source/common/rbbinode.h | 2 +- deps/icu-small/source/common/rbbirb.cpp | 2 +- deps/icu-small/source/common/rbbirb.h | 2 +- deps/icu-small/source/common/rbbirpt.h | 2 +- deps/icu-small/source/common/rbbiscan.cpp | 9 +- deps/icu-small/source/common/rbbiscan.h | 2 +- deps/icu-small/source/common/rbbisetb.cpp | 2 +- deps/icu-small/source/common/rbbisetb.h | 2 +- deps/icu-small/source/common/rbbistbl.cpp | 2 +- deps/icu-small/source/common/rbbitblb.cpp | 2 +- deps/icu-small/source/common/rbbitblb.h | 2 +- deps/icu-small/source/common/resbund.cpp | 2 +- deps/icu-small/source/common/resbund_cnv.cpp | 4 +- deps/icu-small/source/common/resource.cpp | 2 +- deps/icu-small/source/common/resource.h | 2 +- deps/icu-small/source/common/ruleiter.cpp | 2 +- deps/icu-small/source/common/ruleiter.h | 2 +- deps/icu-small/source/common/schriter.cpp | 2 +- deps/icu-small/source/common/serv.cpp | 2 +- deps/icu-small/source/common/serv.h | 2 +- deps/icu-small/source/common/servlk.cpp | 2 +- deps/icu-small/source/common/servlkf.cpp | 2 +- deps/icu-small/source/common/servloc.h | 2 +- deps/icu-small/source/common/servls.cpp | 2 +- deps/icu-small/source/common/servnotf.cpp | 2 +- deps/icu-small/source/common/servnotf.h | 2 +- deps/icu-small/source/common/servrbf.cpp | 2 +- 
deps/icu-small/source/common/servslkf.cpp | 2 +- deps/icu-small/source/common/sharedobject.cpp | 2 +- deps/icu-small/source/common/sharedobject.h | 2 +- deps/icu-small/source/common/simpleformatter.cpp | 2 +- deps/icu-small/source/common/sprpimpl.h | 4 +- deps/icu-small/source/common/stringpiece.cpp | 2 +- deps/icu-small/source/common/stringtriebuilder.cpp | 4 +- deps/icu-small/source/common/uarrsort.c | 285 - deps/icu-small/source/common/uarrsort.cpp | 288 + deps/icu-small/source/common/uarrsort.h | 4 +- deps/icu-small/source/common/uassert.h | 2 +- deps/icu-small/source/common/ubidi.c | 3015 ------- deps/icu-small/source/common/ubidi.cpp | 3042 ++++++++ deps/icu-small/source/common/ubidi_props.c | 265 - deps/icu-small/source/common/ubidi_props.cpp | 267 + deps/icu-small/source/common/ubidi_props.h | 4 +- deps/icu-small/source/common/ubidi_props_data.h | 863 ++- deps/icu-small/source/common/ubidiimp.h | 4 +- deps/icu-small/source/common/ubidiln.c | 1349 ---- deps/icu-small/source/common/ubidiln.cpp | 1349 ++++ deps/icu-small/source/common/ubiditransform.c | 528 -- deps/icu-small/source/common/ubiditransform.cpp | 530 ++ deps/icu-small/source/common/ubidiwrt.c | 640 -- deps/icu-small/source/common/ubidiwrt.cpp | 640 ++ deps/icu-small/source/common/ubrk.cpp | 62 +- deps/icu-small/source/common/ubrkimpl.h | 2 +- deps/icu-small/source/common/ucase.cpp | 370 +- deps/icu-small/source/common/ucase.h | 61 +- deps/icu-small/source/common/ucase_props_data.h | 2 +- deps/icu-small/source/common/ucasemap.cpp | 705 +- deps/icu-small/source/common/ucasemap_imp.h | 239 + .../source/common/ucasemap_titlecase_brkiter.cpp | 76 +- deps/icu-small/source/common/ucat.c | 78 - deps/icu-small/source/common/ucat.cpp | 78 + deps/icu-small/source/common/uchar.c | 733 -- deps/icu-small/source/common/uchar.cpp | 736 ++ deps/icu-small/source/common/uchar_props_data.h | 4352 +++++------ deps/icu-small/source/common/ucharstrie.cpp | 7 +- deps/icu-small/source/common/ucharstriebuilder.cpp | 4 +- 
.../icu-small/source/common/ucharstrieiterator.cpp | 6 +- deps/icu-small/source/common/uchriter.cpp | 10 +- deps/icu-small/source/common/ucln.h | 4 +- deps/icu-small/source/common/ucln_cmn.cpp | 4 +- deps/icu-small/source/common/ucln_cmn.h | 4 +- deps/icu-small/source/common/ucln_imp.h | 6 +- deps/icu-small/source/common/ucmndata.c | 384 - deps/icu-small/source/common/ucmndata.cpp | 389 + deps/icu-small/source/common/ucmndata.h | 2 +- deps/icu-small/source/common/ucnv.c | 2918 ------- deps/icu-small/source/common/ucnv.cpp | 2918 +++++++ deps/icu-small/source/common/ucnv2022.cpp | 4 +- deps/icu-small/source/common/ucnv_bld.cpp | 2 +- deps/icu-small/source/common/ucnv_bld.h | 2 +- deps/icu-small/source/common/ucnv_cb.c | 261 - deps/icu-small/source/common/ucnv_cb.cpp | 261 + deps/icu-small/source/common/ucnv_cnv.c | 176 - deps/icu-small/source/common/ucnv_cnv.cpp | 182 + deps/icu-small/source/common/ucnv_cnv.h | 2 +- deps/icu-small/source/common/ucnv_ct.c | 637 -- deps/icu-small/source/common/ucnv_ct.cpp | 645 ++ deps/icu-small/source/common/ucnv_err.c | 481 -- deps/icu-small/source/common/ucnv_err.cpp | 496 ++ deps/icu-small/source/common/ucnv_ext.cpp | 5 +- deps/icu-small/source/common/ucnv_ext.h | 4 +- deps/icu-small/source/common/ucnv_imp.h | 2 +- deps/icu-small/source/common/ucnv_io.cpp | 2 +- deps/icu-small/source/common/ucnv_io.h | 2 +- deps/icu-small/source/common/ucnv_lmb.c | 1378 ---- deps/icu-small/source/common/ucnv_lmb.cpp | 1386 ++++ deps/icu-small/source/common/ucnv_set.c | 70 - deps/icu-small/source/common/ucnv_set.cpp | 70 + deps/icu-small/source/common/ucnv_u16.c | 1563 ---- deps/icu-small/source/common/ucnv_u16.cpp | 1571 ++++ deps/icu-small/source/common/ucnv_u32.c | 1251 --- deps/icu-small/source/common/ucnv_u32.cpp | 1253 +++ deps/icu-small/source/common/ucnv_u7.c | 1484 ---- deps/icu-small/source/common/ucnv_u7.cpp | 1491 ++++ deps/icu-small/source/common/ucnv_u8.c | 1098 --- deps/icu-small/source/common/ucnv_u8.cpp | 1104 +++ 
deps/icu-small/source/common/ucnvbocu.cpp | 4 +- deps/icu-small/source/common/ucnvdisp.c | 88 - deps/icu-small/source/common/ucnvdisp.cpp | 88 + deps/icu-small/source/common/ucnvhz.c | 631 -- deps/icu-small/source/common/ucnvhz.cpp | 633 ++ deps/icu-small/source/common/ucnvisci.c | 1625 ---- deps/icu-small/source/common/ucnvisci.cpp | 1635 ++++ deps/icu-small/source/common/ucnvlat1.c | 740 -- deps/icu-small/source/common/ucnvlat1.cpp | 751 ++ deps/icu-small/source/common/ucnvmbcs.cpp | 4 +- deps/icu-small/source/common/ucnvmbcs.h | 4 +- deps/icu-small/source/common/ucnvscsu.c | 2043 ----- deps/icu-small/source/common/ucnvscsu.cpp | 2045 +++++ deps/icu-small/source/common/ucnvsel.cpp | 2 +- deps/icu-small/source/common/ucol_data.h | 4 +- deps/icu-small/source/common/ucol_swp.cpp | 4 +- deps/icu-small/source/common/ucol_swp.h | 4 +- deps/icu-small/source/common/ucurr.cpp | 5 +- deps/icu-small/source/common/ucurrimp.h | 2 +- deps/icu-small/source/common/udata.cpp | 21 +- deps/icu-small/source/common/udatamem.c | 161 - deps/icu-small/source/common/udatamem.cpp | 161 + deps/icu-small/source/common/udatamem.h | 2 +- deps/icu-small/source/common/udataswp.c | 473 -- deps/icu-small/source/common/udataswp.cpp | 473 ++ deps/icu-small/source/common/udataswp.h | 4 +- deps/icu-small/source/common/uelement.h | 4 +- deps/icu-small/source/common/uenum.c | 189 - deps/icu-small/source/common/uenum.cpp | 189 + deps/icu-small/source/common/uenumimp.h | 4 +- deps/icu-small/source/common/uhash.c | 975 --- deps/icu-small/source/common/uhash.cpp | 975 +++ deps/icu-small/source/common/uhash.h | 2 +- deps/icu-small/source/common/uhash_us.cpp | 2 +- deps/icu-small/source/common/uidna.cpp | 4 +- deps/icu-small/source/common/uinit.cpp | 4 +- deps/icu-small/source/common/uinvchar.c | 612 -- deps/icu-small/source/common/uinvchar.cpp | 614 ++ deps/icu-small/source/common/uinvchar.h | 6 +- deps/icu-small/source/common/uiter.cpp | 4 +- deps/icu-small/source/common/ulist.c | 274 - 
deps/icu-small/source/common/ulist.cpp | 270 + deps/icu-small/source/common/ulist.h | 2 +- deps/icu-small/source/common/ulistformatter.cpp | 2 +- deps/icu-small/source/common/uloc.cpp | 440 +- deps/icu-small/source/common/uloc_keytype.cpp | 2 +- deps/icu-small/source/common/uloc_tag.c | 2529 ------ deps/icu-small/source/common/uloc_tag.cpp | 2530 ++++++ deps/icu-small/source/common/ulocimp.h | 2 +- deps/icu-small/source/common/umapfile.c | 466 -- deps/icu-small/source/common/umapfile.cpp | 496 ++ deps/icu-small/source/common/umapfile.h | 2 +- deps/icu-small/source/common/umath.c | 25 - deps/icu-small/source/common/umath.cpp | 25 + deps/icu-small/source/common/umutex.cpp | 2 +- deps/icu-small/source/common/umutex.h | 6 +- deps/icu-small/source/common/unames.cpp | 4 +- deps/icu-small/source/common/unicode/appendable.h | 50 +- deps/icu-small/source/common/unicode/brkiter.h | 2 +- deps/icu-small/source/common/unicode/bytestream.h | 6 +- deps/icu-small/source/common/unicode/bytestrie.h | 4 +- .../source/common/unicode/bytestriebuilder.h | 8 +- deps/icu-small/source/common/unicode/caniter.h | 6 +- deps/icu-small/source/common/unicode/casemap.h | 359 + deps/icu-small/source/common/unicode/char16ptr.h | 306 + deps/icu-small/source/common/unicode/chariter.h | 38 +- deps/icu-small/source/common/unicode/dbbi.h | 2 +- deps/icu-small/source/common/unicode/docmain.h | 7 +- deps/icu-small/source/common/unicode/dtintrv.h | 2 +- deps/icu-small/source/common/unicode/edits.h | 245 + deps/icu-small/source/common/unicode/enumset.h | 2 +- deps/icu-small/source/common/unicode/errorcode.h | 4 +- deps/icu-small/source/common/unicode/filteredbrk.h | 2 +- deps/icu-small/source/common/unicode/icudataver.h | 2 +- deps/icu-small/source/common/unicode/icuplug.h | 2 +- deps/icu-small/source/common/unicode/idna.h | 4 +- .../source/common/unicode/listformatter.h | 6 +- .../icu-small/source/common/unicode/localpointer.h | 12 +- deps/icu-small/source/common/unicode/locdspnm.h | 3 +- 
deps/icu-small/source/common/unicode/locid.h | 7 +- .../source/common/unicode/messagepattern.h | 4 +- deps/icu-small/source/common/unicode/normalizer2.h | 6 +- deps/icu-small/source/common/unicode/normlzr.h | 10 +- deps/icu-small/source/common/unicode/parseerr.h | 2 +- deps/icu-small/source/common/unicode/parsepos.h | 2 +- deps/icu-small/source/common/unicode/platform.h | 52 +- deps/icu-small/source/common/unicode/ptypes.h | 2 +- deps/icu-small/source/common/unicode/putil.h | 2 +- deps/icu-small/source/common/unicode/rbbi.h | 2 +- deps/icu-small/source/common/unicode/rep.h | 8 +- deps/icu-small/source/common/unicode/resbund.h | 4 +- deps/icu-small/source/common/unicode/schriter.h | 4 +- .../source/common/unicode/simpleformatter.h | 46 +- deps/icu-small/source/common/unicode/std_string.h | 14 +- deps/icu-small/source/common/unicode/strenum.h | 10 +- deps/icu-small/source/common/unicode/stringpiece.h | 4 +- .../source/common/unicode/stringtriebuilder.h | 20 +- deps/icu-small/source/common/unicode/symtable.h | 2 +- deps/icu-small/source/common/unicode/ubidi.h | 15 +- .../source/common/unicode/ubiditransform.h | 4 +- deps/icu-small/source/common/unicode/ubrk.h | 64 +- deps/icu-small/source/common/unicode/ucasemap.h | 19 +- deps/icu-small/source/common/unicode/ucat.h | 2 +- deps/icu-small/source/common/unicode/uchar.h | 12 +- deps/icu-small/source/common/unicode/ucharstrie.h | 96 +- .../source/common/unicode/ucharstriebuilder.h | 24 +- deps/icu-small/source/common/unicode/uchriter.h | 62 +- deps/icu-small/source/common/unicode/uclean.h | 12 +- deps/icu-small/source/common/unicode/ucnv.h | 2 +- deps/icu-small/source/common/unicode/ucnv_cb.h | 2 +- deps/icu-small/source/common/unicode/ucnv_err.h | 2 +- deps/icu-small/source/common/unicode/ucnvsel.h | 2 +- deps/icu-small/source/common/unicode/uconfig.h | 4 +- deps/icu-small/source/common/unicode/ucurr.h | 2 +- deps/icu-small/source/common/unicode/udata.h | 4 +- .../source/common/unicode/udisplaycontext.h | 2 +- 
deps/icu-small/source/common/unicode/uenum.h | 8 +- deps/icu-small/source/common/unicode/uidna.h | 4 +- deps/icu-small/source/common/unicode/uiter.h | 4 +- deps/icu-small/source/common/unicode/uldnames.h | 2 +- .../source/common/unicode/ulistformatter.h | 8 +- deps/icu-small/source/common/unicode/uloc.h | 47 +- deps/icu-small/source/common/unicode/umachine.h | 115 +- deps/icu-small/source/common/unicode/umisc.h | 4 +- deps/icu-small/source/common/unicode/unifilt.h | 4 +- deps/icu-small/source/common/unicode/unifunct.h | 2 +- deps/icu-small/source/common/unicode/unimatch.h | 2 +- deps/icu-small/source/common/unicode/uniset.h | 8 +- deps/icu-small/source/common/unicode/unistr.h | 543 +- deps/icu-small/source/common/unicode/unorm.h | 2 +- deps/icu-small/source/common/unicode/unorm2.h | 4 +- deps/icu-small/source/common/unicode/uobject.h | 4 +- deps/icu-small/source/common/unicode/urename.h | 29 +- deps/icu-small/source/common/unicode/urep.h | 2 +- deps/icu-small/source/common/unicode/ures.h | 2 +- deps/icu-small/source/common/unicode/uscript.h | 2 +- deps/icu-small/source/common/unicode/uset.h | 4 +- deps/icu-small/source/common/unicode/usetiter.h | 2 +- deps/icu-small/source/common/unicode/ushape.h | 4 +- deps/icu-small/source/common/unicode/usprep.h | 4 +- deps/icu-small/source/common/unicode/ustring.h | 2 +- deps/icu-small/source/common/unicode/ustringtrie.h | 4 +- deps/icu-small/source/common/unicode/utext.h | 4 +- deps/icu-small/source/common/unicode/utf.h | 4 +- deps/icu-small/source/common/unicode/utf16.h | 4 +- deps/icu-small/source/common/unicode/utf32.h | 4 +- deps/icu-small/source/common/unicode/utf8.h | 24 +- deps/icu-small/source/common/unicode/utf_old.h | 23 +- deps/icu-small/source/common/unicode/utrace.h | 4 +- deps/icu-small/source/common/unicode/utypes.h | 6 +- deps/icu-small/source/common/unicode/uvernum.h | 21 +- deps/icu-small/source/common/unicode/uversion.h | 4 +- deps/icu-small/source/common/unifiedcache.cpp | 2 +- 
deps/icu-small/source/common/unifiedcache.h | 2 +- deps/icu-small/source/common/unifilt.cpp | 2 +- deps/icu-small/source/common/unifunct.cpp | 2 +- deps/icu-small/source/common/uniset.cpp | 2 +- deps/icu-small/source/common/uniset_closure.cpp | 18 +- deps/icu-small/source/common/uniset_props.cpp | 8 +- deps/icu-small/source/common/unisetspan.cpp | 4 +- deps/icu-small/source/common/unisetspan.h | 4 +- deps/icu-small/source/common/unistr.cpp | 23 +- deps/icu-small/source/common/unistr_case.cpp | 128 +- .../icu-small/source/common/unistr_case_locale.cpp | 40 +- deps/icu-small/source/common/unistr_cnv.cpp | 4 +- deps/icu-small/source/common/unistr_props.cpp | 4 +- .../source/common/unistr_titlecase_brkiter.cpp | 47 +- deps/icu-small/source/common/unistrappender.h | 2 +- deps/icu-small/source/common/unorm.cpp | 2 +- deps/icu-small/source/common/unormcmp.cpp | 14 +- deps/icu-small/source/common/unormimp.h | 4 +- deps/icu-small/source/common/uobject.cpp | 4 +- deps/icu-small/source/common/uposixdefs.h | 24 +- deps/icu-small/source/common/uprops.cpp | 10 +- deps/icu-small/source/common/uprops.h | 4 +- deps/icu-small/source/common/ures_cnv.c | 78 - deps/icu-small/source/common/ures_cnv.cpp | 78 + deps/icu-small/source/common/uresbund.cpp | 2 +- deps/icu-small/source/common/uresdata.cpp | 18 +- deps/icu-small/source/common/uresdata.h | 4 +- deps/icu-small/source/common/uresimp.h | 2 +- deps/icu-small/source/common/ureslocs.h | 2 +- deps/icu-small/source/common/usc_impl.c | 361 - deps/icu-small/source/common/usc_impl.cpp | 361 + deps/icu-small/source/common/usc_impl.h | 2 +- deps/icu-small/source/common/uscript.c | 144 - deps/icu-small/source/common/uscript.cpp | 144 + deps/icu-small/source/common/uscript_props.cpp | 4 +- deps/icu-small/source/common/uset.cpp | 4 +- deps/icu-small/source/common/uset_imp.h | 4 +- deps/icu-small/source/common/uset_props.cpp | 4 +- deps/icu-small/source/common/usetiter.cpp | 2 +- deps/icu-small/source/common/ushape.cpp | 4 +- 
deps/icu-small/source/common/usprep.cpp | 4 +- deps/icu-small/source/common/ustack.cpp | 2 +- deps/icu-small/source/common/ustr_cnv.cpp | 4 +- deps/icu-small/source/common/ustr_cnv.h | 4 +- deps/icu-small/source/common/ustr_imp.h | 226 +- .../source/common/ustr_titlecase_brkiter.cpp | 93 +- deps/icu-small/source/common/ustr_wcs.cpp | 8 +- deps/icu-small/source/common/ustrcase.cpp | 517 +- deps/icu-small/source/common/ustrcase_locale.cpp | 110 +- deps/icu-small/source/common/ustrenum.cpp | 2 +- deps/icu-small/source/common/ustrenum.h | 2 +- deps/icu-small/source/common/ustrfmt.c | 59 - deps/icu-small/source/common/ustrfmt.cpp | 59 + deps/icu-small/source/common/ustrfmt.h | 2 +- deps/icu-small/source/common/ustring.cpp | 3 +- deps/icu-small/source/common/ustrtrns.cpp | 2 +- deps/icu-small/source/common/utext.cpp | 35 +- deps/icu-small/source/common/utf_impl.c | 328 - deps/icu-small/source/common/utf_impl.cpp | 339 + deps/icu-small/source/common/util.cpp | 2 +- deps/icu-small/source/common/util.h | 2 +- deps/icu-small/source/common/util_props.cpp | 2 +- deps/icu-small/source/common/utrace.c | 489 -- deps/icu-small/source/common/utrace.cpp | 492 ++ deps/icu-small/source/common/utracimp.h | 18 +- deps/icu-small/source/common/utrie.cpp | 4 +- deps/icu-small/source/common/utrie.h | 4 +- deps/icu-small/source/common/utrie2.cpp | 4 +- deps/icu-small/source/common/utrie2.h | 4 +- deps/icu-small/source/common/utrie2_builder.cpp | 4 +- deps/icu-small/source/common/utrie2_impl.h | 4 +- deps/icu-small/source/common/uts46.cpp | 4 +- deps/icu-small/source/common/utypeinfo.h | 4 +- deps/icu-small/source/common/utypes.c | 224 - deps/icu-small/source/common/utypes.cpp | 224 + deps/icu-small/source/common/uvector.cpp | 2 +- deps/icu-small/source/common/uvector.h | 2 +- deps/icu-small/source/common/uvectr32.cpp | 2 +- deps/icu-small/source/common/uvectr32.h | 2 +- deps/icu-small/source/common/uvectr64.cpp | 2 +- deps/icu-small/source/common/uvectr64.h | 2 +- 
deps/icu-small/source/common/wintz.c | 441 -- deps/icu-small/source/common/wintz.cpp | 384 + deps/icu-small/source/common/wintz.h | 8 +- deps/icu-small/source/data/in/icudt58l.dat | Bin 2682224 -> 0 bytes deps/icu-small/source/data/in/icudt59l.dat | Bin 0 -> 2717856 bytes deps/icu-small/source/i18n/affixpatternparser.cpp | 4 +- deps/icu-small/source/i18n/affixpatternparser.h | 2 +- deps/icu-small/source/i18n/alphaindex.cpp | 2 +- deps/icu-small/source/i18n/anytrans.cpp | 2 +- deps/icu-small/source/i18n/anytrans.h | 2 +- deps/icu-small/source/i18n/astro.cpp | 2 +- deps/icu-small/source/i18n/astro.h | 2 +- deps/icu-small/source/i18n/basictz.cpp | 2 +- deps/icu-small/source/i18n/bocsu.cpp | 4 +- deps/icu-small/source/i18n/bocsu.h | 4 +- deps/icu-small/source/i18n/brktrans.cpp | 2 +- deps/icu-small/source/i18n/brktrans.h | 2 +- deps/icu-small/source/i18n/buddhcal.cpp | 2 +- deps/icu-small/source/i18n/buddhcal.h | 2 +- deps/icu-small/source/i18n/calendar.cpp | 2 +- deps/icu-small/source/i18n/casetrn.cpp | 12 +- deps/icu-small/source/i18n/casetrn.h | 5 +- deps/icu-small/source/i18n/cecal.cpp | 2 +- deps/icu-small/source/i18n/cecal.h | 2 +- deps/icu-small/source/i18n/chnsecal.cpp | 2 +- deps/icu-small/source/i18n/chnsecal.h | 2 +- deps/icu-small/source/i18n/choicfmt.cpp | 2 +- deps/icu-small/source/i18n/coleitr.cpp | 3 +- deps/icu-small/source/i18n/coll.cpp | 2 +- deps/icu-small/source/i18n/collation.cpp | 2 +- deps/icu-small/source/i18n/collation.h | 2 +- deps/icu-small/source/i18n/collationbuilder.cpp | 2 +- deps/icu-small/source/i18n/collationbuilder.h | 2 +- deps/icu-small/source/i18n/collationcompare.cpp | 2 +- deps/icu-small/source/i18n/collationcompare.h | 2 +- deps/icu-small/source/i18n/collationdata.cpp | 2 +- deps/icu-small/source/i18n/collationdata.h | 2 +- .../icu-small/source/i18n/collationdatabuilder.cpp | 2 +- deps/icu-small/source/i18n/collationdatabuilder.h | 2 +- deps/icu-small/source/i18n/collationdatareader.cpp | 2 +- 
deps/icu-small/source/i18n/collationdatareader.h | 2 +- deps/icu-small/source/i18n/collationdatawriter.cpp | 2 +- deps/icu-small/source/i18n/collationdatawriter.h | 2 +- deps/icu-small/source/i18n/collationfastlatin.cpp | 2 +- deps/icu-small/source/i18n/collationfastlatin.h | 2 +- .../source/i18n/collationfastlatinbuilder.cpp | 2 +- .../source/i18n/collationfastlatinbuilder.h | 2 +- deps/icu-small/source/i18n/collationfcd.cpp | 2 +- deps/icu-small/source/i18n/collationfcd.h | 2 +- deps/icu-small/source/i18n/collationiterator.cpp | 2 +- deps/icu-small/source/i18n/collationiterator.h | 2 +- deps/icu-small/source/i18n/collationkeys.cpp | 2 +- deps/icu-small/source/i18n/collationkeys.h | 2 +- deps/icu-small/source/i18n/collationroot.cpp | 2 +- deps/icu-small/source/i18n/collationroot.h | 2 +- .../source/i18n/collationrootelements.cpp | 2 +- deps/icu-small/source/i18n/collationrootelements.h | 2 +- deps/icu-small/source/i18n/collationruleparser.cpp | 2 +- deps/icu-small/source/i18n/collationruleparser.h | 2 +- deps/icu-small/source/i18n/collationsets.cpp | 2 +- deps/icu-small/source/i18n/collationsets.h | 2 +- deps/icu-small/source/i18n/collationsettings.cpp | 2 +- deps/icu-small/source/i18n/collationsettings.h | 2 +- deps/icu-small/source/i18n/collationtailoring.cpp | 2 +- deps/icu-small/source/i18n/collationtailoring.h | 2 +- deps/icu-small/source/i18n/collationweights.cpp | 4 +- deps/icu-small/source/i18n/collationweights.h | 4 +- deps/icu-small/source/i18n/collunsafe.h | 2 +- .../icu-small/source/i18n/compactdecimalformat.cpp | 21 +- deps/icu-small/source/i18n/coptccal.cpp | 2 +- deps/icu-small/source/i18n/coptccal.h | 2 +- deps/icu-small/source/i18n/cpdtrans.cpp | 2 +- deps/icu-small/source/i18n/cpdtrans.h | 2 +- deps/icu-small/source/i18n/csdetect.cpp | 2 +- deps/icu-small/source/i18n/csdetect.h | 2 +- deps/icu-small/source/i18n/csmatch.cpp | 2 +- deps/icu-small/source/i18n/csmatch.h | 2 +- deps/icu-small/source/i18n/csr2022.cpp | 2 +- 
deps/icu-small/source/i18n/csr2022.h | 2 +- deps/icu-small/source/i18n/csrecog.cpp | 2 +- deps/icu-small/source/i18n/csrecog.h | 2 +- deps/icu-small/source/i18n/csrmbcs.cpp | 2 +- deps/icu-small/source/i18n/csrmbcs.h | 2 +- deps/icu-small/source/i18n/csrsbcs.cpp | 2 +- deps/icu-small/source/i18n/csrsbcs.h | 2 +- deps/icu-small/source/i18n/csrucode.cpp | 2 +- deps/icu-small/source/i18n/csrucode.h | 2 +- deps/icu-small/source/i18n/csrutf8.cpp | 2 +- deps/icu-small/source/i18n/csrutf8.h | 2 +- deps/icu-small/source/i18n/curramt.cpp | 6 +- deps/icu-small/source/i18n/currfmt.cpp | 2 +- deps/icu-small/source/i18n/currfmt.h | 2 +- deps/icu-small/source/i18n/currpinf.cpp | 3 +- deps/icu-small/source/i18n/currunit.cpp | 6 +- deps/icu-small/source/i18n/dangical.cpp | 2 +- deps/icu-small/source/i18n/dangical.h | 2 +- deps/icu-small/source/i18n/datefmt.cpp | 6 +- deps/icu-small/source/i18n/dayperiodrules.cpp | 4 +- deps/icu-small/source/i18n/dayperiodrules.h | 2 +- deps/icu-small/source/i18n/dcfmtimp.h | 2 +- deps/icu-small/source/i18n/dcfmtsym.cpp | 2 +- deps/icu-small/source/i18n/decContext.c | 431 -- deps/icu-small/source/i18n/decContext.cpp | 431 ++ deps/icu-small/source/i18n/decContext.h | 2 +- deps/icu-small/source/i18n/decNumber.c | 8188 -------------------- deps/icu-small/source/i18n/decNumber.cpp | 8188 ++++++++++++++++++++ deps/icu-small/source/i18n/decNumber.h | 2 +- deps/icu-small/source/i18n/decNumberLocal.h | 2 +- deps/icu-small/source/i18n/decfmtst.cpp | 2 +- deps/icu-small/source/i18n/decfmtst.h | 2 +- .../icu-small/source/i18n/decimalformatpattern.cpp | 2 +- deps/icu-small/source/i18n/decimalformatpattern.h | 2 +- .../source/i18n/decimalformatpatternimpl.h | 2 +- deps/icu-small/source/i18n/decimfmt.cpp | 3 +- deps/icu-small/source/i18n/decimfmtimpl.cpp | 2 +- deps/icu-small/source/i18n/decimfmtimpl.h | 2 +- deps/icu-small/source/i18n/digitaffix.cpp | 2 +- deps/icu-small/source/i18n/digitaffix.h | 2 +- .../source/i18n/digitaffixesandpadding.cpp | 2 +- 
.../icu-small/source/i18n/digitaffixesandpadding.h | 2 +- deps/icu-small/source/i18n/digitformatter.cpp | 2 +- deps/icu-small/source/i18n/digitformatter.h | 2 +- deps/icu-small/source/i18n/digitgrouping.cpp | 2 +- deps/icu-small/source/i18n/digitgrouping.h | 2 +- deps/icu-small/source/i18n/digitinterval.cpp | 2 +- deps/icu-small/source/i18n/digitinterval.h | 2 +- deps/icu-small/source/i18n/digitlst.cpp | 2 +- deps/icu-small/source/i18n/digitlst.h | 2 +- deps/icu-small/source/i18n/dt_impl.h | 2 +- deps/icu-small/source/i18n/dtfmtsym.cpp | 4 +- deps/icu-small/source/i18n/dtitv_impl.h | 2 +- deps/icu-small/source/i18n/dtitvfmt.cpp | 2 +- deps/icu-small/source/i18n/dtitvinf.cpp | 2 +- deps/icu-small/source/i18n/dtptngen.cpp | 4 +- deps/icu-small/source/i18n/dtptngen_impl.h | 5 +- deps/icu-small/source/i18n/dtrule.cpp | 2 +- deps/icu-small/source/i18n/esctrn.cpp | 2 +- deps/icu-small/source/i18n/esctrn.h | 2 +- deps/icu-small/source/i18n/ethpccal.cpp | 2 +- deps/icu-small/source/i18n/ethpccal.h | 2 +- deps/icu-small/source/i18n/fmtable.cpp | 2 +- deps/icu-small/source/i18n/fmtable_cnv.cpp | 2 +- deps/icu-small/source/i18n/fmtableimp.h | 5 +- deps/icu-small/source/i18n/format.cpp | 2 +- deps/icu-small/source/i18n/fphdlimp.cpp | 2 +- deps/icu-small/source/i18n/fphdlimp.h | 2 +- deps/icu-small/source/i18n/fpositer.cpp | 2 +- deps/icu-small/source/i18n/funcrepl.cpp | 2 +- deps/icu-small/source/i18n/funcrepl.h | 2 +- deps/icu-small/source/i18n/gender.cpp | 2 +- deps/icu-small/source/i18n/gregocal.cpp | 2 +- deps/icu-small/source/i18n/gregoimp.cpp | 2 +- deps/icu-small/source/i18n/gregoimp.h | 2 +- deps/icu-small/source/i18n/hebrwcal.cpp | 2 +- deps/icu-small/source/i18n/hebrwcal.h | 2 +- deps/icu-small/source/i18n/indiancal.cpp | 2 +- deps/icu-small/source/i18n/indiancal.h | 2 +- deps/icu-small/source/i18n/inputext.cpp | 2 +- deps/icu-small/source/i18n/inputext.h | 2 +- deps/icu-small/source/i18n/islamcal.cpp | 2 +- deps/icu-small/source/i18n/islamcal.h | 2 +- 
deps/icu-small/source/i18n/japancal.cpp | 2 +- deps/icu-small/source/i18n/japancal.h | 2 +- deps/icu-small/source/i18n/measfmt.cpp | 13 +- deps/icu-small/source/i18n/measunit.cpp | 319 +- deps/icu-small/source/i18n/measure.cpp | 2 +- deps/icu-small/source/i18n/msgfmt.cpp | 2 +- deps/icu-small/source/i18n/msgfmt_impl.h | 2 +- deps/icu-small/source/i18n/name2uni.cpp | 2 +- deps/icu-small/source/i18n/name2uni.h | 2 +- deps/icu-small/source/i18n/nfrlist.h | 4 +- deps/icu-small/source/i18n/nfrs.cpp | 27 +- deps/icu-small/source/i18n/nfrs.h | 6 +- deps/icu-small/source/i18n/nfrule.cpp | 21 +- deps/icu-small/source/i18n/nfrule.h | 7 +- deps/icu-small/source/i18n/nfsubs.cpp | 87 +- deps/icu-small/source/i18n/nfsubs.h | 6 +- deps/icu-small/source/i18n/nortrans.cpp | 2 +- deps/icu-small/source/i18n/nortrans.h | 2 +- deps/icu-small/source/i18n/nultrans.cpp | 2 +- deps/icu-small/source/i18n/nultrans.h | 2 +- deps/icu-small/source/i18n/numfmt.cpp | 8 +- deps/icu-small/source/i18n/numsys.cpp | 4 +- deps/icu-small/source/i18n/numsys_impl.h | 2 +- deps/icu-small/source/i18n/olsontz.cpp | 2 +- deps/icu-small/source/i18n/olsontz.h | 2 +- deps/icu-small/source/i18n/persncal.cpp | 2 +- deps/icu-small/source/i18n/persncal.h | 2 +- deps/icu-small/source/i18n/pluralaffix.cpp | 2 +- deps/icu-small/source/i18n/pluralaffix.h | 2 +- deps/icu-small/source/i18n/plurfmt.cpp | 2 +- deps/icu-small/source/i18n/plurrule.cpp | 25 +- deps/icu-small/source/i18n/plurrule_impl.h | 3 +- deps/icu-small/source/i18n/precision.cpp | 2 +- deps/icu-small/source/i18n/precision.h | 2 +- deps/icu-small/source/i18n/quant.cpp | 2 +- deps/icu-small/source/i18n/quant.h | 2 +- deps/icu-small/source/i18n/quantityformatter.cpp | 2 +- deps/icu-small/source/i18n/quantityformatter.h | 2 +- deps/icu-small/source/i18n/rbnf.cpp | 174 +- deps/icu-small/source/i18n/rbt.cpp | 2 +- deps/icu-small/source/i18n/rbt.h | 2 +- deps/icu-small/source/i18n/rbt_data.cpp | 2 +- deps/icu-small/source/i18n/rbt_data.h | 2 +- 
deps/icu-small/source/i18n/rbt_pars.cpp | 2 +- deps/icu-small/source/i18n/rbt_pars.h | 2 +- deps/icu-small/source/i18n/rbt_rule.cpp | 2 +- deps/icu-small/source/i18n/rbt_rule.h | 2 +- deps/icu-small/source/i18n/rbt_set.cpp | 2 +- deps/icu-small/source/i18n/rbt_set.h | 2 +- deps/icu-small/source/i18n/rbtz.cpp | 2 +- deps/icu-small/source/i18n/regexcmp.cpp | 52 +- deps/icu-small/source/i18n/regexcmp.h | 2 +- deps/icu-small/source/i18n/regexcst.h | 2 +- deps/icu-small/source/i18n/regeximp.cpp | 12 +- deps/icu-small/source/i18n/regeximp.h | 4 +- deps/icu-small/source/i18n/regexst.cpp | 2 +- deps/icu-small/source/i18n/regexst.h | 2 +- deps/icu-small/source/i18n/regextxt.cpp | 2 +- deps/icu-small/source/i18n/regextxt.h | 2 +- deps/icu-small/source/i18n/region.cpp | 2 +- deps/icu-small/source/i18n/region_impl.h | 2 +- deps/icu-small/source/i18n/reldatefmt.cpp | 3 +- deps/icu-small/source/i18n/reldtfmt.cpp | 4 +- deps/icu-small/source/i18n/reldtfmt.h | 2 +- deps/icu-small/source/i18n/rematch.cpp | 24 +- deps/icu-small/source/i18n/remtrans.cpp | 2 +- deps/icu-small/source/i18n/remtrans.h | 2 +- deps/icu-small/source/i18n/repattrn.cpp | 2 +- deps/icu-small/source/i18n/rulebasedcollator.cpp | 2 +- .../source/i18n/scientificnumberformatter.cpp | 2 +- deps/icu-small/source/i18n/scriptset.cpp | 2 +- deps/icu-small/source/i18n/scriptset.h | 2 +- deps/icu-small/source/i18n/search.cpp | 2 +- deps/icu-small/source/i18n/selfmt.cpp | 2 +- deps/icu-small/source/i18n/selfmtimpl.h | 2 +- deps/icu-small/source/i18n/sharedbreakiterator.cpp | 2 +- deps/icu-small/source/i18n/sharedbreakiterator.h | 2 +- deps/icu-small/source/i18n/sharedcalendar.h | 2 +- .../source/i18n/shareddateformatsymbols.h | 2 +- deps/icu-small/source/i18n/sharednumberformat.h | 2 +- deps/icu-small/source/i18n/sharedpluralrules.h | 2 +- .../source/i18n/significantdigitinterval.h | 2 +- deps/icu-small/source/i18n/simpletz.cpp | 15 +- deps/icu-small/source/i18n/smallintformatter.cpp | 2 +- 
deps/icu-small/source/i18n/smallintformatter.h | 2 +- deps/icu-small/source/i18n/smpdtfmt.cpp | 4 +- deps/icu-small/source/i18n/smpdtfst.cpp | 2 +- deps/icu-small/source/i18n/smpdtfst.h | 3 +- deps/icu-small/source/i18n/sortkey.cpp | 2 +- deps/icu-small/source/i18n/standardplural.cpp | 2 +- deps/icu-small/source/i18n/standardplural.h | 2 +- deps/icu-small/source/i18n/strmatch.cpp | 2 +- deps/icu-small/source/i18n/strmatch.h | 2 +- deps/icu-small/source/i18n/strrepl.cpp | 2 +- deps/icu-small/source/i18n/strrepl.h | 2 +- deps/icu-small/source/i18n/stsearch.cpp | 2 +- deps/icu-small/source/i18n/taiwncal.cpp | 2 +- deps/icu-small/source/i18n/taiwncal.h | 2 +- deps/icu-small/source/i18n/timezone.cpp | 2 +- deps/icu-small/source/i18n/titletrn.cpp | 12 +- deps/icu-small/source/i18n/titletrn.h | 2 +- deps/icu-small/source/i18n/tmunit.cpp | 2 +- deps/icu-small/source/i18n/tmutamt.cpp | 2 +- deps/icu-small/source/i18n/tmutfmt.cpp | 2 +- deps/icu-small/source/i18n/tolowtrn.cpp | 2 +- deps/icu-small/source/i18n/tolowtrn.h | 2 +- deps/icu-small/source/i18n/toupptrn.cpp | 2 +- deps/icu-small/source/i18n/toupptrn.h | 2 +- deps/icu-small/source/i18n/translit.cpp | 2 +- deps/icu-small/source/i18n/transreg.cpp | 2 +- deps/icu-small/source/i18n/transreg.h | 2 +- deps/icu-small/source/i18n/tridpars.cpp | 2 +- deps/icu-small/source/i18n/tridpars.h | 2 +- deps/icu-small/source/i18n/tzfmt.cpp | 4 +- deps/icu-small/source/i18n/tzgnames.cpp | 3 +- deps/icu-small/source/i18n/tzgnames.h | 2 +- deps/icu-small/source/i18n/tznames.cpp | 2 +- deps/icu-small/source/i18n/tznames_impl.cpp | 3 +- deps/icu-small/source/i18n/tznames_impl.h | 2 +- deps/icu-small/source/i18n/tzrule.cpp | 2 +- deps/icu-small/source/i18n/tztrans.cpp | 2 +- deps/icu-small/source/i18n/ucal.cpp | 2 +- deps/icu-small/source/i18n/ucln_in.cpp | 4 +- deps/icu-small/source/i18n/ucln_in.h | 4 +- deps/icu-small/source/i18n/ucol.cpp | 4 +- deps/icu-small/source/i18n/ucol_imp.h | 4 +- deps/icu-small/source/i18n/ucol_res.cpp | 5 +- 
deps/icu-small/source/i18n/ucol_sit.cpp | 4 +- deps/icu-small/source/i18n/ucoleitr.cpp | 2 +- deps/icu-small/source/i18n/ucsdet.cpp | 2 +- deps/icu-small/source/i18n/udat.cpp | 2 +- deps/icu-small/source/i18n/udateintervalformat.cpp | 2 +- deps/icu-small/source/i18n/udatpg.cpp | 4 +- deps/icu-small/source/i18n/ufieldpositer.cpp | 2 +- .../source/i18n/uitercollationiterator.cpp | 2 +- .../icu-small/source/i18n/uitercollationiterator.h | 2 +- deps/icu-small/source/i18n/ulocdata.c | 386 - deps/icu-small/source/i18n/ulocdata.cpp | 386 + deps/icu-small/source/i18n/umsg.cpp | 4 +- deps/icu-small/source/i18n/umsg_imp.h | 4 +- deps/icu-small/source/i18n/unesctrn.cpp | 2 +- deps/icu-small/source/i18n/unesctrn.h | 2 +- deps/icu-small/source/i18n/uni2name.cpp | 2 +- deps/icu-small/source/i18n/uni2name.h | 2 +- deps/icu-small/source/i18n/unicode/alphaindex.h | 3 +- deps/icu-small/source/i18n/unicode/basictz.h | 2 +- deps/icu-small/source/i18n/unicode/calendar.h | 2 +- deps/icu-small/source/i18n/unicode/choicfmt.h | 2 +- deps/icu-small/source/i18n/unicode/coleitr.h | 3 +- deps/icu-small/source/i18n/unicode/coll.h | 30 +- .../source/i18n/unicode/compactdecimalformat.h | 2 +- deps/icu-small/source/i18n/unicode/curramt.h | 10 +- deps/icu-small/source/i18n/unicode/currpinf.h | 2 +- deps/icu-small/source/i18n/unicode/currunit.h | 12 +- deps/icu-small/source/i18n/unicode/datefmt.h | 2 +- deps/icu-small/source/i18n/unicode/dcfmtsym.h | 9 +- deps/icu-small/source/i18n/unicode/decimfmt.h | 28 +- deps/icu-small/source/i18n/unicode/dtfmtsym.h | 13 +- deps/icu-small/source/i18n/unicode/dtitvfmt.h | 4 +- deps/icu-small/source/i18n/unicode/dtitvinf.h | 2 +- deps/icu-small/source/i18n/unicode/dtptngen.h | 5 +- deps/icu-small/source/i18n/unicode/dtrule.h | 2 +- deps/icu-small/source/i18n/unicode/fieldpos.h | 2 +- deps/icu-small/source/i18n/unicode/fmtable.h | 2 +- deps/icu-small/source/i18n/unicode/format.h | 2 +- deps/icu-small/source/i18n/unicode/fpositer.h | 2 +- 
deps/icu-small/source/i18n/unicode/gender.h | 2 +- deps/icu-small/source/i18n/unicode/gregocal.h | 2 +- deps/icu-small/source/i18n/unicode/measfmt.h | 2 +- deps/icu-small/source/i18n/unicode/measunit.h | 76 +- deps/icu-small/source/i18n/unicode/measure.h | 2 +- deps/icu-small/source/i18n/unicode/msgfmt.h | 4 +- deps/icu-small/source/i18n/unicode/numfmt.h | 10 +- deps/icu-small/source/i18n/unicode/numsys.h | 4 +- deps/icu-small/source/i18n/unicode/plurfmt.h | 2 +- deps/icu-small/source/i18n/unicode/plurrule.h | 21 +- deps/icu-small/source/i18n/unicode/rbnf.h | 51 +- deps/icu-small/source/i18n/unicode/rbtz.h | 2 +- deps/icu-small/source/i18n/unicode/regex.h | 34 +- deps/icu-small/source/i18n/unicode/region.h | 2 +- deps/icu-small/source/i18n/unicode/reldatefmt.h | 9 +- .../i18n/unicode/scientificnumberformatter.h | 2 +- deps/icu-small/source/i18n/unicode/search.h | 2 +- deps/icu-small/source/i18n/unicode/selfmt.h | 2 +- deps/icu-small/source/i18n/unicode/simpletz.h | 2 +- deps/icu-small/source/i18n/unicode/smpdtfmt.h | 14 +- deps/icu-small/source/i18n/unicode/sortkey.h | 2 +- deps/icu-small/source/i18n/unicode/stsearch.h | 2 +- deps/icu-small/source/i18n/unicode/tblcoll.h | 26 +- deps/icu-small/source/i18n/unicode/timezone.h | 10 +- deps/icu-small/source/i18n/unicode/tmunit.h | 2 +- deps/icu-small/source/i18n/unicode/tmutamt.h | 2 +- deps/icu-small/source/i18n/unicode/tmutfmt.h | 2 +- deps/icu-small/source/i18n/unicode/translit.h | 4 +- deps/icu-small/source/i18n/unicode/tzfmt.h | 8 +- deps/icu-small/source/i18n/unicode/tznames.h | 2 +- deps/icu-small/source/i18n/unicode/tzrule.h | 2 +- deps/icu-small/source/i18n/unicode/tztrans.h | 2 +- deps/icu-small/source/i18n/unicode/ucal.h | 6 +- deps/icu-small/source/i18n/unicode/ucol.h | 12 +- deps/icu-small/source/i18n/unicode/ucoleitr.h | 2 +- deps/icu-small/source/i18n/unicode/ucsdet.h | 10 +- deps/icu-small/source/i18n/unicode/udat.h | 80 +- .../source/i18n/unicode/udateintervalformat.h | 2 +- 
deps/icu-small/source/i18n/unicode/udatpg.h | 8 +- deps/icu-small/source/i18n/unicode/ufieldpositer.h | 2 +- deps/icu-small/source/i18n/unicode/uformattable.h | 4 +- deps/icu-small/source/i18n/unicode/ugender.h | 2 +- deps/icu-small/source/i18n/unicode/ulocdata.h | 10 +- deps/icu-small/source/i18n/unicode/umsg.h | 4 +- deps/icu-small/source/i18n/unicode/unirepl.h | 2 +- deps/icu-small/source/i18n/unicode/unum.h | 66 +- deps/icu-small/source/i18n/unicode/unumsys.h | 2 +- deps/icu-small/source/i18n/unicode/upluralrules.h | 59 +- deps/icu-small/source/i18n/unicode/uregex.h | 4 +- deps/icu-small/source/i18n/unicode/uregion.h | 4 +- deps/icu-small/source/i18n/unicode/ureldatefmt.h | 72 +- deps/icu-small/source/i18n/unicode/usearch.h | 6 +- deps/icu-small/source/i18n/unicode/uspoof.h | 4 +- deps/icu-small/source/i18n/unicode/utmscale.h | 9 +- deps/icu-small/source/i18n/unicode/utrans.h | 2 +- deps/icu-small/source/i18n/unicode/vtzone.h | 2 +- deps/icu-small/source/i18n/unum.cpp | 29 +- deps/icu-small/source/i18n/unumsys.cpp | 2 +- deps/icu-small/source/i18n/upluralrules.cpp | 47 +- deps/icu-small/source/i18n/uregex.cpp | 2 +- deps/icu-small/source/i18n/uregexc.cpp | 2 +- deps/icu-small/source/i18n/uregion.cpp | 2 +- deps/icu-small/source/i18n/usearch.cpp | 2 +- deps/icu-small/source/i18n/uspoof.cpp | 4 +- deps/icu-small/source/i18n/uspoof_build.cpp | 4 +- deps/icu-small/source/i18n/uspoof_conf.cpp | 4 +- deps/icu-small/source/i18n/uspoof_conf.h | 4 +- deps/icu-small/source/i18n/uspoof_impl.cpp | 2 +- deps/icu-small/source/i18n/uspoof_impl.h | 2 +- deps/icu-small/source/i18n/usrchimp.h | 2 +- .../source/i18n/utf16collationiterator.cpp | 2 +- .../icu-small/source/i18n/utf16collationiterator.h | 2 +- .../source/i18n/utf8collationiterator.cpp | 2 +- deps/icu-small/source/i18n/utf8collationiterator.h | 2 +- deps/icu-small/source/i18n/utmscale.c | 116 - deps/icu-small/source/i18n/utmscale.cpp | 116 + deps/icu-small/source/i18n/utrans.cpp | 2 +- 
deps/icu-small/source/i18n/valueformatter.cpp | 2 +- deps/icu-small/source/i18n/valueformatter.h | 2 +- deps/icu-small/source/i18n/visibledigits.cpp | 2 +- deps/icu-small/source/i18n/visibledigits.h | 2 +- deps/icu-small/source/i18n/vtzone.cpp | 4 +- deps/icu-small/source/i18n/vzone.cpp | 2 +- deps/icu-small/source/i18n/vzone.h | 2 +- deps/icu-small/source/i18n/windtfmt.cpp | 115 +- deps/icu-small/source/i18n/windtfmt.h | 11 +- deps/icu-small/source/i18n/winnmfmt.cpp | 171 +- deps/icu-small/source/i18n/winnmfmt.h | 5 +- deps/icu-small/source/i18n/wintzimpl.cpp | 6 +- deps/icu-small/source/i18n/wintzimpl.h | 6 +- deps/icu-small/source/i18n/zonemeta.cpp | 11 +- deps/icu-small/source/i18n/zonemeta.h | 8 +- deps/icu-small/source/i18n/zrule.cpp | 2 +- deps/icu-small/source/i18n/zrule.h | 2 +- deps/icu-small/source/i18n/ztrans.cpp | 2 +- deps/icu-small/source/i18n/ztrans.h | 2 +- deps/icu-small/source/io/locbund.cpp | 2 +- deps/icu-small/source/io/locbund.h | 2 +- deps/icu-small/source/io/sprintf.c | 261 - deps/icu-small/source/io/sprintf.cpp | 261 + deps/icu-small/source/io/sscanf.c | 129 - deps/icu-small/source/io/sscanf.cpp | 129 + deps/icu-small/source/io/ucln_io.cpp | 4 +- deps/icu-small/source/io/ucln_io.h | 4 +- deps/icu-small/source/io/ufile.c | 362 - deps/icu-small/source/io/ufile.cpp | 343 + deps/icu-small/source/io/ufile.h | 4 +- deps/icu-small/source/io/ufmt_cmn.c | 259 - deps/icu-small/source/io/ufmt_cmn.cpp | 259 + deps/icu-small/source/io/ufmt_cmn.h | 4 +- deps/icu-small/source/io/unicode/ustdio.h | 2 +- deps/icu-small/source/io/unicode/ustream.h | 4 +- deps/icu-small/source/io/uprintf.cpp | 2 +- deps/icu-small/source/io/uprintf.h | 2 +- deps/icu-small/source/io/uprntf_p.c | 1593 ---- deps/icu-small/source/io/uprntf_p.cpp | 1606 ++++ deps/icu-small/source/io/uscanf.c | 107 - deps/icu-small/source/io/uscanf.cpp | 107 + deps/icu-small/source/io/uscanf.h | 2 +- deps/icu-small/source/io/uscanf_p.c | 1408 ---- deps/icu-small/source/io/uscanf_p.cpp | 1450 ++++ 
deps/icu-small/source/io/ustdio.c | 732 -- deps/icu-small/source/io/ustdio.cpp | 732 ++ deps/icu-small/source/io/ustream.cpp | 2 +- deps/icu-small/source/stubdata/stubdata.c | 74 - deps/icu-small/source/stubdata/stubdata.cpp | 74 + deps/icu-small/source/tools/escapesrc/cptbl.h | 520 ++ .../icu-small/source/tools/escapesrc/escapesrc.cpp | 409 + .../source/tools/escapesrc/expect-simple.cpp | 17 + deps/icu-small/source/tools/escapesrc/tblgen.cpp | 80 + .../source/tools/escapesrc/test-nochange.cpp | 5 + .../source/tools/escapesrc/test-simple.cpp | 17 + deps/icu-small/source/tools/genccode/genccode.c | 4 +- deps/icu-small/source/tools/gencmn/gencmn.c | 4 +- deps/icu-small/source/tools/genrb/derb.cpp | 4 +- deps/icu-small/source/tools/genrb/errmsg.c | 2 +- deps/icu-small/source/tools/genrb/errmsg.h | 2 +- deps/icu-small/source/tools/genrb/genrb.cpp | 2 +- deps/icu-small/source/tools/genrb/genrb.h | 2 +- deps/icu-small/source/tools/genrb/parse.cpp | 2 +- deps/icu-small/source/tools/genrb/parse.h | 2 +- deps/icu-small/source/tools/genrb/prscmnts.cpp | 2 +- deps/icu-small/source/tools/genrb/prscmnts.h | 2 +- deps/icu-small/source/tools/genrb/rbutil.c | 2 +- deps/icu-small/source/tools/genrb/rbutil.h | 2 +- deps/icu-small/source/tools/genrb/read.c | 2 +- deps/icu-small/source/tools/genrb/read.h | 2 +- deps/icu-small/source/tools/genrb/reslist.cpp | 8 +- deps/icu-small/source/tools/genrb/reslist.h | 4 +- deps/icu-small/source/tools/genrb/rle.c | 2 +- deps/icu-small/source/tools/genrb/rle.h | 2 +- deps/icu-small/source/tools/genrb/ustr.c | 2 +- deps/icu-small/source/tools/genrb/ustr.h | 2 +- deps/icu-small/source/tools/genrb/wrtjava.cpp | 2 +- deps/icu-small/source/tools/genrb/wrtxml.cpp | 6 +- deps/icu-small/source/tools/icupkg/icupkg.cpp | 4 +- deps/icu-small/source/tools/pkgdata/pkgdata.cpp | 62 +- deps/icu-small/source/tools/pkgdata/pkgtypes.c | 2 +- deps/icu-small/source/tools/pkgdata/pkgtypes.h | 2 +- .../source/tools/toolutil/collationinfo.cpp | 2 +- 
.../source/tools/toolutil/collationinfo.h | 2 +- deps/icu-small/source/tools/toolutil/dbgutil.cpp | 6 +- deps/icu-small/source/tools/toolutil/dbgutil.h | 2 +- .../source/tools/toolutil/denseranges.cpp | 4 +- deps/icu-small/source/tools/toolutil/denseranges.h | 4 +- deps/icu-small/source/tools/toolutil/filestrm.c | 227 - deps/icu-small/source/tools/toolutil/filestrm.cpp | 227 + deps/icu-small/source/tools/toolutil/filestrm.h | 2 +- deps/icu-small/source/tools/toolutil/filetools.cpp | 2 +- deps/icu-small/source/tools/toolutil/filetools.h | 4 +- deps/icu-small/source/tools/toolutil/flagparser.c | 180 - .../icu-small/source/tools/toolutil/flagparser.cpp | 180 + deps/icu-small/source/tools/toolutil/flagparser.h | 4 +- deps/icu-small/source/tools/toolutil/package.cpp | 6 +- deps/icu-small/source/tools/toolutil/package.h | 4 +- deps/icu-small/source/tools/toolutil/pkg_genc.c | 1199 --- deps/icu-small/source/tools/toolutil/pkg_genc.cpp | 1213 +++ deps/icu-small/source/tools/toolutil/pkg_genc.h | 2 +- deps/icu-small/source/tools/toolutil/pkg_gencmn.c | 573 -- .../icu-small/source/tools/toolutil/pkg_gencmn.cpp | 578 ++ deps/icu-small/source/tools/toolutil/pkg_gencmn.h | 2 +- deps/icu-small/source/tools/toolutil/pkg_icu.cpp | 2 +- deps/icu-small/source/tools/toolutil/pkg_icu.h | 2 +- deps/icu-small/source/tools/toolutil/pkg_imp.h | 4 +- deps/icu-small/source/tools/toolutil/pkgitems.cpp | 4 +- deps/icu-small/source/tools/toolutil/ppucd.cpp | 8 +- deps/icu-small/source/tools/toolutil/ppucd.h | 4 +- deps/icu-small/source/tools/toolutil/swapimpl.cpp | 4 +- deps/icu-small/source/tools/toolutil/swapimpl.h | 4 +- deps/icu-small/source/tools/toolutil/toolutil.cpp | 4 +- deps/icu-small/source/tools/toolutil/toolutil.h | 4 +- deps/icu-small/source/tools/toolutil/ucbuf.cpp | 2 +- deps/icu-small/source/tools/toolutil/ucbuf.h | 2 +- deps/icu-small/source/tools/toolutil/ucln_tu.cpp | 2 +- deps/icu-small/source/tools/toolutil/ucm.c | 1191 --- deps/icu-small/source/tools/toolutil/ucm.cpp | 
1195 +++ deps/icu-small/source/tools/toolutil/ucm.h | 4 +- deps/icu-small/source/tools/toolutil/ucmstate.c | 1048 --- deps/icu-small/source/tools/toolutil/ucmstate.cpp | 1051 +++ deps/icu-small/source/tools/toolutil/udbgutil.cpp | 42 +- deps/icu-small/source/tools/toolutil/udbgutil.h | 2 +- deps/icu-small/source/tools/toolutil/unewdata.c | 275 - deps/icu-small/source/tools/toolutil/unewdata.cpp | 275 + deps/icu-small/source/tools/toolutil/unewdata.h | 4 +- deps/icu-small/source/tools/toolutil/uoptions.c | 133 - deps/icu-small/source/tools/toolutil/uoptions.cpp | 133 + deps/icu-small/source/tools/toolutil/uoptions.h | 4 +- deps/icu-small/source/tools/toolutil/uparse.c | 383 - deps/icu-small/source/tools/toolutil/uparse.cpp | 383 + deps/icu-small/source/tools/toolutil/uparse.h | 4 +- deps/icu-small/source/tools/toolutil/writesrc.c | 268 - deps/icu-small/source/tools/toolutil/writesrc.cpp | 268 + deps/icu-small/source/tools/toolutil/writesrc.h | 4 +- deps/icu-small/source/tools/toolutil/xmlparser.cpp | 8 +- deps/icu-small/source/tools/toolutil/xmlparser.h | 4 +- 967 files changed, 63996 insertions(+), 59638 deletions(-) delete mode 100644 deps/icu-small/source/common/cmemory.c create mode 100644 deps/icu-small/source/common/cmemory.cpp delete mode 100644 deps/icu-small/source/common/cstring.c create mode 100644 deps/icu-small/source/common/cstring.cpp delete mode 100644 deps/icu-small/source/common/cwchar.c create mode 100644 deps/icu-small/source/common/cwchar.cpp create mode 100644 deps/icu-small/source/common/edits.cpp delete mode 100644 deps/icu-small/source/common/icudataver.c create mode 100644 deps/icu-small/source/common/icudataver.cpp delete mode 100644 deps/icu-small/source/common/locmap.c create mode 100644 deps/icu-small/source/common/locmap.cpp delete mode 100644 deps/icu-small/source/common/propsvec.c create mode 100644 deps/icu-small/source/common/propsvec.cpp delete mode 100644 deps/icu-small/source/common/uarrsort.c create mode 100644 
deps/icu-small/source/common/uarrsort.cpp delete mode 100644 deps/icu-small/source/common/ubidi.c create mode 100644 deps/icu-small/source/common/ubidi.cpp delete mode 100644 deps/icu-small/source/common/ubidi_props.c create mode 100644 deps/icu-small/source/common/ubidi_props.cpp delete mode 100644 deps/icu-small/source/common/ubidiln.c create mode 100644 deps/icu-small/source/common/ubidiln.cpp delete mode 100644 deps/icu-small/source/common/ubiditransform.c create mode 100644 deps/icu-small/source/common/ubiditransform.cpp delete mode 100644 deps/icu-small/source/common/ubidiwrt.c create mode 100644 deps/icu-small/source/common/ubidiwrt.cpp create mode 100644 deps/icu-small/source/common/ucasemap_imp.h delete mode 100644 deps/icu-small/source/common/ucat.c create mode 100644 deps/icu-small/source/common/ucat.cpp delete mode 100644 deps/icu-small/source/common/uchar.c create mode 100644 deps/icu-small/source/common/uchar.cpp delete mode 100644 deps/icu-small/source/common/ucmndata.c create mode 100644 deps/icu-small/source/common/ucmndata.cpp delete mode 100644 deps/icu-small/source/common/ucnv.c create mode 100644 deps/icu-small/source/common/ucnv.cpp delete mode 100644 deps/icu-small/source/common/ucnv_cb.c create mode 100644 deps/icu-small/source/common/ucnv_cb.cpp delete mode 100644 deps/icu-small/source/common/ucnv_cnv.c create mode 100644 deps/icu-small/source/common/ucnv_cnv.cpp delete mode 100644 deps/icu-small/source/common/ucnv_ct.c create mode 100644 deps/icu-small/source/common/ucnv_ct.cpp delete mode 100644 deps/icu-small/source/common/ucnv_err.c create mode 100644 deps/icu-small/source/common/ucnv_err.cpp delete mode 100644 deps/icu-small/source/common/ucnv_lmb.c create mode 100644 deps/icu-small/source/common/ucnv_lmb.cpp delete mode 100644 deps/icu-small/source/common/ucnv_set.c create mode 100644 deps/icu-small/source/common/ucnv_set.cpp delete mode 100644 deps/icu-small/source/common/ucnv_u16.c create mode 100644 
deps/icu-small/source/common/ucnv_u16.cpp delete mode 100644 deps/icu-small/source/common/ucnv_u32.c create mode 100644 deps/icu-small/source/common/ucnv_u32.cpp delete mode 100644 deps/icu-small/source/common/ucnv_u7.c create mode 100644 deps/icu-small/source/common/ucnv_u7.cpp delete mode 100644 deps/icu-small/source/common/ucnv_u8.c create mode 100644 deps/icu-small/source/common/ucnv_u8.cpp delete mode 100644 deps/icu-small/source/common/ucnvdisp.c create mode 100644 deps/icu-small/source/common/ucnvdisp.cpp delete mode 100644 deps/icu-small/source/common/ucnvhz.c create mode 100644 deps/icu-small/source/common/ucnvhz.cpp delete mode 100644 deps/icu-small/source/common/ucnvisci.c create mode 100644 deps/icu-small/source/common/ucnvisci.cpp delete mode 100644 deps/icu-small/source/common/ucnvlat1.c create mode 100644 deps/icu-small/source/common/ucnvlat1.cpp delete mode 100644 deps/icu-small/source/common/ucnvscsu.c create mode 100644 deps/icu-small/source/common/ucnvscsu.cpp delete mode 100644 deps/icu-small/source/common/udatamem.c create mode 100644 deps/icu-small/source/common/udatamem.cpp delete mode 100644 deps/icu-small/source/common/udataswp.c create mode 100644 deps/icu-small/source/common/udataswp.cpp delete mode 100644 deps/icu-small/source/common/uenum.c create mode 100644 deps/icu-small/source/common/uenum.cpp delete mode 100644 deps/icu-small/source/common/uhash.c create mode 100644 deps/icu-small/source/common/uhash.cpp delete mode 100644 deps/icu-small/source/common/uinvchar.c create mode 100644 deps/icu-small/source/common/uinvchar.cpp delete mode 100644 deps/icu-small/source/common/ulist.c create mode 100644 deps/icu-small/source/common/ulist.cpp delete mode 100644 deps/icu-small/source/common/uloc_tag.c create mode 100644 deps/icu-small/source/common/uloc_tag.cpp delete mode 100644 deps/icu-small/source/common/umapfile.c create mode 100644 deps/icu-small/source/common/umapfile.cpp delete mode 100644 deps/icu-small/source/common/umath.c create 
mode 100644 deps/icu-small/source/common/umath.cpp create mode 100644 deps/icu-small/source/common/unicode/casemap.h create mode 100644 deps/icu-small/source/common/unicode/char16ptr.h create mode 100644 deps/icu-small/source/common/unicode/edits.h delete mode 100644 deps/icu-small/source/common/ures_cnv.c create mode 100644 deps/icu-small/source/common/ures_cnv.cpp delete mode 100644 deps/icu-small/source/common/usc_impl.c create mode 100644 deps/icu-small/source/common/usc_impl.cpp delete mode 100644 deps/icu-small/source/common/uscript.c create mode 100644 deps/icu-small/source/common/uscript.cpp delete mode 100644 deps/icu-small/source/common/ustrfmt.c create mode 100644 deps/icu-small/source/common/ustrfmt.cpp delete mode 100644 deps/icu-small/source/common/utf_impl.c create mode 100644 deps/icu-small/source/common/utf_impl.cpp delete mode 100644 deps/icu-small/source/common/utrace.c create mode 100644 deps/icu-small/source/common/utrace.cpp delete mode 100644 deps/icu-small/source/common/utypes.c create mode 100644 deps/icu-small/source/common/utypes.cpp delete mode 100644 deps/icu-small/source/common/wintz.c create mode 100644 deps/icu-small/source/common/wintz.cpp delete mode 100644 deps/icu-small/source/data/in/icudt58l.dat create mode 100644 deps/icu-small/source/data/in/icudt59l.dat delete mode 100644 deps/icu-small/source/i18n/decContext.c create mode 100644 deps/icu-small/source/i18n/decContext.cpp delete mode 100644 deps/icu-small/source/i18n/decNumber.c create mode 100644 deps/icu-small/source/i18n/decNumber.cpp delete mode 100644 deps/icu-small/source/i18n/ulocdata.c create mode 100644 deps/icu-small/source/i18n/ulocdata.cpp delete mode 100644 deps/icu-small/source/i18n/utmscale.c create mode 100644 deps/icu-small/source/i18n/utmscale.cpp delete mode 100644 deps/icu-small/source/io/sprintf.c create mode 100644 deps/icu-small/source/io/sprintf.cpp delete mode 100644 deps/icu-small/source/io/sscanf.c create mode 100644 
deps/icu-small/source/io/sscanf.cpp delete mode 100644 deps/icu-small/source/io/ufile.c create mode 100644 deps/icu-small/source/io/ufile.cpp delete mode 100644 deps/icu-small/source/io/ufmt_cmn.c create mode 100644 deps/icu-small/source/io/ufmt_cmn.cpp delete mode 100644 deps/icu-small/source/io/uprntf_p.c create mode 100644 deps/icu-small/source/io/uprntf_p.cpp delete mode 100644 deps/icu-small/source/io/uscanf.c create mode 100644 deps/icu-small/source/io/uscanf.cpp delete mode 100644 deps/icu-small/source/io/uscanf_p.c create mode 100644 deps/icu-small/source/io/uscanf_p.cpp delete mode 100644 deps/icu-small/source/io/ustdio.c create mode 100644 deps/icu-small/source/io/ustdio.cpp delete mode 100644 deps/icu-small/source/stubdata/stubdata.c create mode 100644 deps/icu-small/source/stubdata/stubdata.cpp create mode 100644 deps/icu-small/source/tools/escapesrc/cptbl.h create mode 100644 deps/icu-small/source/tools/escapesrc/escapesrc.cpp create mode 100644 deps/icu-small/source/tools/escapesrc/expect-simple.cpp create mode 100644 deps/icu-small/source/tools/escapesrc/tblgen.cpp create mode 100644 deps/icu-small/source/tools/escapesrc/test-nochange.cpp create mode 100644 deps/icu-small/source/tools/escapesrc/test-simple.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/filestrm.c create mode 100644 deps/icu-small/source/tools/toolutil/filestrm.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/flagparser.c create mode 100644 deps/icu-small/source/tools/toolutil/flagparser.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/pkg_genc.c create mode 100644 deps/icu-small/source/tools/toolutil/pkg_genc.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/pkg_gencmn.c create mode 100644 deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/ucm.c create mode 100644 deps/icu-small/source/tools/toolutil/ucm.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/ucmstate.c create 
mode 100644 deps/icu-small/source/tools/toolutil/ucmstate.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/unewdata.c create mode 100644 deps/icu-small/source/tools/toolutil/unewdata.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/uoptions.c create mode 100644 deps/icu-small/source/tools/toolutil/uoptions.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/uparse.c create mode 100644 deps/icu-small/source/tools/toolutil/uparse.cpp delete mode 100644 deps/icu-small/source/tools/toolutil/writesrc.c create mode 100644 deps/icu-small/source/tools/toolutil/writesrc.cpp (limited to 'deps/icu-small/source') diff --git a/deps/icu-small/source/common/appendable.cpp b/deps/icu-small/source/common/appendable.cpp index 1a597b5de5..fca3c1e413 100644 --- a/deps/icu-small/source/common/appendable.cpp +++ b/deps/icu-small/source/common/appendable.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: appendable.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/bmpset.cpp b/deps/icu-small/source/common/bmpset.cpp index ebcd0d23bf..08f9bed066 100644 --- a/deps/icu-small/source/common/bmpset.cpp +++ b/deps/icu-small/source/common/bmpset.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: bmpset.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/bmpset.h b/deps/icu-small/source/common/bmpset.h index 8975cd61d5..87375d2cac 100644 --- a/deps/icu-small/source/common/bmpset.h +++ b/deps/icu-small/source/common/bmpset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: bmpset.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/brkeng.cpp b/deps/icu-small/source/common/brkeng.cpp index 96a7dc9348..354998dac4 100644 --- a/deps/icu-small/source/common/brkeng.cpp +++ b/deps/icu-small/source/common/brkeng.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************************ diff --git a/deps/icu-small/source/common/brkeng.h b/deps/icu-small/source/common/brkeng.h index 73fdb81dff..ccb95320d2 100644 --- a/deps/icu-small/source/common/brkeng.h +++ b/deps/icu-small/source/common/brkeng.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ************************************************************************************ diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp index b768c20f0b..e2904b0544 100644 --- a/deps/icu-small/source/common/brkiter.cpp +++ b/deps/icu-small/source/common/brkiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/bytestream.cpp b/deps/icu-small/source/common/bytestream.cpp index 5a5c2e4410..bfd7bded71 100644 --- a/deps/icu-small/source/common/bytestream.cpp +++ b/deps/icu-small/source/common/bytestream.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // Copyright (C) 2009-2011, International Business Machines // Corporation and others. All Rights Reserved. diff --git a/deps/icu-small/source/common/bytestrie.cpp b/deps/icu-small/source/common/bytestrie.cpp index 093cd8ddb1..c4d498c4bf 100644 --- a/deps/icu-small/source/common/bytestrie.cpp +++ b/deps/icu-small/source/common/bytestrie.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: bytestrie.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/bytestriebuilder.cpp b/deps/icu-small/source/common/bytestriebuilder.cpp index 913d85a212..581505e009 100644 --- a/deps/icu-small/source/common/bytestriebuilder.cpp +++ b/deps/icu-small/source/common/bytestriebuilder.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: bytestriebuilder.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/bytestrieiterator.cpp b/deps/icu-small/source/common/bytestrieiterator.cpp index 4d04247c49..e64961a1f1 100644 --- a/deps/icu-small/source/common/bytestrieiterator.cpp +++ b/deps/icu-small/source/common/bytestrieiterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: bytestrieiterator.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/caniter.cpp b/deps/icu-small/source/common/caniter.cpp index c3af281b82..eea0398d12 100644 --- a/deps/icu-small/source/common/caniter.cpp +++ b/deps/icu-small/source/common/caniter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************** @@ -311,12 +311,12 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // see what the permutations of the characters before and after this one are //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); /* Test for buffer overflows */ if(U_FAILURE(status)) { return; } - // The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents + // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents // of source at this point. // prefix this character to all of them diff --git a/deps/icu-small/source/common/chariter.cpp b/deps/icu-small/source/common/chariter.cpp index 625ac49eac..887119a0eb 100644 --- a/deps/icu-small/source/common/chariter.cpp +++ b/deps/icu-small/source/common/chariter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/charstr.cpp b/deps/icu-small/source/common/charstr.cpp index c792181378..8bacd20ddc 100644 --- a/deps/icu-small/source/common/charstr.cpp +++ b/deps/icu-small/source/common/charstr.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: charstr.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -15,6 +15,7 @@ */ #include "unicode/utypes.h" +#include "unicode/putil.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" diff --git a/deps/icu-small/source/common/charstr.h b/deps/icu-small/source/common/charstr.h index 9758c5c542..3cfdf6a897 100644 --- a/deps/icu-small/source/common/charstr.h +++ b/deps/icu-small/source/common/charstr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/cmemory.c b/deps/icu-small/source/common/cmemory.c deleted file mode 100644 index 0054e3de8b..0000000000 --- a/deps/icu-small/source/common/cmemory.c +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2002-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File cmemory.c ICU Heap allocation. -* All ICU heap allocation, both for C and C++ new of ICU -* class types, comes through these functions. -* -* If you have a need to replace ICU allocation, this is the -* place to do it. -* -* Note that uprv_malloc(0) returns a non-NULL pointer, and -* that a subsequent free of that pointer value is a NOP. -* -****************************************************************************** -*/ -#include "unicode/uclean.h" -#include "cmemory.h" -#include "putilimp.h" -#include "uassert.h" -#include - -/* uprv_malloc(0) returns a pointer to this read-only data. */ -static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0}; - -/* Function Pointers for user-supplied heap functions */ -static const void *pContext; -static UMemAllocFn *pAlloc; -static UMemReallocFn *pRealloc; -static UMemFreeFn *pFree; - -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) -#include -static int n=0; -static long b=0; -#endif - -#if U_DEBUG - -static char gValidMemorySink = 0; - -U_CAPI void uprv_checkValidMemory(const void *p, size_t n) { - /* - * Access the memory to ensure that it's all valid. - * Load and save a computed value to try to ensure that the compiler - * does not throw away the whole loop. - * A thread analyzer might complain about un-mutexed access to gValidMemorySink - * which is true but harmless because no one ever uses the value in gValidMemorySink. 
- */ - const char *s = (const char *)p; - char c = gValidMemorySink; - size_t i; - U_ASSERT(p != NULL); - for(i = 0; i < n; ++i) { - c ^= s[i]; - } - gValidMemorySink = c; -} - -#endif /* U_DEBUG */ - -U_CAPI void * U_EXPORT2 -uprv_malloc(size_t s) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) -#if 1 - putchar('>'); - fflush(stdout); -#else - fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr); -#endif -#endif - if (s > 0) { - if (pAlloc) { - return (*pAlloc)(pContext, s); - } else { - return uprv_default_malloc(s); - } - } else { - return (void *)zeroMem; - } -} - -U_CAPI void * U_EXPORT2 -uprv_realloc(void * buffer, size_t size) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - putchar('~'); - fflush(stdout); -#endif - if (buffer == zeroMem) { - return uprv_malloc(size); - } else if (size == 0) { - if (pFree) { - (*pFree)(pContext, buffer); - } else { - uprv_default_free(buffer); - } - return (void *)zeroMem; - } else { - if (pRealloc) { - return (*pRealloc)(pContext, buffer, size); - } else { - return uprv_default_realloc(buffer, size); - } - } -} - -U_CAPI void U_EXPORT2 -uprv_free(void *buffer) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - putchar('<'); - fflush(stdout); -#endif - if (buffer != zeroMem) { - if (pFree) { - (*pFree)(pContext, buffer); - } else { - uprv_default_free(buffer); - } - } -} - -U_CAPI void * U_EXPORT2 -uprv_calloc(size_t num, size_t size) { - void *mem = NULL; - size *= num; - mem = uprv_malloc(size); - if (mem) { - uprv_memset(mem, 0, size); - } - return mem; -} - -U_CAPI void U_EXPORT2 -u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status) -{ - if (U_FAILURE(*status)) { - return; - } - if (a==NULL || r==NULL || f==NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - pContext = context; - pAlloc = a; - pRealloc = r; - pFree = f; -} - - -U_CFUNC UBool cmemory_cleanup(void) { - pContext = NULL; - pAlloc = NULL; - pRealloc = NULL; - pFree = 
NULL; - return TRUE; -} diff --git a/deps/icu-small/source/common/cmemory.cpp b/deps/icu-small/source/common/cmemory.cpp new file mode 100644 index 0000000000..300279c243 --- /dev/null +++ b/deps/icu-small/source/common/cmemory.cpp @@ -0,0 +1,162 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File cmemory.c ICU Heap allocation. +* All ICU heap allocation, both for C and C++ new of ICU +* class types, comes through these functions. +* +* If you have a need to replace ICU allocation, this is the +* place to do it. +* +* Note that uprv_malloc(0) returns a non-NULL pointer, and +* that a subsequent free of that pointer value is a NOP. +* +****************************************************************************** +*/ +#include "unicode/uclean.h" +#include "cmemory.h" +#include "putilimp.h" +#include "uassert.h" +#include + +/* uprv_malloc(0) returns a pointer to this read-only data. */ +static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0}; + +/* Function Pointers for user-supplied heap functions */ +static const void *pContext; +static UMemAllocFn *pAlloc; +static UMemReallocFn *pRealloc; +static UMemFreeFn *pFree; + +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) +#include +static int n=0; +static long b=0; +#endif + +#if U_DEBUG + +static char gValidMemorySink = 0; + +U_CAPI void uprv_checkValidMemory(const void *p, size_t n) { + /* + * Access the memory to ensure that it's all valid. + * Load and save a computed value to try to ensure that the compiler + * does not throw away the whole loop. 
+ * A thread analyzer might complain about un-mutexed access to gValidMemorySink + * which is true but harmless because no one ever uses the value in gValidMemorySink. + */ + const char *s = (const char *)p; + char c = gValidMemorySink; + size_t i; + U_ASSERT(p != NULL); + for(i = 0; i < n; ++i) { + c ^= s[i]; + } + gValidMemorySink = c; +} + +#endif /* U_DEBUG */ + +U_CAPI void * U_EXPORT2 +uprv_malloc(size_t s) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) +#if 1 + putchar('>'); + fflush(stdout); +#else + fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr); +#endif +#endif + if (s > 0) { + if (pAlloc) { + return (*pAlloc)(pContext, s); + } else { + return uprv_default_malloc(s); + } + } else { + return (void *)zeroMem; + } +} + +U_CAPI void * U_EXPORT2 +uprv_realloc(void * buffer, size_t size) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + putchar('~'); + fflush(stdout); +#endif + if (buffer == zeroMem) { + return uprv_malloc(size); + } else if (size == 0) { + if (pFree) { + (*pFree)(pContext, buffer); + } else { + uprv_default_free(buffer); + } + return (void *)zeroMem; + } else { + if (pRealloc) { + return (*pRealloc)(pContext, buffer, size); + } else { + return uprv_default_realloc(buffer, size); + } + } +} + +U_CAPI void U_EXPORT2 +uprv_free(void *buffer) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + putchar('<'); + fflush(stdout); +#endif + if (buffer != zeroMem) { + if (pFree) { + (*pFree)(pContext, buffer); + } else { + uprv_default_free(buffer); + } + } +} + +U_CAPI void * U_EXPORT2 +uprv_calloc(size_t num, size_t size) { + void *mem = NULL; + size *= num; + mem = uprv_malloc(size); + if (mem) { + uprv_memset(mem, 0, size); + } + return mem; +} + +U_CAPI void U_EXPORT2 +u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return; + } + if (a==NULL || r==NULL || f==NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + pContext 
= context; + pAlloc = a; + pRealloc = r; + pFree = f; +} + + +U_CFUNC UBool cmemory_cleanup(void) { + pContext = NULL; + pAlloc = NULL; + pRealloc = NULL; + pFree = NULL; + return TRUE; +} diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index d2e48e5f30..c77b826867 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/cpputils.h b/deps/icu-small/source/common/cpputils.h index 57af69a712..307e570486 100644 --- a/deps/icu-small/source/common/cpputils.h +++ b/deps/icu-small/source/common/cpputils.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: cpputils.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 */ diff --git a/deps/icu-small/source/common/cstr.cpp b/deps/icu-small/source/common/cstr.cpp index 356367e0bc..0114434329 100644 --- a/deps/icu-small/source/common/cstr.cpp +++ b/deps/icu-small/source/common/cstr.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/cstr.h b/deps/icu-small/source/common/cstr.h index 25a8a5fc25..e310f131ac 100644 --- a/deps/icu-small/source/common/cstr.h +++ b/deps/icu-small/source/common/cstr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/cstring.c b/deps/icu-small/source/common/cstring.c deleted file mode 100644 index fbc0320d4a..0000000000 --- a/deps/icu-small/source/common/cstring.c +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File CSTRING.C -* -* @author Helena Shih -* -* Modification History: -* -* Date Name Description -* 6/18/98 hshih Created -* 09/08/98 stephen Added include for ctype, for Mac Port -* 11/15/99 helena Integrated S/390 IEEE changes. -****************************************************************************** -*/ - - - -#include -#include -#include "unicode/utypes.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" - -/* - * We hardcode case conversion for invariant characters to match our expectation - * and the compiler execution charset. 
- * This prevents problems on systems - * - with non-default casing behavior, like Turkish system locales where - * tolower('I') maps to dotless i and toupper('i') maps to dotted I - * - where there are no lowercase Latin characters at all, or using different - * codes (some old EBCDIC codepages) - * - * This works because the compiler usually runs on a platform where the execution - * charset includes all of the invariant characters at their expected - * code positions, so that the char * string literals in ICU code match - * the char literals here. - * - * Note that the set of lowercase Latin letters is discontiguous in EBCDIC - * and the set of uppercase Latin letters is discontiguous as well. - */ - -U_CAPI UBool U_EXPORT2 -uprv_isASCIILetter(char c) { -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY - return - ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || - ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); -#else - return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); -#endif -} - -U_CAPI char U_EXPORT2 -uprv_toupper(char c) { -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY - if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { - c=(char)(c+('A'-'a')); - } -#else - if('a'<=c && c<='z') { - c=(char)(c+('A'-'a')); - } -#endif - return c; -} - - -#if 0 -/* - * Commented out because cstring.h defines uprv_tolower() to be - * the same as either uprv_asciitolower() or uprv_ebcdictolower() - * to reduce the amount of code to cover with tests. - * - * Note that this uprv_tolower() definition is likely to work for most - * charset families, not just ASCII and EBCDIC, because its #else branch - * is written generically. 
- */ -U_CAPI char U_EXPORT2 -uprv_tolower(char c) { -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY - if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { - c=(char)(c+('a'-'A')); - } -#else - if('A'<=c && c<='Z') { - c=(char)(c+('a'-'A')); - } -#endif - return c; -} -#endif - -U_CAPI char U_EXPORT2 -uprv_asciitolower(char c) { - if(0x41<=c && c<=0x5a) { - c=(char)(c+0x20); - } - return c; -} - -U_CAPI char U_EXPORT2 -uprv_ebcdictolower(char c) { - if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || - (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || - (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) - ) { - c=(char)(c-0x40); - } - return c; -} - - -U_CAPI char* U_EXPORT2 -T_CString_toLowerCase(char* str) -{ - char* origPtr = str; - - if (str) { - do - *str = (char)uprv_tolower(*str); - while (*(str++)); - } - - return origPtr; -} - -U_CAPI char* U_EXPORT2 -T_CString_toUpperCase(char* str) -{ - char* origPtr = str; - - if (str) { - do - *str = (char)uprv_toupper(*str); - while (*(str++)); - } - - return origPtr; -} - -/* - * Takes a int32_t and fills in a char* string with that number "radix"-based. - * Does not handle negative values (makes an empty string for them). - * Writes at most 12 chars ("-2147483647" plus NUL). - * Returns the length of the string (not including the NUL). - */ -U_CAPI int32_t U_EXPORT2 -T_CString_integerToString(char* buffer, int32_t v, int32_t radix) -{ - char tbuf[30]; - int32_t tbx = sizeof(tbuf); - uint8_t digit; - int32_t length = 0; - uint32_t uval; - - U_ASSERT(radix>=2 && radix<=16); - uval = (uint32_t) v; - if(v<0 && radix == 10) { - /* Only in base 10 do we conside numbers to be signed. */ - uval = (uint32_t)(-v); - buffer[length++] = '-'; - } - - tbx = sizeof(tbuf)-1; - tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. 
*/ - do { - digit = (uint8_t)(uval % radix); - tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); - uval = uval / radix; - } while (uval != 0); - - /* copy converted number into user buffer */ - uprv_strcpy(buffer+length, tbuf+tbx); - length += sizeof(tbuf) - tbx -1; - return length; -} - - - -/* - * Takes a int64_t and fills in a char* string with that number "radix"-based. - * Writes at most 21: chars ("-9223372036854775807" plus NUL). - * Returns the length of the string, not including the terminating NULL. - */ -U_CAPI int32_t U_EXPORT2 -T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) -{ - char tbuf[30]; - int32_t tbx = sizeof(tbuf); - uint8_t digit; - int32_t length = 0; - uint64_t uval; - - U_ASSERT(radix>=2 && radix<=16); - uval = (uint64_t) v; - if(v<0 && radix == 10) { - /* Only in base 10 do we conside numbers to be signed. */ - uval = (uint64_t)(-v); - buffer[length++] = '-'; - } - - tbx = sizeof(tbuf)-1; - tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. 
*/ - do { - digit = (uint8_t)(uval % radix); - tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); - uval = uval / radix; - } while (uval != 0); - - /* copy converted number into user buffer */ - uprv_strcpy(buffer+length, tbuf+tbx); - length += sizeof(tbuf) - tbx -1; - return length; -} - - -U_CAPI int32_t U_EXPORT2 -T_CString_stringToInteger(const char *integerString, int32_t radix) -{ - char *end; - return uprv_strtoul(integerString, &end, radix); - -} - -U_CAPI int U_EXPORT2 -uprv_stricmp(const char *str1, const char *str2) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - - for(;;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } -} - -U_CAPI int U_EXPORT2 -uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - - for(; n--;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } - - return 0; -} - -U_CAPI char* U_EXPORT2 -uprv_strdup(const char *src) { - size_t len = uprv_strlen(src) + 1; - char *dup = (char *) uprv_malloc(len); - - if (dup) { - 
uprv_memcpy(dup, src, len); - } - - return dup; -} - -U_CAPI char* U_EXPORT2 -uprv_strndup(const char *src, int32_t n) { - char *dup; - - if(n < 0) { - dup = uprv_strdup(src); - } else { - dup = (char*)uprv_malloc(n+1); - if (dup) { - uprv_memcpy(dup, src, n); - dup[n] = 0; - } - } - - return dup; -} diff --git a/deps/icu-small/source/common/cstring.cpp b/deps/icu-small/source/common/cstring.cpp new file mode 100644 index 0000000000..a06bd3c79e --- /dev/null +++ b/deps/icu-small/source/common/cstring.cpp @@ -0,0 +1,341 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File CSTRING.C +* +* @author Helena Shih +* +* Modification History: +* +* Date Name Description +* 6/18/98 hshih Created +* 09/08/98 stephen Added include for ctype, for Mac Port +* 11/15/99 helena Integrated S/390 IEEE changes. +****************************************************************************** +*/ + + + +#include +#include +#include "unicode/utypes.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" + +/* + * We hardcode case conversion for invariant characters to match our expectation + * and the compiler execution charset. 
+ * This prevents problems on systems + * - with non-default casing behavior, like Turkish system locales where + * tolower('I') maps to dotless i and toupper('i') maps to dotted I + * - where there are no lowercase Latin characters at all, or using different + * codes (some old EBCDIC codepages) + * + * This works because the compiler usually runs on a platform where the execution + * charset includes all of the invariant characters at their expected + * code positions, so that the char * string literals in ICU code match + * the char literals here. + * + * Note that the set of lowercase Latin letters is discontiguous in EBCDIC + * and the set of uppercase Latin letters is discontiguous as well. + */ + +U_CAPI UBool U_EXPORT2 +uprv_isASCIILetter(char c) { +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY + return + ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || + ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); +#else + return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); +#endif +} + +U_CAPI char U_EXPORT2 +uprv_toupper(char c) { +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY + if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { + c=(char)(c+('A'-'a')); + } +#else + if('a'<=c && c<='z') { + c=(char)(c+('A'-'a')); + } +#endif + return c; +} + + +#if 0 +/* + * Commented out because cstring.h defines uprv_tolower() to be + * the same as either uprv_asciitolower() or uprv_ebcdictolower() + * to reduce the amount of code to cover with tests. + * + * Note that this uprv_tolower() definition is likely to work for most + * charset families, not just ASCII and EBCDIC, because its #else branch + * is written generically. 
+ */ +U_CAPI char U_EXPORT2 +uprv_tolower(char c) { +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY + if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { + c=(char)(c+('a'-'A')); + } +#else + if('A'<=c && c<='Z') { + c=(char)(c+('a'-'A')); + } +#endif + return c; +} +#endif + +U_CAPI char U_EXPORT2 +uprv_asciitolower(char c) { + if(0x41<=c && c<=0x5a) { + c=(char)(c+0x20); + } + return c; +} + +U_CAPI char U_EXPORT2 +uprv_ebcdictolower(char c) { + if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || + (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || + (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) + ) { + c=(char)(c-0x40); + } + return c; +} + + +U_CAPI char* U_EXPORT2 +T_CString_toLowerCase(char* str) +{ + char* origPtr = str; + + if (str) { + do + *str = (char)uprv_tolower(*str); + while (*(str++)); + } + + return origPtr; +} + +U_CAPI char* U_EXPORT2 +T_CString_toUpperCase(char* str) +{ + char* origPtr = str; + + if (str) { + do + *str = (char)uprv_toupper(*str); + while (*(str++)); + } + + return origPtr; +} + +/* + * Takes a int32_t and fills in a char* string with that number "radix"-based. + * Does not handle negative values (makes an empty string for them). + * Writes at most 12 chars ("-2147483647" plus NUL). + * Returns the length of the string (not including the NUL). + */ +U_CAPI int32_t U_EXPORT2 +T_CString_integerToString(char* buffer, int32_t v, int32_t radix) +{ + char tbuf[30]; + int32_t tbx = sizeof(tbuf); + uint8_t digit; + int32_t length = 0; + uint32_t uval; + + U_ASSERT(radix>=2 && radix<=16); + uval = (uint32_t) v; + if(v<0 && radix == 10) { + /* Only in base 10 do we conside numbers to be signed. */ + uval = (uint32_t)(-v); + buffer[length++] = '-'; + } + + tbx = sizeof(tbuf)-1; + tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. 
*/ + do { + digit = (uint8_t)(uval % radix); + tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); + uval = uval / radix; + } while (uval != 0); + + /* copy converted number into user buffer */ + uprv_strcpy(buffer+length, tbuf+tbx); + length += sizeof(tbuf) - tbx -1; + return length; +} + + + +/* + * Takes a int64_t and fills in a char* string with that number "radix"-based. + * Writes at most 21: chars ("-9223372036854775807" plus NUL). + * Returns the length of the string, not including the terminating NULL. + */ +U_CAPI int32_t U_EXPORT2 +T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) +{ + char tbuf[30]; + int32_t tbx = sizeof(tbuf); + uint8_t digit; + int32_t length = 0; + uint64_t uval; + + U_ASSERT(radix>=2 && radix<=16); + uval = (uint64_t) v; + if(v<0 && radix == 10) { + /* Only in base 10 do we conside numbers to be signed. */ + uval = (uint64_t)(-v); + buffer[length++] = '-'; + } + + tbx = sizeof(tbuf)-1; + tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. 
*/ + do { + digit = (uint8_t)(uval % radix); + tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); + uval = uval / radix; + } while (uval != 0); + + /* copy converted number into user buffer */ + uprv_strcpy(buffer+length, tbuf+tbx); + length += sizeof(tbuf) - tbx -1; + return length; +} + + +U_CAPI int32_t U_EXPORT2 +T_CString_stringToInteger(const char *integerString, int32_t radix) +{ + char *end; + return uprv_strtoul(integerString, &end, radix); + +} + +U_CAPI int U_EXPORT2 +uprv_stricmp(const char *str1, const char *str2) { + if(str1==NULL) { + if(str2==NULL) { + return 0; + } else { + return -1; + } + } else if(str2==NULL) { + return 1; + } else { + /* compare non-NULL strings lexically with lowercase */ + int rc; + unsigned char c1, c2; + + for(;;) { + c1=(unsigned char)*str1; + c2=(unsigned char)*str2; + if(c1==0) { + if(c2==0) { + return 0; + } else { + return -1; + } + } else if(c2==0) { + return 1; + } else { + /* compare non-zero characters with lowercase */ + rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); + if(rc!=0) { + return rc; + } + } + ++str1; + ++str2; + } + } +} + +U_CAPI int U_EXPORT2 +uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { + if(str1==NULL) { + if(str2==NULL) { + return 0; + } else { + return -1; + } + } else if(str2==NULL) { + return 1; + } else { + /* compare non-NULL strings lexically with lowercase */ + int rc; + unsigned char c1, c2; + + for(; n--;) { + c1=(unsigned char)*str1; + c2=(unsigned char)*str2; + if(c1==0) { + if(c2==0) { + return 0; + } else { + return -1; + } + } else if(c2==0) { + return 1; + } else { + /* compare non-zero characters with lowercase */ + rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); + if(rc!=0) { + return rc; + } + } + ++str1; + ++str2; + } + } + + return 0; +} + +U_CAPI char* U_EXPORT2 +uprv_strdup(const char *src) { + size_t len = uprv_strlen(src) + 1; + char *dup = (char *) uprv_malloc(len); + + if (dup) { + 
uprv_memcpy(dup, src, len); + } + + return dup; +} + +U_CAPI char* U_EXPORT2 +uprv_strndup(const char *src, int32_t n) { + char *dup; + + if(n < 0) { + dup = uprv_strdup(src); + } else { + dup = (char*)uprv_malloc(n+1); + if (dup) { + uprv_memcpy(dup, src, n); + dup[n] = 0; + } + } + + return dup; +} diff --git a/deps/icu-small/source/common/cstring.h b/deps/icu-small/source/common/cstring.h index 238cb3138f..2232efcda5 100644 --- a/deps/icu-small/source/common/cstring.h +++ b/deps/icu-small/source/common/cstring.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/cwchar.c b/deps/icu-small/source/common/cwchar.c deleted file mode 100644 index 0a9cd7b136..0000000000 --- a/deps/icu-small/source/common/cwchar.c +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: cwchar.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001may25 -* created by: Markus W. 
Scherer -*/ - -#include "unicode/utypes.h" - -#if !U_HAVE_WCSCPY - -#include "cwchar.h" - -U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) { - wchar_t *start=dst; - while(*dst!=0) { - ++dst; - } - while((*dst=*src)!=0) { - ++dst; - ++src; - } - return start; -} - -U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) { - wchar_t *start=dst; - while((*dst=*src)!=0) { - ++dst; - ++src; - } - return start; -} - -U_CAPI size_t uprv_wcslen(const wchar_t *src) { - const wchar_t *start=src; - while(*src!=0) { - ++src; - } - return src-start; -} - -#endif diff --git a/deps/icu-small/source/common/cwchar.cpp b/deps/icu-small/source/common/cwchar.cpp new file mode 100644 index 0000000000..4fd531114e --- /dev/null +++ b/deps/icu-small/source/common/cwchar.cpp @@ -0,0 +1,54 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: cwchar.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001may25 +* created by: Markus W. 
Scherer +*/ + +#include "unicode/utypes.h" + +#if !U_HAVE_WCSCPY + +#include "cwchar.h" + +U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) { + wchar_t *start=dst; + while(*dst!=0) { + ++dst; + } + while((*dst=*src)!=0) { + ++dst; + ++src; + } + return start; +} + +U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) { + wchar_t *start=dst; + while((*dst=*src)!=0) { + ++dst; + ++src; + } + return start; +} + +U_CAPI size_t uprv_wcslen(const wchar_t *src) { + const wchar_t *start=src; + while(*src!=0) { + ++src; + } + return src-start; +} + +#endif diff --git a/deps/icu-small/source/common/cwchar.h b/deps/icu-small/source/common/cwchar.h index 1365abe4bc..939eb599d6 100644 --- a/deps/icu-small/source/common/cwchar.h +++ b/deps/icu-small/source/common/cwchar.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: cwchar.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 26635fa3d3..6c0413a31b 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* @@ -1385,12 +1385,25 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, // Now that we're done, convert positions in t_boundary[] (indices in // the normalized input string) back to indices in the original input UText // while reversing t_boundary and pushing values to foundBreaks. + int32_t prevCPPos = -1; + int32_t prevUTextPos = -1; for (int32_t i = numBreaks-1; i >= 0; i--) { int32_t cpPos = t_boundary.elementAti(i); + U_ASSERT(cpPos > prevCPPos); int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart; - // Boundaries are added to foundBreaks output in ascending order. - U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos); - foundBreaks.push(utextPos, status); + U_ASSERT(utextPos >= prevUTextPos); + if (utextPos > prevUTextPos) { + // Boundaries are added to foundBreaks output in ascending order. + U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos); + foundBreaks.push(utextPos, status); + } else { + // Normalization expanded the input text, the dictionary found a boundary + // within the expansion, giving two boundaries with the same index in the + // original text. Ignore the second. See ticket #12918. + --numBreaks; + } + prevCPPos = cpPos; + prevUTextPos = utextPos; } // inString goes out of scope diff --git a/deps/icu-small/source/common/dictbe.h b/deps/icu-small/source/common/dictbe.h index 6e9f3d5020..088bcb788d 100644 --- a/deps/icu-small/source/common/dictbe.h +++ b/deps/icu-small/source/common/dictbe.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/dictionarydata.cpp b/deps/icu-small/source/common/dictionarydata.cpp index 4a62f0bfc6..0efa587493 100644 --- a/deps/icu-small/source/common/dictionarydata.cpp +++ b/deps/icu-small/source/common/dictionarydata.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/dictionarydata.h b/deps/icu-small/source/common/dictionarydata.h index ef4277a440..5aec8fe028 100644 --- a/deps/icu-small/source/common/dictionarydata.h +++ b/deps/icu-small/source/common/dictionarydata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/dtintrv.cpp b/deps/icu-small/source/common/dtintrv.cpp index b7c3b48af3..dee637e62c 100644 --- a/deps/icu-small/source/common/dtintrv.cpp +++ b/deps/icu-small/source/common/dtintrv.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************************* * Copyright (C) 2008, International Business Machines Corporation and diff --git a/deps/icu-small/source/common/edits.cpp b/deps/icu-small/source/common/edits.cpp new file mode 100644 index 0000000000..58a70d5c92 --- /dev/null +++ b/deps/icu-small/source/common/edits.cpp @@ -0,0 +1,346 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// edits.cpp +// created: 2017feb08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/edits.h" +#include "cmemory.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +namespace { + +// 0000uuuuuuuuuuuu records u+1 unchanged text units. +const int32_t MAX_UNCHANGED_LENGTH = 0x1000; +const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1; + +// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units. +// No length change. +const int32_t MAX_SHORT_WIDTH = 6; +const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff; +const int32_t MAX_SHORT_CHANGE = 0x6fff; + +// 0111mmmmmmnnnnnn records a replacement of m text units with n. +// m or n = 61: actual length follows in the next edits array unit. +// m or n = 62..63: actual length follows in the next two edits array units. +// Bit 30 of the actual length is in the head unit. +// Trailing units have bit 15 set. +const int32_t LENGTH_IN_1TRAIL = 61; +const int32_t LENGTH_IN_2TRAIL = 62; + +} // namespace + +Edits::~Edits() { + if(array != stackArray) { + uprv_free(array); + } +} + +void Edits::reset() { + length = delta = 0; +} + +void Edits::addUnchanged(int32_t unchangedLength) { + if(U_FAILURE(errorCode) || unchangedLength == 0) { return; } + if(unchangedLength < 0) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // Merge into previous unchanged-text record, if any. 
+ int32_t last = lastUnit(); + if(last < MAX_UNCHANGED) { + int32_t remaining = MAX_UNCHANGED - last; + if (remaining >= unchangedLength) { + setLastUnit(last + unchangedLength); + return; + } + setLastUnit(MAX_UNCHANGED); + unchangedLength -= remaining; + } + // Split large lengths into multiple units. + while(unchangedLength >= MAX_UNCHANGED_LENGTH) { + append(MAX_UNCHANGED); + unchangedLength -= MAX_UNCHANGED_LENGTH; + } + // Write a small (remaining) length. + if(unchangedLength > 0) { + append(unchangedLength - 1); + } +} + +void Edits::addReplace(int32_t oldLength, int32_t newLength) { + if(U_FAILURE(errorCode)) { return; } + if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) { + // Replacement of short oldLength text units by same-length new text. + // Merge into previous short-replacement record, if any. + int32_t last = lastUnit(); + if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE && + (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) { + setLastUnit(last + 1); + return; + } + append(oldLength << 12); + return; + } + + if(oldLength < 0 || newLength < 0) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (oldLength == 0 && newLength == 0) { + return; + } + int32_t newDelta = newLength - oldLength; + if (newDelta != 0) { + if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) || + (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) { + // Integer overflow or underflow. 
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + delta += newDelta; + } + + int32_t head = 0x7000; + if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) { + head |= oldLength << 6; + head |= newLength; + append(head); + } else if ((capacity - length) >= 5 || growArray()) { + int32_t limit = length + 1; + if(oldLength < LENGTH_IN_1TRAIL) { + head |= oldLength << 6; + } else if(oldLength <= 0x7fff) { + head |= LENGTH_IN_1TRAIL << 6; + array[limit++] = (uint16_t)(0x8000 | oldLength); + } else { + head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6; + array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15)); + array[limit++] = (uint16_t)(0x8000 | oldLength); + } + if(newLength < LENGTH_IN_1TRAIL) { + head |= newLength; + } else if(newLength <= 0x7fff) { + head |= LENGTH_IN_1TRAIL; + array[limit++] = (uint16_t)(0x8000 | newLength); + } else { + head |= LENGTH_IN_2TRAIL + (newLength >> 30); + array[limit++] = (uint16_t)(0x8000 | (newLength >> 15)); + array[limit++] = (uint16_t)(0x8000 | newLength); + } + array[length] = (uint16_t)head; + length = limit; + } +} + +void Edits::append(int32_t r) { + if(length < capacity || growArray()) { + array[length++] = (uint16_t)r; + } +} + +UBool Edits::growArray() { + int32_t newCapacity; + if (array == stackArray) { + newCapacity = 2000; + } else if (capacity == INT32_MAX) { + // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API + // with a result-string-buffer overflow. + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } else if (capacity >= (INT32_MAX / 2)) { + newCapacity = INT32_MAX; + } else { + newCapacity = 2 * capacity; + } + // Grow by at least 5 units so that a maximal change record will fit. 
+ if ((newCapacity - capacity) < 5) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } + uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2); + if (newArray == NULL) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + uprv_memcpy(newArray, array, (size_t)length * 2); + if (array != stackArray) { + uprv_free(array); + } + array = newArray; + capacity = newCapacity; + return TRUE; +} + +UBool Edits::copyErrorTo(UErrorCode &outErrorCode) { + if (U_FAILURE(outErrorCode)) { return TRUE; } + if (U_SUCCESS(errorCode)) { return FALSE; } + outErrorCode = errorCode; + return TRUE; +} + +UBool Edits::hasChanges() const { + if (delta != 0) { + return TRUE; + } + for (int32_t i = 0; i < length; ++i) { + if (array[i] > MAX_UNCHANGED) { + return TRUE; + } + } + return FALSE; +} + +Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) : + array(a), index(0), length(len), remaining(0), + onlyChanges_(oc), coarse(crs), + changed(FALSE), oldLength_(0), newLength_(0), + srcIndex(0), replIndex(0), destIndex(0) {} + +int32_t Edits::Iterator::readLength(int32_t head) { + if (head < LENGTH_IN_1TRAIL) { + return head; + } else if (head < LENGTH_IN_2TRAIL) { + U_ASSERT(index < length); + U_ASSERT(array[index] >= 0x8000); + return array[index++] & 0x7fff; + } else { + U_ASSERT((index + 2) <= length); + U_ASSERT(array[index] >= 0x8000); + U_ASSERT(array[index + 1] >= 0x8000); + int32_t len = ((head & 1) << 30) | + ((int32_t)(array[index] & 0x7fff) << 15) | + (array[index + 1] & 0x7fff); + index += 2; + return len; + } +} + +void Edits::Iterator::updateIndexes() { + srcIndex += oldLength_; + if (changed) { + replIndex += newLength_; + } + destIndex += newLength_; +} + +UBool Edits::Iterator::noNext() { + // No change beyond the string. 
+ changed = FALSE; + oldLength_ = newLength_ = 0; + return FALSE; +} + +UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + // We have an errorCode in case we need to start guarding against integer overflows. + // It is also convenient for caller loops if we bail out when an error was set elsewhere. + updateIndexes(); + if (remaining > 0) { + // Fine-grained iterator: Continue a sequence of equal-length changes. + --remaining; + return TRUE; + } + if (index >= length) { + return noNext(); + } + int32_t u = array[index++]; + if (u <= MAX_UNCHANGED) { + // Combine adjacent unchanged ranges. + changed = FALSE; + oldLength_ = u + 1; + while (index < length && (u = array[index]) <= MAX_UNCHANGED) { + ++index; + oldLength_ += u + 1; + } + newLength_ = oldLength_; + if (onlyChanges) { + updateIndexes(); + if (index >= length) { + return noNext(); + } + // already fetched u > MAX_UNCHANGED at index + ++index; + } else { + return TRUE; + } + } + changed = TRUE; + if (u <= MAX_SHORT_CHANGE) { + if (coarse) { + int32_t w = u >> 12; + int32_t len = (u & 0xfff) + 1; + oldLength_ = newLength_ = len * w; + } else { + // Split a sequence of equal-length changes that was compressed into one unit. + oldLength_ = newLength_ = u >> 12; + remaining = u & 0xfff; + return TRUE; + } + } else { + U_ASSERT(u <= 0x7fff); + oldLength_ = readLength((u >> 6) & 0x3f); + newLength_ = readLength(u & 0x3f); + if (!coarse) { + return TRUE; + } + } + // Combine adjacent changes. 
+ while (index < length && (u = array[index]) > MAX_UNCHANGED) { + ++index; + if (u <= MAX_SHORT_CHANGE) { + int32_t w = u >> 12; + int32_t len = (u & 0xfff) + 1; + len = len * w; + oldLength_ += len; + newLength_ += len; + } else { + U_ASSERT(u <= 0x7fff); + int32_t oldLen = readLength((u >> 6) & 0x3f); + int32_t newLen = readLength(u & 0x3f); + oldLength_ += oldLen; + newLength_ += newLen; + } + } + return TRUE; +} + +UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) { + if (U_FAILURE(errorCode) || i < 0) { return FALSE; } + if (i < srcIndex) { + // Reset the iterator to the start. + index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; + } else if (i < (srcIndex + oldLength_)) { + // The index is in the current span. + return TRUE; + } + while (next(FALSE, errorCode)) { + if (i < (srcIndex + oldLength_)) { + // The index is in the current span. + return TRUE; + } + if (remaining > 0) { + // Is the index in one of the remaining compressed edits? + // srcIndex is the start of the current span, before the remaining ones. + int32_t len = (remaining + 1) * oldLength_; + if (i < (srcIndex + len)) { + int32_t n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining + len = n * oldLength_; + srcIndex += len; + replIndex += len; + destIndex += len; + remaining -= n; + return TRUE; + } + // Make next() skip all of these edits at once. + oldLength_ = newLength_ = len; + remaining = 0; + } + } + return FALSE; +} + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/errorcode.cpp b/deps/icu-small/source/common/errorcode.cpp index bc7807d601..e7ac43b527 100644 --- a/deps/icu-small/source/common/errorcode.cpp +++ b/deps/icu-small/source/common/errorcode.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: errorcode.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/filteredbrk.cpp b/deps/icu-small/source/common/filteredbrk.cpp index 610ab9e664..0f642b19f6 100644 --- a/deps/icu-small/source/common/filteredbrk.cpp +++ b/deps/icu-small/source/common/filteredbrk.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -702,4 +702,4 @@ FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { U_NAMESPACE_END -#endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION +#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION diff --git a/deps/icu-small/source/common/filterednormalizer2.cpp b/deps/icu-small/source/common/filterednormalizer2.cpp index fb6e831af7..28e5f6cbdd 100644 --- a/deps/icu-small/source/common/filterednormalizer2.cpp +++ b/deps/icu-small/source/common/filterednormalizer2.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: filterednormalizer2.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/hash.h b/deps/icu-small/source/common/hash.h index a03fcae954..900c812098 100644 --- a/deps/icu-small/source/common/hash.h +++ b/deps/icu-small/source/common/hash.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/icudataver.c b/deps/icu-small/source/common/icudataver.c deleted file mode 100644 index 367e58f59b..0000000000 --- a/deps/icu-small/source/common/icudataver.c +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2009-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/icudataver.h" -#include "unicode/ures.h" -#include "uresimp.h" /* for ures_getVersionByKey */ - -U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) { - UResourceBundle *icudatares = NULL; - - if (U_FAILURE(*status)) { - return; - } - - if (dataVersionFillin != NULL) { - icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status); - if (U_SUCCESS(*status)) { - ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status); - } - ures_close(icudatares); - } -} diff --git a/deps/icu-small/source/common/icudataver.cpp b/deps/icu-small/source/common/icudataver.cpp new file mode 100644 index 0000000000..6dd3ea1bae --- /dev/null +++ b/deps/icu-small/source/common/icudataver.cpp @@ -0,0 +1,31 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2011, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/icudataver.h" +#include "unicode/ures.h" +#include "uresimp.h" /* for ures_getVersionByKey */ + +U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) { + UResourceBundle *icudatares = NULL; + + if (U_FAILURE(*status)) { + return; + } + + if (dataVersionFillin != NULL) { + icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status); + if (U_SUCCESS(*status)) { + ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status); + } + ures_close(icudatares); + } +} diff --git a/deps/icu-small/source/common/icuplug.cpp b/deps/icu-small/source/common/icuplug.cpp index a0ec46b1f9..c3c8231b77 100644 --- a/deps/icu-small/source/common/icuplug.cpp +++ b/deps/icu-small/source/common/icuplug.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/icuplugimp.h b/deps/icu-small/source/common/icuplugimp.h index 895b35357b..282c639b40 100644 --- a/deps/icu-small/source/common/icuplugimp.h +++ b/deps/icu-small/source/common/icuplugimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/listformatter.cpp b/deps/icu-small/source/common/listformatter.cpp index 9225c22f16..d105654755 100644 --- a/deps/icu-small/source/common/listformatter.cpp +++ b/deps/icu-small/source/common/listformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: listformatter.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -25,6 +25,7 @@ #include "charstr.h" #include "ucln_cmn.h" #include "uresimp.h" +#include "resource.h" U_NAMESPACE_BEGIN @@ -78,17 +79,6 @@ uprv_deleteListFormatInternal(void *obj) { U_CDECL_END -static ListFormatInternal* loadListFormatInternal( - const Locale& locale, - const char* style, - UErrorCode& errorCode); - -static void getStringByKey( - const UResourceBundle* rb, - const char* key, - UnicodeString& result, - UErrorCode& errorCode); - ListFormatter::ListFormatter(const ListFormatter& other) : owned(other.owned), data(other.data) { if (other.owned != NULL) { @@ -171,30 +161,107 @@ const ListFormatInternal* ListFormatter::getListFormatInternal( return result; } -static ListFormatInternal* loadListFormatInternal( +static const UChar solidus = 0x2F; +static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/" +enum { + kAliasPrefixLen = UPRV_LENGTHOF(aliasPrefix), + kStyleLenMax = 24 // longest currently is 14 +}; + +struct ListFormatter::ListPatternsSink : public ResourceSink { + UnicodeString two, start, middle, end; +#if ((U_PLATFORM == U_PF_AIX) || (U_PLATFORM == U_PF_OS390)) && (U_CPLUSPLUS_VERSION < 11) + char aliasedStyle[kStyleLenMax+1]; + ListPatternsSink() { + uprv_memset(aliasedStyle, 0, kStyleLenMax+1); + } +#else + char aliasedStyle[kStyleLenMax+1] = {0}; + + ListPatternsSink() {} +#endif + virtual ~ListPatternsSink(); + + void setAliasedStyle(UnicodeString alias) { + int32_t startIndex = alias.indexOf(aliasPrefix, kAliasPrefixLen, 0); + if (startIndex < 0) { + return; + } + startIndex 
+= kAliasPrefixLen; + int32_t endIndex = alias.indexOf(solidus, startIndex); + if (endIndex < 0) { + endIndex = alias.length(); + } + alias.extract(startIndex, endIndex-startIndex, aliasedStyle, kStyleLenMax+1, US_INV); + aliasedStyle[kStyleLenMax] = 0; + } + + void handleValueForPattern(ResourceValue &value, UnicodeString &pattern, UErrorCode &errorCode) { + if (pattern.isEmpty()) { + if (value.getType() == URES_ALIAS) { + if (aliasedStyle[0] == 0) { + setAliasedStyle(value.getAliasUnicodeString(errorCode)); + } + } else { + pattern = value.getUnicodeString(errorCode); + } + } + } + + virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, + UErrorCode &errorCode) { + aliasedStyle[0] = 0; + if (value.getType() == URES_ALIAS) { + setAliasedStyle(value.getAliasUnicodeString(errorCode)); + return; + } + ResourceTable listPatterns = value.getTable(errorCode); + for (int i = 0; U_SUCCESS(errorCode) && listPatterns.getKeyAndValue(i, key, value); ++i) { + if (uprv_strcmp(key, "2") == 0) { + handleValueForPattern(value, two, errorCode); + } else if (uprv_strcmp(key, "end") == 0) { + handleValueForPattern(value, end, errorCode); + } else if (uprv_strcmp(key, "middle") == 0) { + handleValueForPattern(value, middle, errorCode); + } else if (uprv_strcmp(key, "start") == 0) { + handleValueForPattern(value, start, errorCode); + } + } + } +}; + +// Virtual destructors must be defined out of line. 
+ListFormatter::ListPatternsSink::~ListPatternsSink() {} + +ListFormatInternal* ListFormatter::loadListFormatInternal( const Locale& locale, const char * style, UErrorCode& errorCode) { UResourceBundle* rb = ures_open(NULL, locale.getName(), &errorCode); - if (U_FAILURE(errorCode)) { - ures_close(rb); - return NULL; - } rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode); - rb = ures_getByKeyWithFallback(rb, style, rb, &errorCode); - if (U_FAILURE(errorCode)) { ures_close(rb); return NULL; } - UnicodeString two, start, middle, end; - getStringByKey(rb, "2", two, errorCode); - getStringByKey(rb, "start", start, errorCode); - getStringByKey(rb, "middle", middle, errorCode); - getStringByKey(rb, "end", end, errorCode); + ListFormatter::ListPatternsSink sink; + char currentStyle[kStyleLenMax+1]; + uprv_strncpy(currentStyle, style, kStyleLenMax); + currentStyle[kStyleLenMax] = 0; + + for (;;) { + ures_getAllItemsWithFallback(rb, currentStyle, sink, errorCode); + if (U_FAILURE(errorCode) || sink.aliasedStyle[0] == 0 || uprv_strcmp(currentStyle, sink.aliasedStyle) == 0) { + break; + } + uprv_strcpy(currentStyle, sink.aliasedStyle); + } ures_close(rb); if (U_FAILURE(errorCode)) { return NULL; } - ListFormatInternal* result = new ListFormatInternal(two, start, middle, end, errorCode); + if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) { + errorCode = U_MISSING_RESOURCE_ERROR; + return NULL; + } + ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode); if (result == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; @@ -206,15 +273,6 @@ static ListFormatInternal* loadListFormatInternal( return result; } -static void getStringByKey(const UResourceBundle* rb, const char* key, UnicodeString& result, UErrorCode& errorCode) { - int32_t len; - const UChar* ustr = ures_getStringByKeyWithFallback(rb, key, &len, &errorCode); - if (U_FAILURE(errorCode)) { - return; 
- } - result.setTo(ustr, len); -} - ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) { Locale locale; // The default locale. return createInstance(locale, errorCode); diff --git a/deps/icu-small/source/common/loadednormalizer2impl.cpp b/deps/icu-small/source/common/loadednormalizer2impl.cpp index b3b7b5d621..2b2d9a8e80 100644 --- a/deps/icu-small/source/common/loadednormalizer2impl.cpp +++ b/deps/icu-small/source/common/loadednormalizer2impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -232,6 +232,7 @@ Normalizer2::getInstance(const char *packageName, } } if(allModes==NULL) { + ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); LocalPointer localAllModes( Norm2AllModes::createInstance(packageName, name, errorCode)); if(U_SUCCESS(errorCode)) { diff --git a/deps/icu-small/source/common/localsvc.h b/deps/icu-small/source/common/localsvc.h index 4c0686d449..724216aa64 100644 --- a/deps/icu-small/source/common/localsvc.h +++ b/deps/icu-small/source/common/localsvc.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** diff --git a/deps/icu-small/source/common/locavailable.cpp b/deps/icu-small/source/common/locavailable.cpp index 3a92c3c3eb..5079885936 100644 --- a/deps/icu-small/source/common/locavailable.cpp +++ b/deps/icu-small/source/common/locavailable.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: locavailable.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/locbased.cpp b/deps/icu-small/source/common/locbased.cpp index f2f56adda1..ff378b4cc7 100644 --- a/deps/icu-small/source/common/locbased.cpp +++ b/deps/icu-small/source/common/locbased.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/locbased.h b/deps/icu-small/source/common/locbased.h index 453d5a8a30..6db6a41dc4 100644 --- a/deps/icu-small/source/common/locbased.h +++ b/deps/icu-small/source/common/locbased.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/locdispnames.cpp b/deps/icu-small/source/common/locdispnames.cpp index bb10b8946b..f5cd9a48f3 100644 --- a/deps/icu-small/source/common/locdispnames.cpp +++ b/deps/icu-small/source/common/locdispnames.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: locdispnames.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/locdspnm.cpp b/deps/icu-small/source/common/locdspnm.cpp index a17478ce6d..39934dc6c3 100644 --- a/deps/icu-small/source/common/locdspnm.cpp +++ b/deps/icu-small/source/common/locdspnm.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -13,6 +13,7 @@ #include "unicode/locdspnm.h" #include "unicode/simpleformatter.h" +#include "unicode/ucasemap.h" #include "unicode/ures.h" #include "unicode/udisplaycontext.h" #include "unicode/brkiter.h" diff --git a/deps/icu-small/source/common/locid.cpp b/deps/icu-small/source/common/locid.cpp index d2781db95b..36508acaf5 100644 --- a/deps/icu-small/source/common/locid.cpp +++ b/deps/icu-small/source/common/locid.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -33,6 +33,7 @@ #include "unicode/locid.h" +#include "unicode/strenum.h" #include "unicode/uloc.h" #include "putilimp.h" #include "mutex.h" diff --git a/deps/icu-small/source/common/loclikely.cpp b/deps/icu-small/source/common/loclikely.cpp index 9dcfc90cbd..1fbad9b9ff 100644 --- a/deps/icu-small/source/common/loclikely.cpp +++ b/deps/icu-small/source/common/loclikely.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. 
and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: loclikely.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -22,6 +22,7 @@ #include "unicode/utypes.h" #include "unicode/locid.h" #include "unicode/putil.h" +#include "unicode/uchar.h" #include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/uscript.h" diff --git a/deps/icu-small/source/common/locmap.c b/deps/icu-small/source/common/locmap.c deleted file mode 100644 index 1dba67a092..0000000000 --- a/deps/icu-small/source/common/locmap.c +++ /dev/null @@ -1,1174 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ********************************************************************** - * Copyright (C) 1996-2016, International Business Machines - * Corporation and others. All Rights Reserved. - ********************************************************************** - * - * Provides functionality for mapping between - * LCID and Posix IDs or ICU locale to codepage - * - * Note: All classes and code in this file are - * intended for internal use only. - * - * Methods of interest: - * unsigned long convertToLCID(const char*); - * const char* convertToPosix(unsigned long); - * - * Kathleen Wilson, 4/30/96 - * - * Date Name Description - * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added - * setId() method and safety check against - * MAX_ID_LENGTH. - * 04/23/99 stephen Added C wrapper for convertToPosix. - * 09/18/00 george Removed the memory leaks. 
- * 08/23/01 george Convert to C - */ - -#include "locmap.h" -#include "cstring.h" -#include "cmemory.h" - -#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500) -/* - * TODO: It seems like we should widen this to - * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW) - * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin) - * but those use gcc and won't have defined(_MSC_VER). - * We might need to #include some Windows header and test for some version macro from there. - * Or call some Windows function and see what it returns. - */ -#define USE_WINDOWS_LOCALE_API -#endif - -#ifdef USE_WINDOWS_LOCALE_API -#include -#include -#endif - -/* - * Note: - * The mapping from Win32 locale ID numbers to POSIX locale strings should - * be the faster one. - * - * Many LCID values come from winnt.h - * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx - */ - -/* -//////////////////////////////////////////////// -// -// Internal Classes for LCID <--> POSIX Mapping -// -///////////////////////////////////////////////// -*/ - -typedef struct ILcidPosixElement -{ - const uint32_t hostID; - const char * const posixID; -} ILcidPosixElement; - -typedef struct ILcidPosixMap -{ - const uint32_t numRegions; - const struct ILcidPosixElement* const regionMaps; -} ILcidPosixMap; - - -/* -///////////////////////////////////////////////// -// -// Easy macros to make the LCID <--> POSIX Mapping -// -///////////////////////////////////////////////// -*/ - -/** - * The standard one language/one country mapping for LCID. - * The first element must be the language, and the following - * elements are the language with the country. 
- * @param hostID LCID in host format such as 0x044d - * @param languageID posix ID of just the language such as 'de' - * @param posixID posix ID of the language_TERRITORY such as 'de_CH' - */ -#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \ -static const ILcidPosixElement locmap_ ## languageID [] = { \ - {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \ - {hostID, #posixID}, \ -}; - -/** - * Define a subtable by ID - * @param id the POSIX ID, either a language or language_TERRITORY - */ -#define ILCID_POSIX_SUBTABLE(id) \ -static const ILcidPosixElement locmap_ ## id [] = - - -/** - * Create the map for the posixID. This macro supposes that the language string - * name is the same as the global variable name, and that the first element - * in the ILcidPosixElement is just the language. - * @param _posixID the full POSIX ID for this entry. - */ -#define ILCID_POSIX_MAP(_posixID) \ - {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID} - -/* -//////////////////////////////////////////// -// -// Create the table of LCID to POSIX Mapping -// None of it should be dynamically created. -// -// Keep static locale variables inside the function so that -// it can be created properly during static init. -// -// Note: This table should be updated periodically. Check the National Lanaguage Support API Reference Website. -// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be -// maintained for support of older Windows version. -// Update: Windows 7 (091130) -// -// Note: Microsoft assign a different LCID if a locale has a sorting variant. POSIX IDs below may contain -// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is -// called from uloc_getLCID(), keywords other than collation are already removed. If we really need -// to support other keywords in this mapping data, we must update the implementation. 
-//////////////////////////////////////////// -*/ - -ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA) - -ILCID_POSIX_SUBTABLE(ar) { - {0x01, "ar"}, - {0x3801, "ar_AE"}, - {0x3c01, "ar_BH"}, - {0x1401, "ar_DZ"}, - {0x0c01, "ar_EG"}, - {0x0801, "ar_IQ"}, - {0x2c01, "ar_JO"}, - {0x3401, "ar_KW"}, - {0x3001, "ar_LB"}, - {0x1001, "ar_LY"}, - {0x1801, "ar_MA"}, - {0x1801, "ar_MO"}, - {0x2001, "ar_OM"}, - {0x4001, "ar_QA"}, - {0x0401, "ar_SA"}, - {0x2801, "ar_SY"}, - {0x1c01, "ar_TN"}, - {0x2401, "ar_YE"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET) -ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL) - -ILCID_POSIX_SUBTABLE(az) { - {0x2c, "az"}, - {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */ - {0x742c, "az_Cyrl"}, /* Cyrillic based */ - {0x042c, "az_Latn_AZ"}, /* Latin based */ - {0x782c, "az_Latn"}, /* Latin based */ - {0x042c, "az_AZ"} /* Latin based */ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU) -ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY) - -/*ILCID_POSIX_SUBTABLE(ber) { - {0x5f, "ber"}, - {0x045f, "ber_Arab_DZ"}, - {0x045f, "ber_Arab"}, - {0x085f, "ber_Latn_DZ"}, - {0x085f, "ber_Latn"} -};*/ - -ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) - -ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG) - -ILCID_POSIX_SUBTABLE(bn) { - {0x45, "bn"}, - {0x0845, "bn_BD"}, - {0x0445, "bn_IN"} -}; - -ILCID_POSIX_SUBTABLE(bo) { - {0x51, "bo"}, - {0x0851, "bo_BT"}, - {0x0451, "bo_CN"}, - {0x0c51, "dz_BT"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR) - -ILCID_POSIX_SUBTABLE(ca) { - {0x03, "ca"}, - {0x0403, "ca_ES"}, - {0x0803, "ca_ES_VALENCIA"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) -ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US) - -ILCID_POSIX_SUBTABLE(ckb) { - {0x92, "ckb"}, - {0x7c92, "ckb_Arab"}, - {0x0492, "ckb_Arab_IQ"} -}; - -/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */ -ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ) - -ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB) 
-ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK) - -ILCID_POSIX_SUBTABLE(de) { - {0x07, "de"}, - {0x0c07, "de_AT"}, - {0x0807, "de_CH"}, - {0x0407, "de_DE"}, - {0x1407, "de_LI"}, - {0x1007, "de_LU"}, - {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/ - {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV) -ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR) - -ILCID_POSIX_SUBTABLE(en) { - {0x09, "en"}, - {0x0c09, "en_AU"}, - {0x2809, "en_BZ"}, - {0x1009, "en_CA"}, - {0x0809, "en_GB"}, - {0x3c09, "en_HK"}, - {0x3809, "en_ID"}, - {0x1809, "en_IE"}, - {0x4009, "en_IN"}, - {0x2009, "en_JM"}, - {0x4409, "en_MY"}, - {0x1409, "en_NZ"}, - {0x3409, "en_PH"}, - {0x4809, "en_SG"}, - {0x2C09, "en_TT"}, - {0x0409, "en_US"}, - {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */ - {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */ - {0x1c09, "en_ZA"}, - {0x3009, "en_ZW"}, - {0x2409, "en_029"}, - {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */ - {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */ - {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */ - {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */ - {0x0409, "en_UM"} /* Alias for en_US. Leave last. 
*/ -}; - -ILCID_POSIX_SUBTABLE(en_US_POSIX) { - {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */ -}; - -ILCID_POSIX_SUBTABLE(es) { - {0x0a, "es"}, - {0x2c0a, "es_AR"}, - {0x400a, "es_BO"}, - {0x340a, "es_CL"}, - {0x240a, "es_CO"}, - {0x140a, "es_CR"}, - {0x5c0a, "es_CU"}, - {0x1c0a, "es_DO"}, - {0x300a, "es_EC"}, - {0x0c0a, "es_ES"}, /*Modern sort.*/ - {0x100a, "es_GT"}, - {0x480a, "es_HN"}, - {0x080a, "es_MX"}, - {0x4c0a, "es_NI"}, - {0x180a, "es_PA"}, - {0x280a, "es_PE"}, - {0x500a, "es_PR"}, - {0x3c0a, "es_PY"}, - {0x440a, "es_SV"}, - {0x540a, "es_US"}, - {0x380a, "es_UY"}, - {0x200a, "es_VE"}, - {0x580a, "es_419"}, - {0x040a, "es_ES@collation=traditional"}, - {0x040a, "es@collation=traditional"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE) -ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES) - -/* ISO-639 doesn't distinguish between Persian and Dari.*/ -ILCID_POSIX_SUBTABLE(fa) { - {0x29, "fa"}, - {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */ - {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ -}; - -/* duplicate for roundtripping */ -ILCID_POSIX_SUBTABLE(fa_AF) { - {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */ - {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ -}; - -ILCID_POSIX_SUBTABLE(ff) { - {0x67, "ff"}, - {0x7c67, "ff_Latn"}, - {0x0867, "ff_Latn_SN"}, - {0x0467, "ff_NG"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI) -ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH) -ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO) - -ILCID_POSIX_SUBTABLE(fr) { - {0x0c, "fr"}, - {0x080c, "fr_BE"}, - {0x0c0c, "fr_CA"}, - {0x240c, "fr_CD"}, - {0x240c, "fr_CG"}, - {0x100c, "fr_CH"}, - {0x300c, "fr_CI"}, - {0x2c0c, "fr_CM"}, - {0x040c, "fr_FR"}, - {0x3c0c, "fr_HT"}, - {0x140c, "fr_LU"}, - {0x380c, "fr_MA"}, - {0x180c, "fr_MC"}, - {0x340c, "fr_ML"}, - {0x200c, "fr_RE"}, - {0x280c, "fr_SN"}, - {0xe40c, "fr_015"}, - {0x1c0c, "fr_029"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG) - -ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL) - -ILCID_POSIX_SUBTABLE(ga) { 
/* Gaelic (Ireland) */ - {0x3c, "ga"}, - {0x083c, "ga_IE"}, - {0x043c, "gd_GB"} -}; - -ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */ - {0x91, "gd"}, - {0x0491, "gd_GB"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES) -ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY) -ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR) - -ILCID_POSIX_SUBTABLE(ha) { - {0x68, "ha"}, - {0x7c68, "ha_Latn"}, - {0x0468, "ha_Latn_NG"}, -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US) -ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL) -ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN) - -/* This LCID is really four different locales.*/ -ILCID_POSIX_SUBTABLE(hr) { - {0x1a, "hr"}, - {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */ - {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */ - {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */ - {0x781a, "bs"}, /* Bosnian */ - {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */ - {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */ - {0x101a, "hr_BA"}, /* Croatian in Bosnia */ - {0x041a, "hr_HR"}, /* Croatian*/ - {0x2c1a, "sr_Latn_ME"}, - {0x241a, "sr_Latn_RS"}, - {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */ - {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/ - {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */ - {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */ - {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/ - {0x301a, "sr_Cyrl_ME"}, - {0x281a, "sr_Cyrl_RS"}, - {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */ - {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. 
*/ -}; - -ILCID_POSIX_SUBTABLE(hsb) { - {0x2E, "hsb"}, - {0x042E, "hsb_DE"}, - {0x082E, "dsb_DE"}, - {0x7C2E, "dsb"}, -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) -ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) -ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG) -ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) -ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) -ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) -ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS) - -ILCID_POSIX_SUBTABLE(it) { - {0x10, "it"}, - {0x0810, "it_CH"}, - {0x0410, "it_IT"} -}; - -ILCID_POSIX_SUBTABLE(iu) { - {0x5d, "iu"}, - {0x045d, "iu_Cans_CA"}, - {0x785d, "iu_Cans"}, - {0x085d, "iu_Latn_CA"}, - {0x7c5d, "iu_Latn"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/ -ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP) -ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE) -ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ) -ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL) -ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH) -ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN) - -ILCID_POSIX_SUBTABLE(ko) { - {0x12, "ko"}, - {0x0812, "ko_KP"}, - {0x0412, "ko_KR"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG) - -ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */ - {0x60, "ks"}, - {0x0860, "ks_IN"}, /* Documentation doesn't mention script */ - {0x0460, "ks_Arab_IN"}, - {0x0860, "ks_Deva_IN"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */ -ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */ -ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) -ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA) -ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT) -ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV) -ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ) -ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK) -ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN) - -ILCID_POSIX_SUBTABLE(mn) { - {0x50, "mn"}, - {0x0450, "mn_MN"}, - {0x7c50, "mn_Mong"}, 
- {0x0850, "mn_Mong_CN"}, - {0x0850, "mn_CN"}, - {0x7850, "mn_Cyrl"}, - {0x0c50, "mn_Mong_MN"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA) -ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN) - -ILCID_POSIX_SUBTABLE(ms) { - {0x3e, "ms"}, - {0x083e, "ms_BN"}, /* Brunei Darussalam*/ - {0x043e, "ms_MY"} /* Malaysia*/ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT) -ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM) - -ILCID_POSIX_SUBTABLE(ne) { - {0x61, "ne"}, - {0x0861, "ne_IN"}, /* India*/ - {0x0461, "ne_NP"} /* Nepal*/ -}; - -ILCID_POSIX_SUBTABLE(nl) { - {0x13, "nl"}, - {0x0813, "nl_BE"}, - {0x0413, "nl_NL"} -}; - -/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/ -ILCID_POSIX_SUBTABLE(no) { - {0x14, "no"}, /* really nb_NO */ - {0x7c14, "nb"}, /* really nb */ - {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */ - {0x0414, "no_NO"}, /* really nb_NO */ - {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */ - {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. 
*/ - {0x0814, "no_NO_NY"}/* really nn_NO */ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */ -ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR) - -ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */ - {0x72, "om"}, - {0x0472, "om_ET"}, - {0x0472, "gaz_ET"} -}; - -/* Declared as or_IN to get around compiler errors*/ -ILCID_POSIX_SUBTABLE(or_IN) { - {0x48, "or"}, - {0x0448, "or_IN"}, -}; - - -ILCID_POSIX_SUBTABLE(pa) { - {0x46, "pa"}, - {0x0446, "pa_IN"}, - {0x0846, "pa_PK"}, - {0x0846, "pa_Arab_PK"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN) -ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) -ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) - -ILCID_POSIX_SUBTABLE(pt) { - {0x16, "pt"}, - {0x0416, "pt_BR"}, - {0x0816, "pt_PT"} -}; - -ILCID_POSIX_SUBTABLE(qu) { - {0x6b, "qu"}, - {0x046b, "qu_BO"}, - {0x086b, "qu_EC"}, - {0x0C6b, "qu_PE"}, - {0x046b, "quz_BO"}, - {0x086b, "quz_EC"}, - {0x0C6b, "quz_PE"} -}; - -ILCID_POSIX_SUBTABLE(quc) { - {0x93, "quc"}, - {0x0493, "quc_CO"}, - /* - "quc_Latn_GT" is an exceptional case. Language ID of "quc" - is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be - under the group of "qut". "qut" is a retired ISO 639-3 language - code for West Central Quiche, and merged to "quc". - It looks Windows previously reserved "qut" for K'iche', but, - decided to use "quc" when adding a locale for K'iche' (Guatemala). - - This data structure used here assumes language ID bits in - LCID is unique for alphabetic language code. But this is not true - for "quc_Latn_GT". If we don't have the data below, LCID look up - by alphabetic locale ID (POSIX) will fail. The same entry is found - under "qut" below, which is required for reverse look up. - */ - {0x0486, "quc_Latn_GT"} -}; - -ILCID_POSIX_SUBTABLE(qut) { - {0x86, "qut"}, - {0x0486, "qut_GT"}, - /* - See the note in "quc" above. 
- */ - {0x0486, "quc_Latn_GT"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH) - -ILCID_POSIX_SUBTABLE(ro) { - {0x18, "ro"}, - {0x0418, "ro_RO"}, - {0x0818, "ro_MD"} -}; - -ILCID_POSIX_SUBTABLE(root) { - {0x00, "root"} -}; - -ILCID_POSIX_SUBTABLE(ru) { - {0x19, "ru"}, - {0x0419, "ru_RU"}, - {0x0819, "ru_MD"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW) -ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU) - -ILCID_POSIX_SUBTABLE(sd) { - {0x59, "sd"}, - {0x0459, "sd_IN"}, - {0x0459, "sd_Deva_IN"}, - {0x0859, "sd_PK"} -}; - -ILCID_POSIX_SUBTABLE(se) { - {0x3b, "se"}, - {0x0c3b, "se_FI"}, - {0x043b, "se_NO"}, - {0x083b, "se_SE"}, - {0x783b, "sma"}, - {0x183b, "sma_NO"}, - {0x1c3b, "sma_SE"}, - {0x7c3b, "smj"}, - {0x703b, "smn"}, - {0x743b, "sms"}, - {0x103b, "smj_NO"}, - {0x143b, "smj_SE"}, - {0x243b, "smn_FI"}, - {0x203b, "sms_FI"}, -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK) -ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK) -ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI) - -ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */ - {0x77, "so"}, - {0x0477, "so_ET"}, - {0x0477, "so_SO"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL) -ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA) - -ILCID_POSIX_SUBTABLE(sv) { - {0x1d, "sv"}, - {0x081d, "sv_FI"}, - {0x041d, "sv_SE"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE) -ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY) - -ILCID_POSIX_SUBTABLE(ta) { - {0x49, "ta"}, - {0x0449, "ta_IN"}, - {0x0849, "ta_LK"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN) - -/* Cyrillic based by default */ -ILCID_POSIX_SUBTABLE(tg) { - {0x28, "tg"}, - {0x7c28, "tg_Cyrl"}, - {0x0428, "tg_Cyrl_TJ"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH) - -ILCID_POSIX_SUBTABLE(ti) { - {0x73, "ti"}, - {0x0873, "ti_ER"}, - {0x0473, "ti_ET"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM) - -ILCID_POSIX_SUBTABLE(tn) { - {0x32, "tn"}, - {0x0832, "tn_BW"}, - {0x0432, "tn_ZA"} -}; - 
-ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR) -ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA) -ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU) - -ILCID_POSIX_SUBTABLE(tzm) { - {0x5f, "tzm"}, - {0x7c5f, "tzm_Latn"}, - {0x085f, "tzm_Latn_DZ"}, - {0x105f, "tzm_Tfng_MA"}, - {0x045f, "tzm_Arab_MA"}, - {0x045f, "tmz"} -}; - -ILCID_POSIX_SUBTABLE(ug) { - {0x80, "ug"}, - {0x0480, "ug_CN"}, - {0x0480, "ug_Arab_CN"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA) - -ILCID_POSIX_SUBTABLE(ur) { - {0x20, "ur"}, - {0x0820, "ur_IN"}, - {0x0420, "ur_PK"} -}; - -ILCID_POSIX_SUBTABLE(uz) { - {0x43, "uz"}, - {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */ - {0x7843, "uz_Cyrl"}, /* Cyrillic based */ - {0x0843, "uz_UZ"}, /* Cyrillic based */ - {0x0443, "uz_Latn_UZ"}, /* Latin based */ - {0x7c43, "uz_Latn"} /* Latin based */ -}; - -ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */ - {0x33, "ve"}, - {0x0433, "ve_ZA"}, - {0x0433, "ven_ZA"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) -ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN) -ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) -ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi) -ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) - -ILCID_POSIX_SUBTABLE(zh) { - {0x0004, "zh_Hans"}, - {0x7804, "zh"}, - {0x0804, "zh_CN"}, - {0x0804, "zh_Hans_CN"}, - {0x0c04, "zh_Hant_HK"}, - {0x0c04, "zh_HK"}, - {0x1404, "zh_Hant_MO"}, - {0x1404, "zh_MO"}, - {0x1004, "zh_Hans_SG"}, - {0x1004, "zh_SG"}, - {0x0404, "zh_Hant_TW"}, - {0x7c04, "zh_Hant"}, - {0x0404, "zh_TW"}, - {0x30404,"zh_Hant_TW"}, /* Bopomofo order */ - {0x30404,"zh_TW"}, /* Bopomofo order */ - {0x20004,"zh@collation=stroke"}, - {0x20404,"zh_Hant@collation=stroke"}, - {0x20404,"zh_Hant_TW@collation=stroke"}, - {0x20404,"zh_TW@collation=stroke"}, - {0x20804,"zh_Hans@collation=stroke"}, - {0x20804,"zh_Hans_CN@collation=stroke"}, - {0x20804,"zh_CN@collation=stroke"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA) - -/* This must be static and grouped by LCID. 
*/ -static const ILcidPosixMap gPosixIDmap[] = { - ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */ - ILCID_POSIX_MAP(am), /* am Amharic 0x5e */ - ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */ - ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */ - ILCID_POSIX_MAP(as), /* as Assamese 0x4d */ - ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */ - ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */ - ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */ -/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */ - ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */ - ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */ - ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */ - ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */ - ILCID_POSIX_MAP(br), /* br Breton 0x7e */ - ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */ - ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */ - ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */ - ILCID_POSIX_MAP(co), /* co Corsican 0x83 */ - ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */ - ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */ - ILCID_POSIX_MAP(da), /* da Danish 0x06 */ - ILCID_POSIX_MAP(de), /* de German 0x07 */ - ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */ - ILCID_POSIX_MAP(el), /* el Greek 0x08 */ - ILCID_POSIX_MAP(en), /* en English 0x09 */ - ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */ - ILCID_POSIX_MAP(es), /* es Spanish 0x0a */ - ILCID_POSIX_MAP(et), /* et Estonian 0x25 */ - ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */ - ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */ - ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */ - ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */ - ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */ - ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */ - ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */ - ILCID_POSIX_MAP(fr), /* fr French 0x0c */ - ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */ - ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */ - ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */ - ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */ - ILCID_POSIX_MAP(gl), /* 
gl Galician 0x56 */ - ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */ - ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */ - ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */ - ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */ - ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */ - ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */ - ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */ - ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */ - ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */ - ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */ - ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */ - ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */ - ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */ - ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */ - ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */ - ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */ - ILCID_POSIX_MAP(it), /* it Italian 0x10 */ - ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */ - ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */ - ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */ - ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */ - ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */ - ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */ - ILCID_POSIX_MAP(km), /* km Khmer 0x53 */ - ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */ - ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */ - ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */ - ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */ - ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */ - ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */ - ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */ - ILCID_POSIX_MAP(la), /* la Latin 0x76 */ - ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */ - ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */ - ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */ - ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */ - ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */ - ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */ - ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */ - ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */ - ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */ - 
ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */ - ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */ - ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */ - ILCID_POSIX_MAP(my), /* my Burmese 0x55 */ -/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */ - ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */ - ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */ -/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */ - ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */ - ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */ - ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */ - ILCID_POSIX_MAP(om), /* om Oromo 0x72 */ - ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */ - ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */ - ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */ - ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */ - ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */ - ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */ - ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */ - ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */ - ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */ - ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */ - ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */ - ILCID_POSIX_MAP(root), /* root 0x00 */ - ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */ - ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */ - ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */ - ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */ - ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */ - ILCID_POSIX_MAP(se), /* se Sami 0x3b */ -/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */ - ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */ - ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */ - ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */ - ILCID_POSIX_MAP(so), /* so Somali 0x77 */ - ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */ -/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */ - ILCID_POSIX_MAP(st), /* st Sutu 0x30 */ - ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */ - ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */ - ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */ - ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */ - ILCID_POSIX_MAP(te), /* te Telugu 0x4a */ - 
ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */ - ILCID_POSIX_MAP(th), /* th Thai 0x1e */ - ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */ - ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */ - ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */ - ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */ - ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */ - ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */ - ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */ - ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */ - ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */ - ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */ - ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */ - ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */ - ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */ - ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */ - ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */ - ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */ - ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */ - ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */ - ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */ -}; - -static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap); - -/** - * Do not call this function. It is called by hostID. - * The function is not private because this struct must stay as a C struct, - * and this is an internal class. - */ -static int32_t -idCmp(const char* id1, const char* id2) -{ - int32_t diffIdx = 0; - while (*id1 == *id2 && *id1 != 0) { - diffIdx++; - id1++; - id2++; - } - return diffIdx; -} - -/** - * Searches for a Windows LCID - * - * @param posixid the Posix style locale id. - * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has - * no equivalent Windows LCID. 
- * @return the LCID - */ -static uint32_t -getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status) -{ - int32_t bestIdx = 0; - int32_t bestIdxDiff = 0; - int32_t posixIDlen = (int32_t)uprv_strlen(posixID); - uint32_t idx; - - for (idx = 0; idx < this_0->numRegions; idx++ ) { - int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID); - if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) { - if (posixIDlen == sameChars) { - /* Exact match */ - return this_0->regionMaps[idx].hostID; - } - bestIdxDiff = sameChars; - bestIdx = idx; - } - } - /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */ - /* We also have to make sure that sid and si and similar string subsets don't match. */ - if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@') - && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) - { - *status = U_USING_FALLBACK_WARNING; - return this_0->regionMaps[bestIdx].hostID; - } - - /*no match found */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return this_0->regionMaps->hostID; -} - -static const char* -getPosixID(const ILcidPosixMap *this_0, uint32_t hostID) -{ - uint32_t i; - for (i = 0; i <= this_0->numRegions; i++) - { - if (this_0->regionMaps[i].hostID == hostID) - { - return this_0->regionMaps[i].posixID; - } - } - - /* If you get here, then no matching region was found, - so return the language id with the wild card region. 
*/ - return this_0->regionMaps[0].posixID; -} - -/* -////////////////////////////////////// -// -// LCID --> POSIX -// -///////////////////////////////////// -*/ -#ifdef USE_WINDOWS_LOCALE_API -/* - * Various language tags needs to be changed: - * quz -> qu - * prs -> fa - */ -#define FIX_LANGUAGE_ID_TAG(buffer, len) \ - if (len >= 3) { \ - if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\ - buffer[2] = 0; \ - uprv_strcat(buffer, buffer+3); \ - } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\ - buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \ - uprv_strcat(buffer, buffer+3); \ - } \ - } - -#endif -U_CAPI int32_t -uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status) -{ - uint16_t langID; - uint32_t localeIndex; - UBool bLookup = TRUE; - const char *pPosixID = NULL; - -#ifdef USE_WINDOWS_LOCALE_API - // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and - // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for - // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot - // use the Windows API to resolve locale ID for this specific case. - if ((hostid & 0x3FF) != 0x92) { - int32_t tmpLen = 0; - char locName[157]; /* ULOC_FULLNAME_CAPACITY */ - - tmpLen = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)locName, UPRV_LENGTHOF(locName)); - if (tmpLen > 1) { - /* Windows locale name may contain sorting variant, such as "es-ES_tradnl". - In such case, we need special mapping data found in the hardcoded table - in this source file. 
*/ - char *p = uprv_strchr(locName, '_'); - if (p) { - /* Keep the base locale, without variant */ - *p = 0; - tmpLen = uprv_strlen(locName); - } - else { - /* No hardcoded table lookup necessary */ - bLookup = FALSE; - } - /* Change the tag separator from '-' to '_' */ - p = locName; - while (*p) { - if (*p == '-') { - *p = '_'; - } - p++; - } - FIX_LANGUAGE_ID_TAG(locName, tmpLen); - pPosixID = locName; - } - } -#endif - if (bLookup) { - const char *pCandidate = NULL; - langID = LANGUAGE_LCID(hostid); - - for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) { - if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) { - pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid); - break; - } - } - - /* On Windows, when locale name has a variant, we still look up the hardcoded table. - If a match in the hardcoded table is longer than the Windows locale name without - variant, we use the one as the result */ - if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) { - pPosixID = pCandidate; - } - } - - if (pPosixID) { - int32_t resLen = uprv_strlen(pPosixID); - int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity; - uprv_memcpy(posixID, pPosixID, copyLen); - if (resLen < posixIDCapacity) { - posixID[resLen] = 0; - if (*status == U_STRING_NOT_TERMINATED_WARNING) { - *status = U_ZERO_ERROR; - } - } else if (resLen == posixIDCapacity) { - *status = U_STRING_NOT_TERMINATED_WARNING; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - } - return resLen; - } - - /* no match found */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return -1; -} - -/* -////////////////////////////////////// -// -// POSIX --> LCID -// This should only be called from uloc_getLCID. -// The locale ID must be in canonical form. -// langID is separate so that this file doesn't depend on the uloc_* API. 
-// -///////////////////////////////////// -*/ - -U_CAPI uint32_t -uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status) -{ - - uint32_t low = 0; - uint32_t high = gLocaleCount; - uint32_t mid; - uint32_t oldmid = 0; - int32_t compVal; - - uint32_t value = 0; - uint32_t fallbackValue = (uint32_t)-1; - UErrorCode myStatus; - uint32_t idx; - - /* Check for incomplete id. */ - if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) { - return 0; - } - - /*Binary search for the map entry for normal cases */ - - while (high > low) /*binary search*/{ - - mid = (high+low) >> 1; /*Finds median*/ - - if (mid == oldmid) - break; - - compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID); - if (compVal < 0){ - high = mid; - } - else if (compVal > 0){ - low = mid; - } - else /*we found it*/{ - return getHostID(&gPosixIDmap[mid], posixID, status); - } - oldmid = mid; - } - - /* - * Sometimes we can't do a binary search on posixID because some LCIDs - * go to different locales. We hit one of those special cases. - */ - for (idx = 0; idx < gLocaleCount; idx++ ) { - myStatus = U_ZERO_ERROR; - value = getHostID(&gPosixIDmap[idx], posixID, &myStatus); - if (myStatus == U_ZERO_ERROR) { - return value; - } - else if (myStatus == U_USING_FALLBACK_WARNING) { - fallbackValue = value; - } - } - - if (fallbackValue != (uint32_t)-1) { - *status = U_USING_FALLBACK_WARNING; - return fallbackValue; - } - - /* no match found */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; /* return international (root) */ -} diff --git a/deps/icu-small/source/common/locmap.cpp b/deps/icu-small/source/common/locmap.cpp new file mode 100644 index 0000000000..8e47c84b1e --- /dev/null +++ b/deps/icu-small/source/common/locmap.cpp @@ -0,0 +1,1288 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 1996-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * + * Provides functionality for mapping between + * LCID and Posix IDs or ICU locale to codepage + * + * Note: All classes and code in this file are + * intended for internal use only. + * + * Methods of interest: + * unsigned long convertToLCID(const char*); + * const char* convertToPosix(unsigned long); + * + * Kathleen Wilson, 4/30/96 + * + * Date Name Description + * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added + * setId() method and safety check against + * MAX_ID_LENGTH. + * 04/23/99 stephen Added C wrapper for convertToPosix. + * 09/18/00 george Removed the memory leaks. + * 08/23/01 george Convert to C + */ + +#include "locmap.h" +#include "cstring.h" +#include "cmemory.h" +#include "unicode/uloc.h" + +#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500) +/* + * TODO: It seems like we should widen this to + * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW) + * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin) + * but those use gcc and won't have defined(_MSC_VER). + * We might need to #include some Windows header and test for some version macro from there. + * Or call some Windows function and see what it returns. + */ +#define USE_WINDOWS_LCID_MAPPING_API +#include <windows.h> +#include <winnls.h> +#endif + +/* + * Note: + * The mapping from Win32 locale ID numbers to POSIX locale strings should + * be the faster one.
+ * + * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx + * [MS-LCID] Windows Language Code Identifier (LCID) Reference + */ + +/* +//////////////////////////////////////////////// +// +// Internal Classes for LCID <--> POSIX Mapping +// +///////////////////////////////////////////////// +*/ + +typedef struct ILcidPosixElement +{ + const uint32_t hostID; + const char * const posixID; +} ILcidPosixElement; + +typedef struct ILcidPosixMap +{ + const uint32_t numRegions; + const struct ILcidPosixElement* const regionMaps; +} ILcidPosixMap; + + +/* +///////////////////////////////////////////////// +// +// Easy macros to make the LCID <--> POSIX Mapping +// +///////////////////////////////////////////////// +*/ + +/** + * The standard one language/one country mapping for LCID. + * The first element must be the language, and the following + * elements are the language with the country. + * @param hostID LCID in host format such as 0x044d + * @param languageID posix ID of just the language such as 'de' + * @param posixID posix ID of the language_TERRITORY such as 'de_CH' + */ +#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \ +static const ILcidPosixElement locmap_ ## languageID [] = { \ + {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \ + {hostID, #posixID}, \ +}; + +/** + * Define a subtable by ID + * @param id the POSIX ID, either a language or language_TERRITORY + */ +#define ILCID_POSIX_SUBTABLE(id) \ +static const ILcidPosixElement locmap_ ## id [] = + + +/** + * Create the map for the posixID. This macro supposes that the language string + * name is the same as the global variable name, and that the first element + * in the ILcidPosixElement is just the language. + * @param _posixID the full POSIX ID for this entry. 
+ */ +#define ILCID_POSIX_MAP(_posixID) \ + {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID} + +/* +//////////////////////////////////////////// +// +// Create the table of LCID to POSIX Mapping +// None of it should be dynamically created. +// +// Keep static locale variables inside the function so that +// it can be created properly during static init. +// +// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier +// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx +// +// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be +// maintained for support of older Windows version. +// Update: Windows 7 (091130) +// +// Note: Microsoft assign a different LCID if a locale has a sorting variant. POSIX IDs below may contain +// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is +// called from uloc_getLCID(), keywords other than collation are already removed. If we really need +// to support other keywords in this mapping data, we must update the implementation. +//////////////////////////////////////////// +*/ + +// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as +// LocaleNameToLCID and LCIDToLocaleName provide 90% of these. 
+ +ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA) + +ILCID_POSIX_SUBTABLE(ar) { + {0x01, "ar"}, + {0x3801, "ar_AE"}, + {0x3c01, "ar_BH"}, + {0x1401, "ar_DZ"}, + {0x0c01, "ar_EG"}, + {0x0801, "ar_IQ"}, + {0x2c01, "ar_JO"}, + {0x3401, "ar_KW"}, + {0x3001, "ar_LB"}, + {0x1001, "ar_LY"}, + {0x1801, "ar_MA"}, + {0x1801, "ar_MO"}, + {0x2001, "ar_OM"}, + {0x4001, "ar_QA"}, + {0x0401, "ar_SA"}, + {0x2801, "ar_SY"}, + {0x1c01, "ar_TN"}, + {0x2401, "ar_YE"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET) +ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL) + +ILCID_POSIX_SUBTABLE(az) { + {0x2c, "az"}, + {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */ + {0x742c, "az_Cyrl"}, /* Cyrillic based */ + {0x042c, "az_Latn_AZ"}, /* Latin based */ + {0x782c, "az_Latn"}, /* Latin based */ + {0x042c, "az_AZ"} /* Latin based */ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU) +ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY) + +/*ILCID_POSIX_SUBTABLE(ber) { + {0x5f, "ber"}, + {0x045f, "ber_Arab_DZ"}, + {0x045f, "ber_Arab"}, + {0x085f, "ber_Latn_DZ"}, + {0x085f, "ber_Latn"} +};*/ + +ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) + +ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG) + +ILCID_POSIX_SUBTABLE(bn) { + {0x45, "bn"}, + {0x0845, "bn_BD"}, + {0x0445, "bn_IN"} +}; + +ILCID_POSIX_SUBTABLE(bo) { + {0x51, "bo"}, + {0x0851, "bo_BT"}, + {0x0451, "bo_CN"}, + {0x0c51, "dz_BT"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR) + +ILCID_POSIX_SUBTABLE(ca) { + {0x03, "ca"}, + {0x0403, "ca_ES"}, + {0x0803, "ca_ES_VALENCIA"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) +ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US) + +// ICU has chosen different names for these. 
+ILCID_POSIX_SUBTABLE(ckb) { + {0x92, "ckb"}, + {0x7c92, "ckb_Arab"}, + {0x0492, "ckb_Arab_IQ"} +}; + +/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */ +ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ) + +ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB) +ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK) + +// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names +ILCID_POSIX_SUBTABLE(de) { + {0x07, "de"}, + {0x0c07, "de_AT"}, + {0x0807, "de_CH"}, + {0x0407, "de_DE"}, + {0x1407, "de_LI"}, + {0x1007, "de_LU"}, + {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/ + {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV) +ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR) + +// Windows uses an empty string for 'invariant' +ILCID_POSIX_SUBTABLE(en) { + {0x09, "en"}, + {0x0c09, "en_AU"}, + {0x2809, "en_BZ"}, + {0x1009, "en_CA"}, + {0x0809, "en_GB"}, + {0x3c09, "en_HK"}, + {0x3809, "en_ID"}, + {0x1809, "en_IE"}, + {0x4009, "en_IN"}, + {0x2009, "en_JM"}, + {0x4409, "en_MY"}, + {0x1409, "en_NZ"}, + {0x3409, "en_PH"}, + {0x4809, "en_SG"}, + {0x2C09, "en_TT"}, + {0x0409, "en_US"}, + {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */ + {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */ + {0x1c09, "en_ZA"}, + {0x3009, "en_ZW"}, + {0x2409, "en_029"}, + {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_UM"} /* Alias for en_US. Leave last. 
On Windows8+ This is 0x1000 or dynamically assigned */ +}; + +ILCID_POSIX_SUBTABLE(en_US_POSIX) { + {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */ +}; + +// Windows doesn't know POSIX or BCP47 Unicode traditional sort names +ILCID_POSIX_SUBTABLE(es) { + {0x0a, "es"}, + {0x2c0a, "es_AR"}, + {0x400a, "es_BO"}, + {0x340a, "es_CL"}, + {0x240a, "es_CO"}, + {0x140a, "es_CR"}, + {0x5c0a, "es_CU"}, + {0x1c0a, "es_DO"}, + {0x300a, "es_EC"}, + {0x0c0a, "es_ES"}, /*Modern sort.*/ + {0x100a, "es_GT"}, + {0x480a, "es_HN"}, + {0x080a, "es_MX"}, + {0x4c0a, "es_NI"}, + {0x180a, "es_PA"}, + {0x280a, "es_PE"}, + {0x500a, "es_PR"}, + {0x3c0a, "es_PY"}, + {0x440a, "es_SV"}, + {0x540a, "es_US"}, + {0x380a, "es_UY"}, + {0x200a, "es_VE"}, + {0x580a, "es_419"}, + {0x040a, "es_ES@collation=traditional"}, + {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE) +ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES) + +/* ISO-639 doesn't distinguish between Persian and Dari.*/ +ILCID_POSIX_SUBTABLE(fa) { + {0x29, "fa"}, + {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */ + {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ +}; + + +/* duplicate for roundtripping */ +ILCID_POSIX_SUBTABLE(fa_AF) { + {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */ + {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ +}; + +ILCID_POSIX_SUBTABLE(ff) { + {0x67, "ff"}, + {0x7c67, "ff_Latn"}, + {0x0867, "ff_Latn_SN"}, + {0x0467, "ff_NG"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI) +ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH) +ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO) + +ILCID_POSIX_SUBTABLE(fr) { + {0x0c, "fr"}, + {0x080c, "fr_BE"}, + {0x0c0c, "fr_CA"}, + {0x240c, "fr_CD"}, + {0x240c, "fr_CG"}, + {0x100c, "fr_CH"}, + {0x300c, "fr_CI"}, + {0x2c0c, "fr_CM"}, + {0x040c, "fr_FR"}, + {0x3c0c, "fr_HT"}, + {0x140c, "fr_LU"}, + {0x380c, "fr_MA"}, + {0x180c, "fr_MC"}, + {0x340c, "fr_ML"}, + {0x200c, "fr_RE"}, + {0x280c, 
"fr_SN"}, + {0xe40c, "fr_015"}, + {0x1c0c, "fr_029"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG) + +ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL) + +ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */ + {0x3c, "ga"}, + {0x083c, "ga_IE"}, + {0x043c, "gd_GB"} +}; + +ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */ + {0x91, "gd"}, + {0x0491, "gd_GB"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES) +ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY) +ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR) + +ILCID_POSIX_SUBTABLE(ha) { + {0x68, "ha"}, + {0x7c68, "ha_Latn"}, + {0x0468, "ha_Latn_NG"}, +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US) +ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL) +ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN) + +/* This LCID is really four different locales.*/ +ILCID_POSIX_SUBTABLE(hr) { + {0x1a, "hr"}, + {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */ + {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */ + {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */ + {0x781a, "bs"}, /* Bosnian */ + {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */ + {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */ + {0x101a, "hr_BA"}, /* Croatian in Bosnia */ + {0x041a, "hr_HR"}, /* Croatian*/ + {0x2c1a, "sr_Latn_ME"}, + {0x241a, "sr_Latn_RS"}, + {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */ + {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/ + {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */ + {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */ + {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/ + {0x301a, "sr_Cyrl_ME"}, + {0x281a, "sr_Cyrl_RS"}, + {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */ + {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. 
*/ +}; + +ILCID_POSIX_SUBTABLE(hsb) { + {0x2E, "hsb"}, + {0x042E, "hsb_DE"}, + {0x082E, "dsb_DE"}, + {0x7C2E, "dsb"}, +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) +ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) +ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG) +ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) +ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) +ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) +ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS) + +ILCID_POSIX_SUBTABLE(it) { + {0x10, "it"}, + {0x0810, "it_CH"}, + {0x0410, "it_IT"} +}; + +ILCID_POSIX_SUBTABLE(iu) { + {0x5d, "iu"}, + {0x045d, "iu_Cans_CA"}, + {0x785d, "iu_Cans"}, + {0x085d, "iu_Latn_CA"}, + {0x7c5d, "iu_Latn"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/ +ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP) +ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE) +ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ) +ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL) +ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH) +ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN) + +ILCID_POSIX_SUBTABLE(ko) { + {0x12, "ko"}, + {0x0812, "ko_KP"}, + {0x0412, "ko_KR"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG) + +ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */ + {0x60, "ks"}, + {0x0860, "ks_IN"}, /* Documentation doesn't mention script */ + {0x0460, "ks_Arab_IN"}, + {0x0860, "ks_Deva_IN"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */ +ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */ +ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) +ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA) +ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT) +ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV) +ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ) +ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK) +ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN) + +ILCID_POSIX_SUBTABLE(mn) { + {0x50, "mn"}, + {0x0450, "mn_MN"}, + {0x7c50, "mn_Mong"}, 
+ {0x0850, "mn_Mong_CN"}, + {0x0850, "mn_CN"}, + {0x7850, "mn_Cyrl"}, + {0x0c50, "mn_Mong_MN"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA) +ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN) + +ILCID_POSIX_SUBTABLE(ms) { + {0x3e, "ms"}, + {0x083e, "ms_BN"}, /* Brunei Darussalam*/ + {0x043e, "ms_MY"} /* Malaysia*/ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT) +ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM) + +ILCID_POSIX_SUBTABLE(ne) { + {0x61, "ne"}, + {0x0861, "ne_IN"}, /* India*/ + {0x0461, "ne_NP"} /* Nepal*/ +}; + +ILCID_POSIX_SUBTABLE(nl) { + {0x13, "nl"}, + {0x0813, "nl_BE"}, + {0x0413, "nl_NL"} +}; + +/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/ +// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here. +ILCID_POSIX_SUBTABLE(no) { + {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ + {0x7c14, "nb"}, /* really nb */ + {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */ + {0x0414, "no_NO"}, /* really nb_NO */ + {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */ + {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. 
*/ + {0x0814, "no_NO_NY"}/* really nn_NO */ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */ +ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR) + +ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */ + {0x72, "om"}, + {0x0472, "om_ET"}, + {0x0472, "gaz_ET"} +}; + +/* Declared as or_IN to get around compiler errors*/ +ILCID_POSIX_SUBTABLE(or_IN) { + {0x48, "or"}, + {0x0448, "or_IN"}, +}; + + +ILCID_POSIX_SUBTABLE(pa) { + {0x46, "pa"}, + {0x0446, "pa_IN"}, + {0x0846, "pa_PK"}, + {0x0846, "pa_Arab_PK"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN) +ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) +ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) + +ILCID_POSIX_SUBTABLE(pt) { + {0x16, "pt"}, + {0x0416, "pt_BR"}, + {0x0816, "pt_PT"} +}; + +ILCID_POSIX_SUBTABLE(qu) { + {0x6b, "qu"}, + {0x046b, "qu_BO"}, + {0x086b, "qu_EC"}, + {0x0C6b, "qu_PE"}, + {0x046b, "quz_BO"}, + {0x086b, "quz_EC"}, + {0x0C6b, "quz_PE"} +}; + +ILCID_POSIX_SUBTABLE(quc) { + {0x93, "quc"}, + {0x0493, "quc_CO"}, + /* + "quc_Latn_GT" is an exceptional case. Language ID of "quc" + is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be + under the group of "qut". "qut" is a retired ISO 639-3 language + code for West Central Quiche, and merged to "quc". + It looks Windows previously reserved "qut" for K'iche', but, + decided to use "quc" when adding a locale for K'iche' (Guatemala). + + This data structure used here assumes language ID bits in + LCID is unique for alphabetic language code. But this is not true + for "quc_Latn_GT". If we don't have the data below, LCID look up + by alphabetic locale ID (POSIX) will fail. The same entry is found + under "qut" below, which is required for reverse look up. + */ + {0x0486, "quc_Latn_GT"} +}; + +ILCID_POSIX_SUBTABLE(qut) { + {0x86, "qut"}, + {0x0486, "qut_GT"}, + /* + See the note in "quc" above. 
+ */ + {0x0486, "quc_Latn_GT"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH) + +ILCID_POSIX_SUBTABLE(ro) { + {0x18, "ro"}, + {0x0418, "ro_RO"}, + {0x0818, "ro_MD"} +}; + +// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT. +// More likely this is a similar concept to the Windows 0x7f Invariant locale "" +// (Except that it's not invariant in ICU) +ILCID_POSIX_SUBTABLE(root) { + {0x00, "root"} +}; + +ILCID_POSIX_SUBTABLE(ru) { + {0x19, "ru"}, + {0x0419, "ru_RU"}, + {0x0819, "ru_MD"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW) +ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU) + +ILCID_POSIX_SUBTABLE(sd) { + {0x59, "sd"}, + {0x0459, "sd_IN"}, + {0x0459, "sd_Deva_IN"}, + {0x0859, "sd_PK"} +}; + +ILCID_POSIX_SUBTABLE(se) { + {0x3b, "se"}, + {0x0c3b, "se_FI"}, + {0x043b, "se_NO"}, + {0x083b, "se_SE"}, + {0x783b, "sma"}, + {0x183b, "sma_NO"}, + {0x1c3b, "sma_SE"}, + {0x7c3b, "smj"}, + {0x703b, "smn"}, + {0x743b, "sms"}, + {0x103b, "smj_NO"}, + {0x143b, "smj_SE"}, + {0x243b, "smn_FI"}, + {0x203b, "sms_FI"}, +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK) +ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK) +ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI) + +ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */ + {0x77, "so"}, + {0x0477, "so_ET"}, + {0x0477, "so_SO"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL) +ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA) + +ILCID_POSIX_SUBTABLE(sv) { + {0x1d, "sv"}, + {0x081d, "sv_FI"}, + {0x041d, "sv_SE"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE) +ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY) + +ILCID_POSIX_SUBTABLE(ta) { + {0x49, "ta"}, + {0x0449, "ta_IN"}, + {0x0849, "ta_LK"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN) + +/* Cyrillic based by default */ +ILCID_POSIX_SUBTABLE(tg) { + {0x28, "tg"}, + {0x7c28, "tg_Cyrl"}, + {0x0428, "tg_Cyrl_TJ"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH) + 
+ILCID_POSIX_SUBTABLE(ti) { + {0x73, "ti"}, + {0x0873, "ti_ER"}, + {0x0473, "ti_ET"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM) + +ILCID_POSIX_SUBTABLE(tn) { + {0x32, "tn"}, + {0x0832, "tn_BW"}, + {0x0432, "tn_ZA"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR) +ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA) +ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU) + +ILCID_POSIX_SUBTABLE(tzm) { + {0x5f, "tzm"}, + {0x7c5f, "tzm_Latn"}, + {0x085f, "tzm_Latn_DZ"}, + {0x105f, "tzm_Tfng_MA"}, + {0x045f, "tzm_Arab_MA"}, + {0x045f, "tmz"} +}; + +ILCID_POSIX_SUBTABLE(ug) { + {0x80, "ug"}, + {0x0480, "ug_CN"}, + {0x0480, "ug_Arab_CN"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA) + +ILCID_POSIX_SUBTABLE(ur) { + {0x20, "ur"}, + {0x0820, "ur_IN"}, + {0x0420, "ur_PK"} +}; + +ILCID_POSIX_SUBTABLE(uz) { + {0x43, "uz"}, + {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */ + {0x7843, "uz_Cyrl"}, /* Cyrillic based */ + {0x0843, "uz_UZ"}, /* Cyrillic based */ + {0x0443, "uz_Latn_UZ"}, /* Latin based */ + {0x7c43, "uz_Latn"} /* Latin based */ +}; + +ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */ + {0x33, "ve"}, + {0x0433, "ve_ZA"}, + {0x0433, "ven_ZA"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) +ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN) +ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) +ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi) +ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) + +// Windows & ICU tend to different names for some of these +// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works. 
+ILCID_POSIX_SUBTABLE(zh) { + {0x0004, "zh_Hans"}, + {0x7804, "zh"}, + {0x0804, "zh_CN"}, + {0x0804, "zh_Hans_CN"}, + {0x0c04, "zh_Hant_HK"}, + {0x0c04, "zh_HK"}, + {0x1404, "zh_Hant_MO"}, + {0x1404, "zh_MO"}, + {0x1004, "zh_Hans_SG"}, + {0x1004, "zh_SG"}, + {0x0404, "zh_Hant_TW"}, + {0x7c04, "zh_Hant"}, + {0x0404, "zh_TW"}, + {0x30404,"zh_Hant_TW"}, /* Bopomofo order */ + {0x30404,"zh_TW"}, /* Bopomofo order */ + {0x20004,"zh@collation=stroke"}, + {0x20404,"zh_Hant@collation=stroke"}, + {0x20404,"zh_Hant_TW@collation=stroke"}, + {0x20404,"zh_TW@collation=stroke"}, + {0x20804,"zh_Hans@collation=stroke"}, + {0x20804,"zh_Hans_CN@collation=stroke"}, + {0x20804,"zh_CN@collation=stroke"} + // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804 +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA) + +/* This must be static and grouped by LCID. */ +static const ILcidPosixMap gPosixIDmap[] = { + ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */ + ILCID_POSIX_MAP(am), /* am Amharic 0x5e */ + ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */ + ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */ + ILCID_POSIX_MAP(as), /* as Assamese 0x4d */ + ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */ + ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */ + ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */ +/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */ + ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */ + ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */ + ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */ + ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */ + ILCID_POSIX_MAP(br), /* br Breton 0x7e */ + ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */ + ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */ + ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */ + ILCID_POSIX_MAP(co), /* co Corsican 0x83 */ + ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */ + ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */ + ILCID_POSIX_MAP(da), /* da Danish 0x06 */ + ILCID_POSIX_MAP(de), /* de German 0x07 */ + ILCID_POSIX_MAP(dv), /* dv Divehi 
0x65 */ + ILCID_POSIX_MAP(el), /* el Greek 0x08 */ + ILCID_POSIX_MAP(en), /* en English 0x09 */ + ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */ + ILCID_POSIX_MAP(es), /* es Spanish 0x0a */ + ILCID_POSIX_MAP(et), /* et Estonian 0x25 */ + ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */ + ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */ + ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */ + ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */ + ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */ + ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */ + ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */ + ILCID_POSIX_MAP(fr), /* fr French 0x0c */ + ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */ + ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */ + ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */ + ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */ + ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */ + ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */ + ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */ + ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */ + ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */ + ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */ + ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */ + ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */ + ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */ + ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */ + ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */ + ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */ + ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */ + ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */ + ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */ + ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */ + ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */ + ILCID_POSIX_MAP(it), /* it Italian 0x10 */ + ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */ + ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */ + ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */ + ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */ + ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */ + ILCID_POSIX_MAP(kl), /* kl 
Kalaallisut 0x6f */ + ILCID_POSIX_MAP(km), /* km Khmer 0x53 */ + ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */ + ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */ + ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */ + ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */ + ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */ + ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */ + ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */ + ILCID_POSIX_MAP(la), /* la Latin 0x76 */ + ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */ + ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */ + ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */ + ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */ + ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */ + ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */ + ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */ + ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */ + ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */ + ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */ + ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */ + ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */ + ILCID_POSIX_MAP(my), /* my Burmese 0x55 */ +/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */ + ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */ + ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */ +/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */ + ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */ + ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */ + ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */ + ILCID_POSIX_MAP(om), /* om Oromo 0x72 */ + ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */ + ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */ + ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */ + ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */ + ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */ + ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */ + ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */ + ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */ + ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */ + ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */ + ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */ + ILCID_POSIX_MAP(root), /* root 0x00 */ + 
ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */ + ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */ + ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */ + ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */ + ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */ + ILCID_POSIX_MAP(se), /* se Sami 0x3b */ +/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */ + ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */ + ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */ + ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */ + ILCID_POSIX_MAP(so), /* so Somali 0x77 */ + ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */ +/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */ + ILCID_POSIX_MAP(st), /* st Sutu 0x30 */ + ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */ + ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */ + ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */ + ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */ + ILCID_POSIX_MAP(te), /* te Telugu 0x4a */ + ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */ + ILCID_POSIX_MAP(th), /* th Thai 0x1e */ + ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */ + ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */ + ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */ + ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */ + ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */ + ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */ + ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */ + ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */ + ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */ + ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */ + ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */ + ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */ + ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */ + ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */ + ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */ + ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */ + ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */ + ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */ + ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */ +}; + +static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap); + +/** + * Do not call this function. It is called by hostID. 
+ * The function is not private because this struct must stay as a C struct, + * and this is an internal class. + */ +static int32_t +idCmp(const char* id1, const char* id2) +{ + int32_t diffIdx = 0; + while (*id1 == *id2 && *id1 != 0) { + diffIdx++; + id1++; + id2++; + } + return diffIdx; +} + +/** + * Searches for a Windows LCID + * + * @param posixid the Posix style locale id. + * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has + * no equivalent Windows LCID. + * @return the LCID + */ +static uint32_t +getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status) +{ + int32_t bestIdx = 0; + int32_t bestIdxDiff = 0; + int32_t posixIDlen = (int32_t)uprv_strlen(posixID); + uint32_t idx; + + for (idx = 0; idx < this_0->numRegions; idx++ ) { + int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID); + if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) { + if (posixIDlen == sameChars) { + /* Exact match */ + return this_0->regionMaps[idx].hostID; + } + bestIdxDiff = sameChars; + bestIdx = idx; + } + } + /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */ + /* We also have to make sure that sid and si and similar string subsets don't match. */ + if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@') + && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) + { + *status = U_USING_FALLBACK_WARNING; + return this_0->regionMaps[bestIdx].hostID; + } + + /*no match found */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return this_0->regionMaps->hostID; +} + +static const char* +getPosixID(const ILcidPosixMap *this_0, uint32_t hostID) +{ + uint32_t i; + for (i = 0; i <= this_0->numRegions; i++) + { + if (this_0->regionMaps[i].hostID == hostID) + { + return this_0->regionMaps[i].posixID; + } + } + + /* If you get here, then no matching region was found, + so return the language id with the wild card region. 
*/ + return this_0->regionMaps[0].posixID; +} + +/* +////////////////////////////////////// +// +// LCID --> POSIX +// +///////////////////////////////////// +*/ +#ifdef USE_WINDOWS_LCID_MAPPING_API +/* + * Various language tags needs to be changed: + * quz -> qu + * prs -> fa + */ +#define FIX_LANGUAGE_ID_TAG(buffer, len) \ + if (len >= 3) { \ + if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\ + buffer[2] = 0; \ + uprv_strcat(buffer, buffer+3); \ + } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\ + buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \ + uprv_strcat(buffer, buffer+3); \ + } \ + } + +#endif +U_CAPI int32_t +uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status) +{ + uint16_t langID; + uint32_t localeIndex; + UBool bLookup = TRUE; + const char *pPosixID = NULL; + +#ifdef USE_WINDOWS_LCID_MAPPING_API + // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and + // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for + // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot + // use the Windows API to resolve locale ID for this specific case. + if ((hostid & 0x3FF) != 0x92) { + int32_t tmpLen = 0; + UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH + char locName[LOCALE_NAME_MAX_LENGTH]; // ICU name can't be longer than Windows name + + // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names. + tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES); + if (tmpLen > 1) { + int32_t i = 0; + // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort. 
+ bLookup = FALSE; + for (i = 0; i < UPRV_LENGTHOF(locName); i++) + { + locName[i] = (char)(windowsLocaleName[i]); + + // Windows locale name may contain sorting variant, such as "es-ES_tradnl". + // In such cases, we need special mapping data found in the hardcoded table + // in this source file. + if (windowsLocaleName[i] == L'_') + { + // Keep the base locale, without variant + // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.? + locName[i] = '\0'; + tmpLen = i; + bLookup = TRUE; + break; + } + else if (windowsLocaleName[i] == L'-') + { + // Windows names use -, ICU uses _ + locName[i] = '_'; + } + else if (windowsLocaleName[i] == L'\0') + { + // No point in doing more work than necessary + break; + } + } + // TODO: Need to understand this better, why isn't it an alias? + FIX_LANGUAGE_ID_TAG(locName, tmpLen); + pPosixID = locName; + } + } +#endif // USE_WINDOWS_LCID_MAPPING_API + + if (bLookup) { + const char *pCandidate = NULL; + langID = LANGUAGE_LCID(hostid); + + for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) { + if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) { + pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid); + break; + } + } + + /* On Windows, when locale name has a variant, we still look up the hardcoded table. + If a match in the hardcoded table is longer than the Windows locale name without + variant, we use the one as the result */ + if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) { + pPosixID = pCandidate; + } + } + + if (pPosixID) { + int32_t resLen = uprv_strlen(pPosixID); + int32_t copyLen = resLen <= posixIDCapacity ? 
resLen : posixIDCapacity; + uprv_memcpy(posixID, pPosixID, copyLen); + if (resLen < posixIDCapacity) { + posixID[resLen] = 0; + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ZERO_ERROR; + } + } else if (resLen == posixIDCapacity) { + *status = U_STRING_NOT_TERMINATED_WARNING; + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + } + return resLen; + } + + /* no match found */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; +} + +/* +////////////////////////////////////// +// +// POSIX --> LCID +// This should only be called from uloc_getLCID. +// The locale ID must be in canonical form. +// +///////////////////////////////////// +*/ +U_CAPI uint32_t +uprv_convertToLCIDPlatform(const char* localeID) +{ + // The purpose of this function is to leverage native platform name->lcid + // conversion functionality when available. +#ifdef USE_WINDOWS_LCID_MAPPING_API + DWORD nameLCIDFlags = 0; + UErrorCode myStatus = U_ZERO_ERROR; + + // First check for a Windows name->LCID match, fall through to catch + // ICU special cases, but Windows may know it already. +#if LOCALE_ALLOW_NEUTRAL_NAMES + nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES; +#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */ + + int32_t len; + char collVal[ULOC_KEYWORDS_CAPACITY] = {}; + char baseName[ULOC_FULLNAME_CAPACITY] = {}; + const char * mylocaleID = localeID; + + // Check any for keywords. + if (uprv_strchr(localeID, '@')) + { + len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus); + if (U_SUCCESS(myStatus) && len > 0) + { + // If it contains the keyword collation, return 0 so that the LCID lookup table will be used. + return 0; + } + else + { + // If the locale ID contains keywords other than collation, just use the base name. 
+ len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus); + + if (U_SUCCESS(myStatus) && len > 0) + { + baseName[len] = 0; + mylocaleID = baseName; + } + } + } + + char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form + int32_t bcp47Len = uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus); + + if (U_SUCCESS(myStatus)) + { + // Need it to be UTF-16, not 8-bit + wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++) + { + if (asciiBCP47Tag[i] == '\0') + { + break; + } + else + { + // Copy the character + bcp47Tag[i] = static_cast(asciiBCP47Tag[i]); + } + } + + if (i < (UPRV_LENGTHOF(bcp47Tag) - 1)) + { + // Ensure it's null terminated + bcp47Tag[i] = L'\0'; + LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags); + if (lcid > 0) + { + // Found LCID from windows, return that one, unless its completely ambiguous + // LOCALE_USER_DEFAULT and transients are OK because they will round trip + // for this process. + if (lcid != LOCALE_CUSTOM_UNSPECIFIED) + { + return lcid; + } + } + } + } +#endif /* USE_WINDOWS_LCID_MAPPING_API */ + + // No found, or not implemented on platforms without native name->lcid conversion + return 0; +} + +U_CAPI uint32_t +uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status) +{ + // This function does the table lookup when native platform name->lcid conversion isn't available, + // or for locales that don't follow patterns the platform expects. + uint32_t low = 0; + uint32_t high = gLocaleCount; + uint32_t mid; + uint32_t oldmid = 0; + int32_t compVal; + + uint32_t value = 0; + uint32_t fallbackValue = (uint32_t)-1; + UErrorCode myStatus; + uint32_t idx; + + /* Check for incomplete id. 
*/ + if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) { + return 0; + } + + /*Binary search for the map entry for normal cases */ + + while (high > low) /*binary search*/{ + + mid = (high+low) >> 1; /*Finds median*/ + + if (mid == oldmid) + break; + + compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID); + if (compVal < 0){ + high = mid; + } + else if (compVal > 0){ + low = mid; + } + else /*we found it*/{ + return getHostID(&gPosixIDmap[mid], posixID, status); + } + oldmid = mid; + } + + /* + * Sometimes we can't do a binary search on posixID because some LCIDs + * go to different locales. We hit one of those special cases. + */ + for (idx = 0; idx < gLocaleCount; idx++ ) { + myStatus = U_ZERO_ERROR; + value = getHostID(&gPosixIDmap[idx], posixID, &myStatus); + if (myStatus == U_ZERO_ERROR) { + return value; + } + else if (myStatus == U_USING_FALLBACK_WARNING) { + fallbackValue = value; + } + } + + if (fallbackValue != (uint32_t)-1) { + *status = U_USING_FALLBACK_WARNING; + return fallbackValue; + } + + /* no match found */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; /* return international (root) */ +} diff --git a/deps/icu-small/source/common/locmap.h b/deps/icu-small/source/common/locmap.h index a9b892ee42..2d7a3d37a0 100644 --- a/deps/icu-small/source/common/locmap.h +++ b/deps/icu-small/source/common/locmap.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -32,7 +32,8 @@ U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status); -/* Don't call this function directly. Use uloc_getLCID instead. */ +/* Don't call these functions directly. Use uloc_getLCID instead. 
*/ +U_CAPI uint32_t uprv_convertToLCIDPlatform(const char *localeID); // Leverage platform conversion if possible U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status); #endif /* LOCMAP_H */ diff --git a/deps/icu-small/source/common/locresdata.cpp b/deps/icu-small/source/common/locresdata.cpp index 9ede0cac20..f890411c9a 100644 --- a/deps/icu-small/source/common/locresdata.cpp +++ b/deps/icu-small/source/common/locresdata.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: loclikely.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/locutil.cpp b/deps/icu-small/source/common/locutil.cpp index 2f704953a2..02d2be50ca 100644 --- a/deps/icu-small/source/common/locutil.cpp +++ b/deps/icu-small/source/common/locutil.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/locutil.h b/deps/icu-small/source/common/locutil.h index 64f7dcc2c0..31bfffd7a5 100644 --- a/deps/icu-small/source/common/locutil.h +++ b/deps/icu-small/source/common/locutil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/messageimpl.h b/deps/icu-small/source/common/messageimpl.h index 980f777e8d..dc7a6edd6c 100644 --- a/deps/icu-small/source/common/messageimpl.h +++ b/deps/icu-small/source/common/messageimpl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: messageimpl.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/messagepattern.cpp b/deps/icu-small/source/common/messagepattern.cpp index ddd2287e4c..2f79780bd2 100644 --- a/deps/icu-small/source/common/messagepattern.cpp +++ b/deps/icu-small/source/common/messagepattern.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: messagepattern.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/msvcres.h b/deps/icu-small/source/common/msvcres.h index 0514ee440e..d6581b27eb 100644 --- a/deps/icu-small/source/common/msvcres.h +++ b/deps/icu-small/source/common/msvcres.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html //{{NO_DEPENDENCIES}} // Copyright (c) 2003-2010 International Business Machines diff --git a/deps/icu-small/source/common/mutex.h b/deps/icu-small/source/common/mutex.h index 78de718751..04c22b4a37 100644 --- a/deps/icu-small/source/common/mutex.h +++ b/deps/icu-small/source/common/mutex.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/norm2_nfc_data.h b/deps/icu-small/source/common/norm2_nfc_data.h index 4ddba0f8ba..9295404a35 100644 --- a/deps/icu-small/source/common/norm2_nfc_data.h +++ b/deps/icu-small/source/common/norm2_nfc_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 1999-2016, International Business Machines diff --git a/deps/icu-small/source/common/norm2allmodes.h b/deps/icu-small/source/common/norm2allmodes.h index 943e83d567..9516817e4a 100644 --- a/deps/icu-small/source/common/norm2allmodes.h +++ b/deps/icu-small/source/common/norm2allmodes.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/normalizer2.cpp b/deps/icu-small/source/common/normalizer2.cpp index 34f1cf063d..dfdaa3bdce 100644 --- a/deps/icu-small/source/common/normalizer2.cpp +++ b/deps/icu-small/source/common/normalizer2.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. 
and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: normalizer2.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/normalizer2impl.cpp b/deps/icu-small/source/common/normalizer2impl.cpp index 906c03a689..41305cc587 100644 --- a/deps/icu-small/source/common/normalizer2impl.cpp +++ b/deps/icu-small/source/common/normalizer2impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: normalizer2impl.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/normalizer2impl.h b/deps/icu-small/source/common/normalizer2impl.h index a6bf967979..946abee98f 100644 --- a/deps/icu-small/source/common/normalizer2impl.h +++ b/deps/icu-small/source/common/normalizer2impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: normalizer2impl.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -176,7 +176,7 @@ public: lastCC=0; } void copyReorderableSuffixTo(UnicodeString &s) const { - s.setTo(reorderStart, (int32_t)(limit-reorderStart)); + s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart)); } private: /* diff --git a/deps/icu-small/source/common/normlzr.cpp b/deps/icu-small/source/common/normlzr.cpp index 3cf7446f17..3911c90b5b 100644 --- a/deps/icu-small/source/common/normlzr.cpp +++ b/deps/icu-small/source/common/normlzr.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************* @@ -23,6 +23,12 @@ #include "normalizer2impl.h" #include "uprops.h" // for uniset_getUnicode32Instance() +#if defined(_ARM64_) && defined(move32) + // System can define move32 intrinsics, but the char iters define move32 method + // using same undef trick in headers, so undef here to re-enable the method. 
+#undef move32 +#endif + U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) @@ -40,7 +46,7 @@ Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : init(); } -Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : +Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) : UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), text(new UCharCharacterIterator(str, length)), currentIndex(0), nextIndex(0), @@ -435,7 +441,7 @@ Normalizer::setText(const CharacterIterator& newText, } void -Normalizer::setText(const UChar* newText, +Normalizer::setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status) { diff --git a/deps/icu-small/source/common/parsepos.cpp b/deps/icu-small/source/common/parsepos.cpp index af6ac6c1f7..56c6c78813 100644 --- a/deps/icu-small/source/common/parsepos.cpp +++ b/deps/icu-small/source/common/parsepos.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/patternprops.cpp b/deps/icu-small/source/common/patternprops.cpp index 30c3f68391..01e33ce109 100644 --- a/deps/icu-small/source/common/patternprops.cpp +++ b/deps/icu-small/source/common/patternprops.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: patternprops.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/patternprops.h b/deps/icu-small/source/common/patternprops.h index f309c2dbad..a42eb3c244 100644 --- a/deps/icu-small/source/common/patternprops.h +++ b/deps/icu-small/source/common/patternprops.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: patternprops.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/pluralmap.cpp b/deps/icu-small/source/common/pluralmap.cpp index d3e892124e..a85dd1c979 100644 --- a/deps/icu-small/source/common/pluralmap.cpp +++ b/deps/icu-small/source/common/pluralmap.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2015, International Business Machines Corporation and diff --git a/deps/icu-small/source/common/pluralmap.h b/deps/icu-small/source/common/pluralmap.h index 76e95c5826..db644093a1 100644 --- a/deps/icu-small/source/common/pluralmap.h +++ b/deps/icu-small/source/common/pluralmap.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/propname.cpp b/deps/icu-small/source/common/propname.cpp index 4107869ee1..a12eb7d913 100644 --- a/deps/icu-small/source/common/propname.cpp +++ b/deps/icu-small/source/common/propname.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/propname.h b/deps/icu-small/source/common/propname.h index c93d3d7503..1a8ced5b87 100644 --- a/deps/icu-small/source/common/propname.h +++ b/deps/icu-small/source/common/propname.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/propname_data.h b/deps/icu-small/source/common/propname_data.h index 4863b26dc7..c15b2a4e04 100644 --- a/deps/icu-small/source/common/propname_data.h +++ b/deps/icu-small/source/common/propname_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 1999-2016, International Business Machines diff --git a/deps/icu-small/source/common/propsvec.c b/deps/icu-small/source/common/propsvec.c deleted file mode 100644 index ed4d89954b..0000000000 --- a/deps/icu-small/source/common/propsvec.c +++ /dev/null @@ -1,527 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: propsvec.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb22 -* created by: Markus W. Scherer -* -* Store bits (Unicode character properties) in bit set vectors. -*/ - -#include -#include "unicode/utypes.h" -#include "cmemory.h" -#include "utrie.h" -#include "utrie2.h" -#include "uarrsort.h" -#include "propsvec.h" -#include "uassert.h" - -struct UPropsVectors { - uint32_t *v; - int32_t columns; /* number of columns, plus two for start & limit values */ - int32_t maxRows; - int32_t rows; - int32_t prevRow; /* search optimization: remember last row seen */ - UBool isCompacted; -}; - -#define UPVEC_INITIAL_ROWS (1<<12) -#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) -#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) - -U_CAPI UPropsVectors * U_EXPORT2 -upvec_open(int32_t columns, UErrorCode *pErrorCode) { - UPropsVectors *pv; - uint32_t *v, *row; - uint32_t cp; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(columns<1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - columns+=2; /* count range start and limit columns */ - - pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); - v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); - if(pv==NULL || v==NULL) { - uprv_free(pv); - uprv_free(v); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(pv, 0, sizeof(UPropsVectors)); - pv->v=v; - pv->columns=columns; - pv->maxRows=UPVEC_INITIAL_ROWS; - pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); - - /* set the all-Unicode row and the special-value rows */ - row=pv->v; - uprv_memset(row, 0, pv->rows*columns*4); - row[0]=0; - row[1]=0x110000; - 
row+=columns; - for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { - row[0]=cp; - row[1]=cp+1; - row+=columns; - } - return pv; -} - -U_CAPI void U_EXPORT2 -upvec_close(UPropsVectors *pv) { - if(pv!=NULL) { - uprv_free(pv->v); - uprv_free(pv); - } -} - -static uint32_t * -_findRow(UPropsVectors *pv, UChar32 rangeStart) { - uint32_t *row; - int32_t columns, i, start, limit, prevRow; - - columns=pv->columns; - limit=pv->rows; - prevRow=pv->prevRow; - - /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ - row=pv->v+prevRow*columns; - if(rangeStart>=(UChar32)row[0]) { - if(rangeStart<(UChar32)row[1]) { - /* same row as last seen */ - return row; - } else if(rangeStart<(UChar32)(row+=columns)[1]) { - /* next row after the last one */ - pv->prevRow=prevRow+1; - return row; - } else if(rangeStart<(UChar32)(row+=columns)[1]) { - /* second row after the last one */ - pv->prevRow=prevRow+2; - return row; - } else if((rangeStart-(UChar32)row[1])<10) { - /* we are close, continue looping */ - prevRow+=2; - do { - ++prevRow; - row+=columns; - } while(rangeStart>=(UChar32)row[1]); - pv->prevRow=prevRow; - return row; - } - } else if(rangeStart<(UChar32)pv->v[1]) { - /* the very first row */ - pv->prevRow=0; - return pv->v; - } - - /* do a binary search for the start of the range */ - start=0; - while(startv+i*columns; - if(rangeStart<(UChar32)row[0]) { - limit=i; - } else if(rangeStart<(UChar32)row[1]) { - pv->prevRow=i; - return row; - } else { - start=i; - } - } - - /* must be found because all ranges together always cover all of Unicode */ - pv->prevRow=start; - return pv->v+start*columns; -} - -U_CAPI void U_EXPORT2 -upvec_setValue(UPropsVectors *pv, - UChar32 start, UChar32 end, - int32_t column, - uint32_t value, uint32_t mask, - UErrorCode *pErrorCode) { - uint32_t *firstRow, *lastRow; - int32_t columns; - UChar32 limit; - UBool splitFirstRow, splitLastRow; - - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return; - } - 
if( pv==NULL || - start<0 || start>end || end>UPVEC_MAX_CP || - column<0 || column>=(pv->columns-2) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(pv->isCompacted) { - *pErrorCode=U_NO_WRITE_PERMISSION; - return; - } - limit=end+1; - - /* initialize */ - columns=pv->columns; - column+=2; /* skip range start and limit columns */ - value&=mask; - - /* find the rows whose ranges overlap with the input range */ - - /* find the first and last rows, always successful */ - firstRow=_findRow(pv, start); - lastRow=_findRow(pv, end); - - /* - * Rows need to be split if they partially overlap with the - * input range (only possible for the first and last rows) - * and if their value differs from the input value. - */ - splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); - splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); - - /* split first/last rows if necessary */ - if(splitFirstRow || splitLastRow) { - int32_t count, rows; - - rows=pv->rows; - if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { - uint32_t *newVectors; - int32_t newMaxRows; - - if(pv->maxRowsmaxRowsv, (size_t)rows*columns*4); - firstRow=newVectors+(firstRow-pv->v); - lastRow=newVectors+(lastRow-pv->v); - uprv_free(pv->v); - pv->v=newVectors; - pv->maxRows=newMaxRows; - } - - /* count the number of row cells to move after the last row, and move them */ - count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); - if(count>0) { - uprv_memmove( - lastRow+(1+splitFirstRow+splitLastRow)*columns, - lastRow+columns, - count*4); - } - pv->rows=rows+splitFirstRow+splitLastRow; - - /* split the first row, and move the firstRow pointer to the second part */ - if(splitFirstRow) { - /* copy all affected rows up one and move the lastRow pointer */ - count = (int32_t)((lastRow-firstRow)+columns); - uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); - lastRow+=columns; - - /* split the range and move the firstRow pointer */ - 
firstRow[1]=firstRow[columns]=(uint32_t)start; - firstRow+=columns; - } - - /* split the last row */ - if(splitLastRow) { - /* copy the last row data */ - uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); - - /* split the range and move the firstRow pointer */ - lastRow[1]=lastRow[columns]=(uint32_t)limit; - } - } - - /* set the "row last seen" to the last row for the range */ - pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); - - /* set the input value in all remaining rows */ - firstRow+=column; - lastRow+=column; - mask=~mask; - for(;;) { - *firstRow=(*firstRow&mask)|value; - if(firstRow==lastRow) { - break; - } - firstRow+=columns; - } -} - -U_CAPI uint32_t U_EXPORT2 -upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { - uint32_t *row; - UPropsVectors *ncpv; - - if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { - return 0; - } - ncpv=(UPropsVectors *)pv; - row=_findRow(ncpv, c); - return row[2+column]; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, - UChar32 *pRangeStart, UChar32 *pRangeEnd) { - uint32_t *row; - int32_t columns; - - if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { - return NULL; - } - - columns=pv->columns; - row=pv->v+rowIndex*columns; - if(pRangeStart!=NULL) { - *pRangeStart=(UChar32)row[0]; - } - if(pRangeEnd!=NULL) { - *pRangeEnd=(UChar32)row[1]-1; - } - return row+2; -} - -static int32_t U_CALLCONV -upvec_compareRows(const void *context, const void *l, const void *r) { - const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; - const UPropsVectors *pv=(const UPropsVectors *)context; - int32_t i, count, columns; - - count=columns=pv->columns; /* includes start/limit columns */ - - /* start comparing after start/limit but wrap around to them */ - i=2; - do { - if(left[i]!=right[i]) { - return left[i]0); - - return 0; -} - -U_CAPI void U_EXPORT2 -upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void 
*context, UErrorCode *pErrorCode) { - uint32_t *row; - int32_t i, columns, valueColumns, rows, count; - UChar32 start, limit; - - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return; - } - if(handler==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(pv->isCompacted) { - return; - } - - /* Set the flag now: Sorting and compacting destroys the builder data structure. */ - pv->isCompacted=TRUE; - - rows=pv->rows; - columns=pv->columns; - U_ASSERT(columns>=3); /* upvec_open asserts this */ - valueColumns=columns-2; /* not counting start & limit */ - - /* sort the properties vectors to find unique vector values */ - uprv_sortArray(pv->v, rows, columns*4, - upvec_compareRows, pv, FALSE, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* - * Find and set the special values. - * This has to do almost the same work as the compaction below, - * to find the indexes where the special-value rows will move. - */ - row=pv->v; - count=-valueColumns; - for(i=0; i=UPVEC_FIRST_SPECIAL_CP) { - handler(context, start, start, count, row+2, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - row+=columns; - } - - /* count is at the beginning of the last vector, add valueColumns to include that last vector */ - count+=valueColumns; - - /* Call the handler once more to signal the start of delivering real values. */ - handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, - count, row-valueColumns, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* - * Move vector contents up to a contiguous array with only unique - * vector values, and call the handler function for each vector. - * - * This destroys the Properties Vector structure and replaces it - * with an array of just vector values. 
- */ - row=pv->v; - count=-valueColumns; - for(i=0; iv+count, valueColumns*4)) { - count+=valueColumns; - uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); - } - - if(startv+count, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - row+=columns; - } - - /* count is at the beginning of the last vector, add one to include that last vector */ - pv->rows=count/valueColumns+1; -} - -U_CAPI const uint32_t * U_EXPORT2 -upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { - if(!pv->isCompacted) { - return NULL; - } - if(pRows!=NULL) { - *pRows=pv->rows; - } - if(pColumns!=NULL) { - *pColumns=pv->columns-2; - } - return pv->v; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_cloneArray(const UPropsVectors *pv, - int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { - uint32_t *clonedArray; - int32_t byteLength; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(!pv->isCompacted) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - byteLength=pv->rows*(pv->columns-2)*4; - clonedArray=(uint32_t *)uprv_malloc(byteLength); - if(clonedArray==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(clonedArray, pv->v, byteLength); - if(pRows!=NULL) { - *pRows=pv->rows; - } - if(pColumns!=NULL) { - *pColumns=pv->columns-2; - } - return clonedArray; -} - -U_CAPI UTrie2 * U_EXPORT2 -upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { - UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; - upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); - utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - utrie2_close(toUTrie2.trie); - toUTrie2.trie=NULL; - } - return toUTrie2.trie; -} - -/* - * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts - * some 16-bit field and builds and returns a UTrie2. 
- */ - -U_CAPI void U_CALLCONV -upvec_compactToUTrie2Handler(void *context, - UChar32 start, UChar32 end, - int32_t rowIndex, uint32_t *row, int32_t columns, - UErrorCode *pErrorCode) { - UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; - if(starttrie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); - } else { - switch(start) { - case UPVEC_INITIAL_VALUE_CP: - toUTrie2->initialValue=rowIndex; - break; - case UPVEC_ERROR_VALUE_CP: - toUTrie2->errorValue=rowIndex; - break; - case UPVEC_START_REAL_VALUES_CP: - toUTrie2->maxValue=rowIndex; - if(rowIndex>0xffff) { - /* too many rows for a 16-bit trie */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } else { - toUTrie2->trie=utrie2_open(toUTrie2->initialValue, - toUTrie2->errorValue, pErrorCode); - } - break; - default: - break; - } - } -} diff --git a/deps/icu-small/source/common/propsvec.cpp b/deps/icu-small/source/common/propsvec.cpp new file mode 100644 index 0000000000..056fcda9cf --- /dev/null +++ b/deps/icu-small/source/common/propsvec.cpp @@ -0,0 +1,529 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: propsvec.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb22 +* created by: Markus W. Scherer +* +* Store bits (Unicode character properties) in bit set vectors. 
+*/ + +#include +#include "unicode/utypes.h" +#include "cmemory.h" +#include "utrie.h" +#include "utrie2.h" +#include "uarrsort.h" +#include "propsvec.h" +#include "uassert.h" + +struct UPropsVectors { + uint32_t *v; + int32_t columns; /* number of columns, plus two for start & limit values */ + int32_t maxRows; + int32_t rows; + int32_t prevRow; /* search optimization: remember last row seen */ + UBool isCompacted; +}; + +#define UPVEC_INITIAL_ROWS (1<<12) +#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) +#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) + +U_CAPI UPropsVectors * U_EXPORT2 +upvec_open(int32_t columns, UErrorCode *pErrorCode) { + UPropsVectors *pv; + uint32_t *v, *row; + uint32_t cp; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(columns<1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + columns+=2; /* count range start and limit columns */ + + pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); + v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); + if(pv==NULL || v==NULL) { + uprv_free(pv); + uprv_free(v); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(pv, 0, sizeof(UPropsVectors)); + pv->v=v; + pv->columns=columns; + pv->maxRows=UPVEC_INITIAL_ROWS; + pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); + + /* set the all-Unicode row and the special-value rows */ + row=pv->v; + uprv_memset(row, 0, pv->rows*columns*4); + row[0]=0; + row[1]=0x110000; + row+=columns; + for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { + row[0]=cp; + row[1]=cp+1; + row+=columns; + } + return pv; +} + +U_CAPI void U_EXPORT2 +upvec_close(UPropsVectors *pv) { + if(pv!=NULL) { + uprv_free(pv->v); + uprv_free(pv); + } +} + +static uint32_t * +_findRow(UPropsVectors *pv, UChar32 rangeStart) { + uint32_t *row; + int32_t columns, i, start, limit, prevRow; + + columns=pv->columns; + limit=pv->rows; + prevRow=pv->prevRow; + + /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ + 
row=pv->v+prevRow*columns; + if(rangeStart>=(UChar32)row[0]) { + if(rangeStart<(UChar32)row[1]) { + /* same row as last seen */ + return row; + } else if(rangeStart<(UChar32)(row+=columns)[1]) { + /* next row after the last one */ + pv->prevRow=prevRow+1; + return row; + } else if(rangeStart<(UChar32)(row+=columns)[1]) { + /* second row after the last one */ + pv->prevRow=prevRow+2; + return row; + } else if((rangeStart-(UChar32)row[1])<10) { + /* we are close, continue looping */ + prevRow+=2; + do { + ++prevRow; + row+=columns; + } while(rangeStart>=(UChar32)row[1]); + pv->prevRow=prevRow; + return row; + } + } else if(rangeStart<(UChar32)pv->v[1]) { + /* the very first row */ + pv->prevRow=0; + return pv->v; + } + + /* do a binary search for the start of the range */ + start=0; + while(startv+i*columns; + if(rangeStart<(UChar32)row[0]) { + limit=i; + } else if(rangeStart<(UChar32)row[1]) { + pv->prevRow=i; + return row; + } else { + start=i; + } + } + + /* must be found because all ranges together always cover all of Unicode */ + pv->prevRow=start; + return pv->v+start*columns; +} + +U_CAPI void U_EXPORT2 +upvec_setValue(UPropsVectors *pv, + UChar32 start, UChar32 end, + int32_t column, + uint32_t value, uint32_t mask, + UErrorCode *pErrorCode) { + uint32_t *firstRow, *lastRow; + int32_t columns; + UChar32 limit; + UBool splitFirstRow, splitLastRow; + + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if( pv==NULL || + start<0 || start>end || end>UPVEC_MAX_CP || + column<0 || column>=(pv->columns-2) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(pv->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } + limit=end+1; + + /* initialize */ + columns=pv->columns; + column+=2; /* skip range start and limit columns */ + value&=mask; + + /* find the rows whose ranges overlap with the input range */ + + /* find the first and last rows, always successful */ + firstRow=_findRow(pv, start); + lastRow=_findRow(pv, end); + 
+ /* + * Rows need to be split if they partially overlap with the + * input range (only possible for the first and last rows) + * and if their value differs from the input value. + */ + splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); + splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); + + /* split first/last rows if necessary */ + if(splitFirstRow || splitLastRow) { + int32_t count, rows; + + rows=pv->rows; + if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { + uint32_t *newVectors; + int32_t newMaxRows; + + if(pv->maxRowsmaxRowsv, (size_t)rows*columns*4); + firstRow=newVectors+(firstRow-pv->v); + lastRow=newVectors+(lastRow-pv->v); + uprv_free(pv->v); + pv->v=newVectors; + pv->maxRows=newMaxRows; + } + + /* count the number of row cells to move after the last row, and move them */ + count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); + if(count>0) { + uprv_memmove( + lastRow+(1+splitFirstRow+splitLastRow)*columns, + lastRow+columns, + count*4); + } + pv->rows=rows+splitFirstRow+splitLastRow; + + /* split the first row, and move the firstRow pointer to the second part */ + if(splitFirstRow) { + /* copy all affected rows up one and move the lastRow pointer */ + count = (int32_t)((lastRow-firstRow)+columns); + uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); + lastRow+=columns; + + /* split the range and move the firstRow pointer */ + firstRow[1]=firstRow[columns]=(uint32_t)start; + firstRow+=columns; + } + + /* split the last row */ + if(splitLastRow) { + /* copy the last row data */ + uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); + + /* split the range and move the firstRow pointer */ + lastRow[1]=lastRow[columns]=(uint32_t)limit; + } + } + + /* set the "row last seen" to the last row for the range */ + pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); + + /* set the input value in all remaining rows */ + firstRow+=column; + lastRow+=column; + mask=~mask; + 
for(;;) { + *firstRow=(*firstRow&mask)|value; + if(firstRow==lastRow) { + break; + } + firstRow+=columns; + } +} + +U_CAPI uint32_t U_EXPORT2 +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { + uint32_t *row; + UPropsVectors *ncpv; + + if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { + return 0; + } + ncpv=(UPropsVectors *)pv; + row=_findRow(ncpv, c); + return row[2+column]; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, + UChar32 *pRangeStart, UChar32 *pRangeEnd) { + uint32_t *row; + int32_t columns; + + if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { + return NULL; + } + + columns=pv->columns; + row=pv->v+rowIndex*columns; + if(pRangeStart!=NULL) { + *pRangeStart=(UChar32)row[0]; + } + if(pRangeEnd!=NULL) { + *pRangeEnd=(UChar32)row[1]-1; + } + return row+2; +} + +static int32_t U_CALLCONV +upvec_compareRows(const void *context, const void *l, const void *r) { + const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; + const UPropsVectors *pv=(const UPropsVectors *)context; + int32_t i, count, columns; + + count=columns=pv->columns; /* includes start/limit columns */ + + /* start comparing after start/limit but wrap around to them */ + i=2; + do { + if(left[i]!=right[i]) { + return left[i]0); + + return 0; +} + +U_CAPI void U_EXPORT2 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { + uint32_t *row; + int32_t i, columns, valueColumns, rows, count; + UChar32 start, limit; + + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if(handler==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(pv->isCompacted) { + return; + } + + /* Set the flag now: Sorting and compacting destroys the builder data structure. 
*/ + pv->isCompacted=TRUE; + + rows=pv->rows; + columns=pv->columns; + U_ASSERT(columns>=3); /* upvec_open asserts this */ + valueColumns=columns-2; /* not counting start & limit */ + + /* sort the properties vectors to find unique vector values */ + uprv_sortArray(pv->v, rows, columns*4, + upvec_compareRows, pv, FALSE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* + * Find and set the special values. + * This has to do almost the same work as the compaction below, + * to find the indexes where the special-value rows will move. + */ + row=pv->v; + count=-valueColumns; + for(i=0; i=UPVEC_FIRST_SPECIAL_CP) { + handler(context, start, start, count, row+2, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + row+=columns; + } + + /* count is at the beginning of the last vector, add valueColumns to include that last vector */ + count+=valueColumns; + + /* Call the handler once more to signal the start of delivering real values. */ + handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, + count, row-valueColumns, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* + * Move vector contents up to a contiguous array with only unique + * vector values, and call the handler function for each vector. + * + * This destroys the Properties Vector structure and replaces it + * with an array of just vector values. 
+ */ + row=pv->v; + count=-valueColumns; + for(i=0; iv+count, valueColumns*4)) { + count+=valueColumns; + uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); + } + + if(startv+count, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + row+=columns; + } + + /* count is at the beginning of the last vector, add one to include that last vector */ + pv->rows=count/valueColumns+1; +} + +U_CAPI const uint32_t * U_EXPORT2 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { + if(!pv->isCompacted) { + return NULL; + } + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return pv->v; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_cloneArray(const UPropsVectors *pv, + int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { + uint32_t *clonedArray; + int32_t byteLength; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(!pv->isCompacted) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + byteLength=pv->rows*(pv->columns-2)*4; + clonedArray=(uint32_t *)uprv_malloc(byteLength); + if(clonedArray==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(clonedArray, pv->v, byteLength); + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return clonedArray; +} + +U_CAPI UTrie2 * U_EXPORT2 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { + UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; + upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); + utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + utrie2_close(toUTrie2.trie); + toUTrie2.trie=NULL; + } + return toUTrie2.trie; +} + +/* + * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts + * some 16-bit field and builds and returns a UTrie2. 
+ */ + +U_CAPI void U_CALLCONV +upvec_compactToUTrie2Handler(void *context, + UChar32 start, UChar32 end, + int32_t rowIndex, uint32_t *row, int32_t columns, + UErrorCode *pErrorCode) { + (void)row; + (void)columns; + UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; + if(starttrie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); + } else { + switch(start) { + case UPVEC_INITIAL_VALUE_CP: + toUTrie2->initialValue=rowIndex; + break; + case UPVEC_ERROR_VALUE_CP: + toUTrie2->errorValue=rowIndex; + break; + case UPVEC_START_REAL_VALUES_CP: + toUTrie2->maxValue=rowIndex; + if(rowIndex>0xffff) { + /* too many rows for a 16-bit trie */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + toUTrie2->trie=utrie2_open(toUTrie2->initialValue, + toUTrie2->errorValue, pErrorCode); + } + break; + default: + break; + } + } +} diff --git a/deps/icu-small/source/common/propsvec.h b/deps/icu-small/source/common/propsvec.h index b34e4ee8ff..39080615ea 100644 --- a/deps/icu-small/source/common/propsvec.h +++ b/deps/icu-small/source/common/propsvec.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: propsvec.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/punycode.cpp b/deps/icu-small/source/common/punycode.cpp index 90fd169246..4f0b9ea9cd 100644 --- a/deps/icu-small/source/common/punycode.cpp +++ b/deps/icu-small/source/common/punycode.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: punycode.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/punycode.h b/deps/icu-small/source/common/punycode.h index ff23eb0c37..5d8a243175 100644 --- a/deps/icu-small/source/common/punycode.h +++ b/deps/icu-small/source/common/punycode.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: punycode.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/putil.cpp b/deps/icu-small/source/common/putil.cpp index c2ede806ed..d0714fff33 100644 --- a/deps/icu-small/source/common/putil.cpp +++ b/deps/icu-small/source/common/putil.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -43,8 +43,24 @@ // Must be before any other #includes. #include "uposixdefs.h" -/* include ICU headers */ -#include "unicode/utypes.h" +// First, the platform type. Need this for U_PLATFORM. +#include "unicode/platform.h" + +#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ +/* tzset isn't defined in strict ANSI on MinGW. 
*/ +#undef __STRICT_ANSI__ +#endif + +/* + * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. + */ +#include + +#if !U_PLATFORM_USES_ONLY_WIN32_API +#include +#endif + +/* include the rest of the ICU headers */ #include "unicode/putil.h" #include "unicode/ustring.h" #include "putilimp.h" @@ -76,14 +92,29 @@ * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) * to use native APIs as much as possible? */ +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE # define NOIME # define NOMCX # include +# include "unicode\uloc.h" +#if U_PLATFORM_HAS_WINUWP_API == 0 # include "wintz.h" +#else // U_PLATFORM_HAS_WINUWP_API +typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef +#include +#include +#include +#include + +using namespace ABI::Windows::Foundation; +using namespace Microsoft::WRL; +using namespace Microsoft::WRL::Wrappers; +#endif #elif U_PLATFORM == U_PF_OS400 # include # include /* error code structure */ @@ -104,20 +135,6 @@ # include #endif -#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) -/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ -#undef __STRICT_ANSI__ -#endif - -/* - * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. - */ -#include - -#if !U_PLATFORM_USES_ONLY_WIN32_API -#include -#endif - /* * Only include langinfo.h if we have a way to get the codeset. If we later * depend on more feature, we can test on U_HAVE_NL_LANGINFO. @@ -651,7 +668,7 @@ uprv_timezone() /* Note that U_TZNAME does *not* have to be tzname, but if it is, some platforms need to have it declared here. 
*/ -#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API)) +#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED) /* RS6000 and others reject char **tzname. */ extern U_IMPORT char *U_TZNAME[]; #endif @@ -1008,16 +1025,65 @@ uprv_tzname_clear_cache() #endif } +// With the Universal Windows Platform we can just ask Windows for the name +#if U_PLATFORM_HAS_WINUWP_API +U_CAPI const char* U_EXPORT2 +uprv_getWindowsTimeZone() +{ + // Get default Windows timezone. + ComPtr calendar; + HRESULT hr = RoActivateInstance( + HStringReference(RuntimeClass_Windows_Globalization_Calendar).Get(), + &calendar); + if (SUCCEEDED(hr)) + { + ComPtr timezone; + hr = calendar.As(&timezone); + if (SUCCEEDED(hr)) + { + HString timezoneString; + hr = timezone->GetTimeZone(timezoneString.GetAddressOf()); + if (SUCCEEDED(hr)) + { + int32_t length = wcslen(timezoneString.GetRawBuffer(NULL)); + char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char)); + if (asciiId != nullptr) + { + u_UCharsToChars((UChar*)timezoneString.GetRawBuffer(NULL), asciiId, length); + return asciiId; + } + } + } + } + + // Failed + return nullptr; +} +#endif + U_CAPI const char* U_EXPORT2 uprv_tzname(int n) { const char *tzid = NULL; #if U_PLATFORM_USES_ONLY_WIN32_API +#if U_PLATFORM_HAS_WINUWP_API > 0 + tzid = uprv_getWindowsTimeZone(); +#else tzid = uprv_detectWindowsTimeZone(); +#endif if (tzid != NULL) { return tzid; } + +#ifndef U_TZNAME + // The return value is free'd in timezone.cpp on Windows because + // the other code path returns a pointer to a heap location. + // If we don't have a name already, then tzname wouldn't be any + // better, so just fall back. 
+ return uprv_strdup("Etc/UTC"); +#endif // !U_TZNAME + #else /*#if U_PLATFORM_IS_DARWIN_BASED @@ -1162,7 +1228,8 @@ UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; static CharString *gTimeZoneFilesDirectory = NULL; #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API - static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ + static char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ + static bool gCorrectedPOSIXLocaleHeapAllocated = false; #endif static UBool U_CALLCONV putil_cleanup(void) @@ -1183,9 +1250,10 @@ static UBool U_CALLCONV putil_cleanup(void) #endif #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API - if (gCorrectedPOSIXLocale) { + if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { uprv_free(gCorrectedPOSIXLocale); gCorrectedPOSIXLocale = NULL; + gCorrectedPOSIXLocaleHeapAllocated = false; } #endif return TRUE; @@ -1297,7 +1365,9 @@ static void U_CALLCONV dataDirectoryInitFn() { */ # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO /* First try to get the environment variable */ - path=getenv("ICU_DATA"); +# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv + path=getenv("ICU_DATA"); +# endif # endif /* ICU_DATA_DIR may be set as a compile option. 
@@ -1326,9 +1396,35 @@ static void U_CALLCONV dataDirectoryInitFn() { } #endif +#if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0 + // Use data from the %windir%\globalization\icu directory + // This is only available if ICU is built as a system component + char datadir_path_buffer[MAX_PATH]; + UINT length = GetWindowsDirectoryA(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer)); + if (length > 0 && length < (UPRV_LENGTHOF(datadir_path_buffer) - sizeof(ICU_DATA_DIR_WINDOWS) - 1)) + { + if (datadir_path_buffer[length - 1] != '\\') + { + datadir_path_buffer[length++] = '\\'; + datadir_path_buffer[length] = '\0'; + } + + if ((length + 1 + sizeof(ICU_DATA_DIR_WINDOWS)) < UPRV_LENGTHOF(datadir_path_buffer)) + { + uprv_strcat(datadir_path_buffer, ICU_DATA_DIR_WINDOWS); + path = datadir_path_buffer; + } + } +#endif + if(path==NULL) { /* It looks really bad, set it to something. */ +#if U_PLATFORM_HAS_WIN32_API + // Windows UWP will require icudtl.dat file in same directory as icuuc.dll + path = ".\\"; +#else path = ""; +#endif } u_setDataDirectory(path); @@ -1366,7 +1462,12 @@ static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { status = U_MEMORY_ALLOCATION_ERROR; return; } +#if U_PLATFORM_HAS_WINUWP_API == 0 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR"); +#else + // TODO: UWP does not support alternate timezone data directories at this time + const char *dir = ""; +#endif // U_PLATFORM_HAS_WINUWP_API #if defined(U_TIMEZONE_FILES_DIR) if (dir == NULL) { dir = TO_STRING(U_TIMEZONE_FILES_DIR); @@ -1603,6 +1704,7 @@ The leftmost codepage (.xxx) wins. if (gCorrectedPOSIXLocale == NULL) { gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); correctedPOSIXLocale = NULL; } @@ -1618,25 +1720,115 @@ The leftmost codepage (.xxx) wins. 
UErrorCode status = U_ZERO_ERROR; char *correctedPOSIXLocale = 0; + // If we have already figured this out just use the cached value if (gCorrectedPOSIXLocale != NULL) { return gCorrectedPOSIXLocale; } - LCID id = GetThreadLocale(); - correctedPOSIXLocale = static_cast(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); - if (correctedPOSIXLocale) { - int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); - if (U_SUCCESS(status)) { - *(correctedPOSIXLocale + posixLen) = 0; - gCorrectedPOSIXLocale = correctedPOSIXLocale; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - } else { - uprv_free(correctedPOSIXLocale); + // No cached value, need to determine the current value + static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH]; +#if U_PLATFORM_HAS_WINUWP_API == 0 + // If not a Universal Windows App, we'll need user default language. + // Vista and above should use Locale Names instead of LCIDs + int length = GetUserDefaultLocaleName(windowsLocale, UPRV_LENGTHOF(windowsLocale)); +#else + // In a UWP app, we want the top language that the application and user agreed upon + ComPtr> languageList; + + ComPtr applicationLanguagesStatics; + HRESULT hr = GetActivationFactory( + HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages).Get(), + &applicationLanguagesStatics); + if (SUCCEEDED(hr)) + { + hr = applicationLanguagesStatics->get_Languages(&languageList); + } + + if (FAILED(hr)) + { + // If there is no application context, then use the top language from the user language profile + ComPtr globalizationPreferencesStatics; + hr = GetActivationFactory( + HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences).Get(), + &globalizationPreferencesStatics); + if (SUCCEEDED(hr)) + { + hr = globalizationPreferencesStatics->get_Languages(&languageList); + } + } + + // We have a list of languages, ICU knows one, so use the top one for our locale + HString topLanguage; + if (SUCCEEDED(hr)) + { 
+ hr = languageList->GetAt(0, topLanguage.GetAddressOf()); + } + + if (FAILED(hr)) + { + // Unexpected, use en-US by default + if (gCorrectedPOSIXLocale == NULL) { + gCorrectedPOSIXLocale = "en_US"; + } + + return gCorrectedPOSIXLocale; + } + + // ResolveLocaleName will get a likely subtags form consistent with Windows behavior. + int length = ResolveLocaleName(topLanguage.GetRawBuffer(NULL), windowsLocale, UPRV_LENGTHOF(windowsLocale)); +#endif + // Now we should have a Windows locale name that needs converted to the POSIX style, + if (length > 0) + { + // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) + char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH]; + + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) + { + if (windowsLocale[i] == '_') + { + modifiedWindowsLocale[i] = '-'; + } + else + { + modifiedWindowsLocale[i] = static_cast(windowsLocale[i]); + } + + if (modifiedWindowsLocale[i] == '\0') + { + break; + } + } + + if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) + { + // Ran out of room, can't really happen, maybe we'll be lucky about a matching + // locale when tags are dropped + modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; + } + + // Now normalize the resulting name + if (correctedPOSIXLocale) + { + int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); + if (U_SUCCESS(status)) + { + *(correctedPOSIXLocale + posixLen) = 0; + gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } + else + { + uprv_free(correctedPOSIXLocale); + } } } + // If unable to find a locale we can agree upon, use en-US by default if (gCorrectedPOSIXLocale == NULL) { - return "en_US"; + gCorrectedPOSIXLocale = "en_US"; } return gCorrectedPOSIXLocale; @@ -1923,8 +2115,34 @@ int_getDefaultCodepage() #elif 
U_PLATFORM_USES_ONLY_WIN32_API static char codepage[64]; - sprintf(codepage, "windows-%d", GetACP()); - return codepage; + DWORD codepageNumber = 0; + +#if U_PLATFORM_HAS_WINUWP_API > 0 + // UWP doesn't have a direct API to get the default ACP as Microsoft would rather + // have folks use Unicode than a "system" code page, however this is the same + // codepage as the system default locale codepage. (FWIW, the system locale is + // ONLY used for codepage, it should never be used for anything else) + GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, + (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); +#else + // Win32 apps can call GetACP + codepageNumber = GetACP(); +#endif + // Special case for UTF-8 + if (codepageNumber == 65001) + { + return "UTF-8"; + } + // Windows codepages can look like windows-1252, so format the found number + // the numbers are eclectic, however all valid system code pages, besides UTF-8 + // are between 3 and 19999 + if (codepageNumber > 0 && codepageNumber < 20000) + { + sprintf(codepage, "windows-%ld", codepageNumber); + return codepage; + } + // If the codepage number call failed then return UTF-8 + return "UTF-8"; #elif U_POSIX_LOCALE static char codesetName[100]; diff --git a/deps/icu-small/source/common/putilimp.h b/deps/icu-small/source/common/putilimp.h index cb78350825..b797a9a280 100644 --- a/deps/icu-small/source/common/putilimp.h +++ b/deps/icu-small/source/common/putilimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -87,7 +87,7 @@ typedef size_t uintptr_t; #ifdef U_HAVE_NL_LANGINFO_CODESET /* Use the predefined value. 
*/ -#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX +#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX # define U_HAVE_NL_LANGINFO_CODESET 0 #else # define U_HAVE_NL_LANGINFO_CODESET 1 @@ -106,7 +106,10 @@ typedef size_t uintptr_t; #ifdef U_TZSET /* Use the predefined value. */ #elif U_PLATFORM_USES_ONLY_WIN32_API + // UWP doesn't support tzset or environment variables for tz +#if U_PLATFORM_HAS_WINUWP_API == 0 # define U_TZSET _tzset +#endif #elif U_PLATFORM == U_PF_OS400 /* not defined */ #else @@ -141,7 +144,10 @@ typedef size_t uintptr_t; #ifdef U_TZNAME /* Use the predefined value. */ #elif U_PLATFORM_USES_ONLY_WIN32_API + /* not usable on all windows platforms */ +#if U_PLATFORM_HAS_WINUWP_API == 0 # define U_TZNAME _tzname +#endif #elif U_PLATFORM == U_PF_OS400 /* not defined */ #else diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index daba40b741..2a501bf167 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** diff --git a/deps/icu-small/source/common/rbbidata.cpp b/deps/icu-small/source/common/rbbidata.cpp index afa87eb6a8..ecdc8f4165 100644 --- a/deps/icu-small/source/common/rbbidata.cpp +++ b/deps/icu-small/source/common/rbbidata.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** diff --git a/deps/icu-small/source/common/rbbidata.h b/deps/icu-small/source/common/rbbidata.h index 0d6cde2d94..d33ef7d45e 100644 --- a/deps/icu-small/source/common/rbbidata.h +++ b/deps/icu-small/source/common/rbbidata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: rbbidata.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/rbbinode.cpp b/deps/icu-small/source/common/rbbinode.cpp index d0949a3f7f..2181d81aca 100644 --- a/deps/icu-small/source/common/rbbinode.cpp +++ b/deps/icu-small/source/common/rbbinode.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** diff --git a/deps/icu-small/source/common/rbbinode.h b/deps/icu-small/source/common/rbbinode.h index ac26ceefaf..e33662167f 100644 --- a/deps/icu-small/source/common/rbbinode.h +++ b/deps/icu-small/source/common/rbbinode.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp index 475fdd7f83..b94ae9605f 100644 --- a/deps/icu-small/source/common/rbbirb.cpp +++ b/deps/icu-small/source/common/rbbirb.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // file: rbbirb.cpp diff --git a/deps/icu-small/source/common/rbbirb.h b/deps/icu-small/source/common/rbbirb.h index 1a9d302325..3cde8da3cc 100644 --- a/deps/icu-small/source/common/rbbirb.h +++ b/deps/icu-small/source/common/rbbirb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // rbbirb.h diff --git a/deps/icu-small/source/common/rbbirpt.h b/deps/icu-small/source/common/rbbirpt.h index 542a396df0..b94c4c25cb 100644 --- a/deps/icu-small/source/common/rbbirpt.h +++ b/deps/icu-small/source/common/rbbirpt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html //--------------------------------------------------------------------------------- // diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp index 5743250945..6688c965c3 100644 --- a/deps/icu-small/source/common/rbbiscan.cpp +++ b/deps/icu-small/source/common/rbbiscan.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html // // file: rbbiscan.cpp @@ -1179,13 +1179,12 @@ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { if (U_FAILURE(*fRB->fStatus)) { return NULL; } - fNodeStackPtr++; - if (fNodeStackPtr >= kStackSize) { - error(U_BRK_INTERNAL_ERROR); + if (fNodeStackPtr >= kStackSize - 1) { + error(U_BRK_RULE_SYNTAX); RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); - *fRB->fStatus = U_BRK_INTERNAL_ERROR; return NULL; } + fNodeStackPtr++; fNodeStack[fNodeStackPtr] = new RBBINode(t); if (fNodeStack[fNodeStackPtr] == NULL) { *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; diff --git a/deps/icu-small/source/common/rbbiscan.h b/deps/icu-small/source/common/rbbiscan.h index 6be2f9668f..3d484db0e9 100644 --- a/deps/icu-small/source/common/rbbiscan.h +++ b/deps/icu-small/source/common/rbbiscan.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // rbbiscan.h diff --git a/deps/icu-small/source/common/rbbisetb.cpp b/deps/icu-small/source/common/rbbisetb.cpp index 22ec28c135..d17916c9e9 100644 --- a/deps/icu-small/source/common/rbbisetb.cpp +++ b/deps/icu-small/source/common/rbbisetb.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // rbbisetb.cpp diff --git a/deps/icu-small/source/common/rbbisetb.h b/deps/icu-small/source/common/rbbisetb.h index 89bfb9865c..a7d1e7af3b 100644 --- a/deps/icu-small/source/common/rbbisetb.h +++ b/deps/icu-small/source/common/rbbisetb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html // // rbbisetb.h diff --git a/deps/icu-small/source/common/rbbistbl.cpp b/deps/icu-small/source/common/rbbistbl.cpp index f48485868c..d90992290c 100644 --- a/deps/icu-small/source/common/rbbistbl.cpp +++ b/deps/icu-small/source/common/rbbistbl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index c765e61052..b3e6ca51d1 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/rbbitblb.h b/deps/icu-small/source/common/rbbitblb.h index d71a024587..1041501878 100644 --- a/deps/icu-small/source/common/rbbitblb.h +++ b/deps/icu-small/source/common/rbbitblb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // rbbitblb.h diff --git a/deps/icu-small/source/common/resbund.cpp b/deps/icu-small/source/common/resbund.cpp index 2976791761..29c3463ed5 100644 --- a/deps/icu-small/source/common/resbund.cpp +++ b/deps/icu-small/source/common/resbund.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/resbund_cnv.cpp b/deps/icu-small/source/common/resbund_cnv.cpp index 80a4daa3b7..ae854fe739 100644 --- a/deps/icu-small/source/common/resbund_cnv.cpp +++ b/deps/icu-small/source/common/resbund_cnv.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: resbund_cnv.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/resource.cpp b/deps/icu-small/source/common/resource.cpp index 62b3aa46a5..3d41a16029 100644 --- a/deps/icu-small/source/common/resource.cpp +++ b/deps/icu-small/source/common/resource.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/resource.h b/deps/icu-small/source/common/resource.h index 43c3309b5e..3dbff785ef 100644 --- a/deps/icu-small/source/common/resource.h +++ b/deps/icu-small/source/common/resource.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/ruleiter.cpp b/deps/icu-small/source/common/ruleiter.cpp index 6e27b4dd8c..41eea23c0d 100644 --- a/deps/icu-small/source/common/ruleiter.cpp +++ b/deps/icu-small/source/common/ruleiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ruleiter.h b/deps/icu-small/source/common/ruleiter.h index b0b8e5435f..b6edc657af 100644 --- a/deps/icu-small/source/common/ruleiter.h +++ b/deps/icu-small/source/common/ruleiter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/schriter.cpp b/deps/icu-small/source/common/schriter.cpp index cc413666f1..f852800aaa 100644 --- a/deps/icu-small/source/common/schriter.cpp +++ b/deps/icu-small/source/common/schriter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/serv.cpp b/deps/icu-small/source/common/serv.cpp index 9f05c1943a..8913b21e69 100644 --- a/deps/icu-small/source/common/serv.cpp +++ b/deps/icu-small/source/common/serv.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/serv.h b/deps/icu-small/source/common/serv.h index c82c6d1dd8..70695839a8 100644 --- a/deps/icu-small/source/common/serv.h +++ b/deps/icu-small/source/common/serv.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servlk.cpp b/deps/icu-small/source/common/servlk.cpp index cdd4e3f7f0..27b046f1e5 100644 --- a/deps/icu-small/source/common/servlk.cpp +++ b/deps/icu-small/source/common/servlk.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servlkf.cpp b/deps/icu-small/source/common/servlkf.cpp index d8617cee48..6e46bd2079 100644 --- a/deps/icu-small/source/common/servlkf.cpp +++ b/deps/icu-small/source/common/servlkf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servloc.h b/deps/icu-small/source/common/servloc.h index 5a2b669d7f..5019894244 100644 --- a/deps/icu-small/source/common/servloc.h +++ b/deps/icu-small/source/common/servloc.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servls.cpp b/deps/icu-small/source/common/servls.cpp index 10f3b88aa7..907fe7fecf 100644 --- a/deps/icu-small/source/common/servls.cpp +++ b/deps/icu-small/source/common/servls.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servnotf.cpp b/deps/icu-small/source/common/servnotf.cpp index 3d94c8690d..5159452f0a 100644 --- a/deps/icu-small/source/common/servnotf.cpp +++ b/deps/icu-small/source/common/servnotf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servnotf.h b/deps/icu-small/source/common/servnotf.h index 72ae93e627..cf92fc169e 100644 --- a/deps/icu-small/source/common/servnotf.h +++ b/deps/icu-small/source/common/servnotf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servrbf.cpp b/deps/icu-small/source/common/servrbf.cpp index f67ed026ee..3f143afadf 100644 --- a/deps/icu-small/source/common/servrbf.cpp +++ b/deps/icu-small/source/common/servrbf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/servslkf.cpp b/deps/icu-small/source/common/servslkf.cpp index c01a2ad4c1..4aa10414a0 100644 --- a/deps/icu-small/source/common/servslkf.cpp +++ b/deps/icu-small/source/common/servslkf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /** ******************************************************************************* diff --git a/deps/icu-small/source/common/sharedobject.cpp b/deps/icu-small/source/common/sharedobject.cpp index 8e5095e129..37aa458e00 100644 --- a/deps/icu-small/source/common/sharedobject.cpp +++ b/deps/icu-small/source/common/sharedobject.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/sharedobject.h b/deps/icu-small/source/common/sharedobject.h index 6e205b14ba..783b55948a 100644 --- a/deps/icu-small/source/common/sharedobject.h +++ b/deps/icu-small/source/common/sharedobject.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/simpleformatter.cpp b/deps/icu-small/source/common/simpleformatter.cpp index eaeb60de14..f866e0a1a1 100644 --- a/deps/icu-small/source/common/simpleformatter.cpp +++ b/deps/icu-small/source/common/simpleformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/sprpimpl.h b/deps/icu-small/source/common/sprpimpl.h index 56c2f86eef..aff40ad0da 100644 --- a/deps/icu-small/source/common/sprpimpl.h +++ b/deps/icu-small/source/common/sprpimpl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: sprpimpl.h - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/stringpiece.cpp b/deps/icu-small/source/common/stringpiece.cpp index b032b474f6..d4f7f310ba 100644 --- a/deps/icu-small/source/common/stringpiece.cpp +++ b/deps/icu-small/source/common/stringpiece.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // Copyright (C) 2009-2013, International Business Machines // Corporation and others. All Rights Reserved. 
diff --git a/deps/icu-small/source/common/stringtriebuilder.cpp b/deps/icu-small/source/common/stringtriebuilder.cpp index 075d7c4324..cf5b7b73ae 100644 --- a/deps/icu-small/source/common/stringtriebuilder.cpp +++ b/deps/icu-small/source/common/stringtriebuilder.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: stringtriebuilder.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uarrsort.c b/deps/icu-small/source/common/uarrsort.c deleted file mode 100644 index bb1b5bdd78..0000000000 --- a/deps/icu-small/source/common/uarrsort.c +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uarrsort.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003aug04 -* created by: Markus W. Scherer -* -* Internal function for sorting arrays. -*/ - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "uarrsort.h" - -enum { - /** - * "from Knuth" - * - * A binary search over 8 items performs 4 comparisons: - * log2(8)=3 to subdivide, +1 to check for equality. - * A linear search over 8 items on average also performs 4 comparisons. 
- */ - MIN_QSORT=9, - STACK_ITEM_SIZE=200 -}; - -/* UComparator convenience implementations ---------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -uprv_uint16Comparator(const void *context, const void *left, const void *right) { - return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; -} - -U_CAPI int32_t U_EXPORT2 -uprv_int32Comparator(const void *context, const void *left, const void *right) { - return *(const int32_t *)left - *(const int32_t *)right; -} - -U_CAPI int32_t U_EXPORT2 -uprv_uint32Comparator(const void *context, const void *left, const void *right) { - uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; - - /* compare directly because (l-r) would overflow the int32_t result */ - if(lr */ { - return 1; - } -} - -/* Insertion sort using binary search --------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, - UComparator *cmp, const void *context) { - int32_t start=0; - UBool found=FALSE; - - /* Binary search until we get down to a tiny sub-array. */ - while((limit-start)>=MIN_QSORT) { - int32_t i=(start+limit)/2; - int32_t diff=cmp(context, item, array+i*itemSize); - if(diff==0) { - /* - * Found the item. We look for the *last* occurrence of such - * an item, for stable sorting. - * If we knew that there will be only few equal items, - * we could break now and enter the linear search. - * However, if there are many equal items, then it should be - * faster to continue with the binary search. - * It seems likely that we either have all unique items - * (where found will never become TRUE in the insertion sort) - * or potentially many duplicates. - */ - found=TRUE; - start=i+1; - } else if(diff<0) { - limit=i; - } else { - start=i; - } - } - - /* Linear search over the remaining tiny sub-array. 
*/ - while(start=limit) { - doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); - break; - } - - left=start; - right=limit; - - /* x=array[middle] */ - uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); - - do { - while(/* array[left]0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(length<=1) { - return; - } else if(lengthr */ { + return 1; + } +} + +/* Insertion sort using binary search --------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, + UComparator *cmp, const void *context) { + int32_t start=0; + UBool found=FALSE; + + /* Binary search until we get down to a tiny sub-array. */ + while((limit-start)>=MIN_QSORT) { + int32_t i=(start+limit)/2; + int32_t diff=cmp(context, item, array+i*itemSize); + if(diff==0) { + /* + * Found the item. We look for the *last* occurrence of such + * an item, for stable sorting. + * If we knew that there will be only few equal items, + * we could break now and enter the linear search. + * However, if there are many equal items, then it should be + * faster to continue with the binary search. + * It seems likely that we either have all unique items + * (where found will never become TRUE in the insertion sort) + * or potentially many duplicates. + */ + found=TRUE; + start=i+1; + } else if(diff<0) { + limit=i; + } else { + start=i; + } + } + + /* Linear search over the remaining tiny sub-array. 
*/ + while(start=limit) { + doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); + break; + } + + left=start; + right=limit; + + /* x=array[middle] */ + uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); + + do { + while(/* array[left]0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(length<=1) { + return; + } else if(lengthbdp=ubidi_getSingleton(); - - /* allocate memory for arrays as requested */ - if(maxLength>0) { - if( !getInitialDirPropsMemory(pBiDi, maxLength) || - !getInitialLevelsMemory(pBiDi, maxLength) - ) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - pBiDi->mayAllocateText=TRUE; - } - - if(maxRunCount>0) { - if(maxRunCount==1) { - /* use simpleRuns[] */ - pBiDi->runsSize=sizeof(Run); - } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - pBiDi->mayAllocateRuns=TRUE; - } - - if(U_SUCCESS(*pErrorCode)) { - return pBiDi; - } else { - ubidi_close(pBiDi); - return NULL; - } -} - -/* - * We are allowed to allocate memory if memory==NULL or - * mayAllocate==TRUE for each array that we need. - * We also try to grow memory as needed if we - * allocate it. - * - * Assume sizeNeeded>0. - * If *pMemory!=NULL, then assume *pSize>0. - * - * ### this realloc() may unnecessarily copy the old data, - * which we know we don't need any more; - * is this the best way to do this?? 
- */ -U_CFUNC UBool -ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { - void **pMemory = (void **)bidiMem; - /* check for existing memory */ - if(*pMemory==NULL) { - /* we need to allocate memory */ - if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { - *pSize=sizeNeeded; - return TRUE; - } else { - return FALSE; - } - } else { - if(sizeNeeded<=*pSize) { - /* there is already enough memory */ - return TRUE; - } - else if(!mayAllocate) { - /* not enough memory, and we must not allocate */ - return FALSE; - } else { - /* we try to grow */ - void *memory; - /* in most cases, we do not need the copy-old-data part of - * realloc, but it is needed when adding runs using getRunsMemory() - * in setParaRunsOnly() - */ - if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { - *pMemory=memory; - *pSize=sizeNeeded; - return TRUE; - } else { - /* we failed to grow */ - return FALSE; - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_close(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ - if(pBiDi->dirPropsMemory!=NULL) { - uprv_free(pBiDi->dirPropsMemory); - } - if(pBiDi->levelsMemory!=NULL) { - uprv_free(pBiDi->levelsMemory); - } - if(pBiDi->openingsMemory!=NULL) { - uprv_free(pBiDi->openingsMemory); - } - if(pBiDi->parasMemory!=NULL) { - uprv_free(pBiDi->parasMemory); - } - if(pBiDi->runsMemory!=NULL) { - uprv_free(pBiDi->runsMemory); - } - if(pBiDi->isolatesMemory!=NULL) { - uprv_free(pBiDi->isolatesMemory); - } - if(pBiDi->insertPoints.points!=NULL) { - uprv_free(pBiDi->insertPoints.points); - } - - uprv_free(pBiDi); - } -} - -/* set to approximate "inverse BiDi" ---------------------------------------- */ - -U_CAPI void U_EXPORT2 -ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { - if(pBiDi!=NULL) { - pBiDi->isInverse=isInverse; - pBiDi->reorderingMode = isInverse ? 
UBIDI_REORDER_INVERSE_NUMBERS_AS_L - : UBIDI_REORDER_DEFAULT; - } -} - -U_CAPI UBool U_EXPORT2 -ubidi_isInverse(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->isInverse; - } else { - return FALSE; - } -} - -/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of - * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre - * concept of RUNS_ONLY which is a double operation. - * It could be advantageous to divide this into 3 concepts: - * a) Operation: direct / inverse / RUNS_ONLY - * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R - * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL - * This would allow combinations not possible today like RUNS_ONLY with - * NUMBERS_SPECIAL. - * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and - * REMOVE_CONTROLS for the inverse step. - * Not all combinations would be supported, and probably not all do make sense. - * This would need to document which ones are supported and what are the - * fallbacks for unsupported combinations. 
- */ -U_CAPI void U_EXPORT2 -ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { - if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) - && (reorderingMode < UBIDI_REORDER_COUNT)) { - pBiDi->reorderingMode = reorderingMode; - pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); - } -} - -U_CAPI UBiDiReorderingMode U_EXPORT2 -ubidi_getReorderingMode(UBiDi *pBiDi) { - if (pBiDi!=NULL) { - return pBiDi->reorderingMode; - } else { - return UBIDI_REORDER_DEFAULT; - } -} - -U_CAPI void U_EXPORT2 -ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { - if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { - reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; - } - if (pBiDi!=NULL) { - pBiDi->reorderingOptions=reorderingOptions; - } -} - -U_CAPI uint32_t U_EXPORT2 -ubidi_getReorderingOptions(UBiDi *pBiDi) { - if (pBiDi!=NULL) { - return pBiDi->reorderingOptions; - } else { - return 0; - } -} - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getBaseDirection(const UChar *text, -int32_t length){ - - int32_t i; - UChar32 uchar; - UCharDirection dir; - - if( text==NULL || length<-1 ){ - return UBIDI_NEUTRAL; - } - - if(length==-1) { - length=u_strlen(text); - } - - for( i = 0 ; i < length; ) { - /* i is incremented by U16_NEXT */ - U16_NEXT(text, i, length, uchar); - dir = u_charDirection(uchar); - if( dir == U_LEFT_TO_RIGHT ) - return UBIDI_LTR; - if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) - return UBIDI_RTL; - } - return UBIDI_NEUTRAL; -} - -/* perform (P2)..(P3) ------------------------------------------------------- */ - -/** - * Returns the directionality of the first strong character - * after the last B in prologue, if any. - * Requires prologue!=null. 
- */ -static DirProp -firstL_R_AL(UBiDi *pBiDi) { - const UChar *text=pBiDi->prologue; - int32_t length=pBiDi->proLength; - int32_t i; - UChar32 uchar; - DirProp dirProp, result=ON; - for(i=0; iparas - */ -static UBool -checkParaCount(UBiDi *pBiDi) { - int32_t count=pBiDi->paraCount; - if(pBiDi->paras==pBiDi->simpleParas) { - if(count<=SIMPLE_PARAS_COUNT) - return TRUE; - if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2)) - return FALSE; - pBiDi->paras=pBiDi->parasMemory; - uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para)); - return TRUE; - } - if(!getInitialParasMemory(pBiDi, count * 2)) - return FALSE; - pBiDi->paras=pBiDi->parasMemory; - return TRUE; -} - -/* - * Get the directional properties for the text, calculate the flags bit-set, and - * determine the paragraph level if necessary (in pBiDi->paras[i].level). - * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. - * When encountering an FSI, it is initially replaced with an LRI, which is the - * default. Only if a strong R or AL is found within its scope will the LRI be - * replaced by an RLI. 
- */ -static UBool -getDirProps(UBiDi *pBiDi) { - const UChar *text=pBiDi->text; - DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ - - int32_t i=0, originalLength=pBiDi->originalLength; - Flags flags=0; /* collect all directionalities in the text */ - UChar32 uchar; - DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ - UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); - /* for inverse BiDi, the default para level is set to RTL if there is a - strong R or AL character at either end of the text */ - UBool isDefaultLevelInverse=isDefaultLevel && (UBool) - (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || - pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); - int32_t lastArabicPos=-1; - int32_t controlCount=0; - UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & - UBIDI_OPTION_REMOVE_CONTROLS); - - typedef enum { - NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ - SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ - SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ - LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ - } State; - State state; - DirProp lastStrong=ON; /* for default level & inverse BiDi */ - /* The following stacks are used to manage isolate sequences. Those - sequences may be nested, but obviously never more deeply than the - maximum explicit embedding level. - lastStack is the index of the last used entry in the stack. A value of -1 - means that there is no open isolate sequence. - lastStack is reset to -1 on paragraph boundaries. 
*/ - /* The following stack contains the position of the initiator of - each open isolate sequence */ - int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; - /* The following stack contains the last known state before - encountering the initiator of an isolate sequence */ - int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; - int32_t stackLast=-1; - - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) - pBiDi->length=0; - defaultParaLevel=pBiDi->paraLevel&1; - if(isDefaultLevel) { - pBiDi->paras[0].level=defaultParaLevel; - lastStrong=defaultParaLevel; - if(pBiDi->proLength>0 && /* there is a prologue */ - (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ - if(dirProp==L) - pBiDi->paras[0].level=0; /* set the default para level */ - else - pBiDi->paras[0].level=1; /* set the default para level */ - state=NOT_SEEKING_STRONG; - } else { - state=SEEKING_STRONG_FOR_PARA; - } - } else { - pBiDi->paras[0].level=pBiDi->paraLevel; - state=NOT_SEEKING_STRONG; - } - /* count paragraphs and determine the paragraph level (P2..P3) */ - /* - * see comment in ubidi.h: - * the UBIDI_DEFAULT_XXX values are designed so that - * their bit 0 alone yields the intended default - */ - for( /* i=0 above */ ; i0xffff) { /* set the lead surrogate's property to BN */ - flags|=DIRPROP_FLAG(BN); - dirProps[i-2]=BN; - } - if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) - controlCount++; - if(dirProp==L) { - if(state==SEEKING_STRONG_FOR_PARA) { - pBiDi->paras[pBiDi->paraCount-1].level=0; - state=NOT_SEEKING_STRONG; - } - else if(state==SEEKING_STRONG_FOR_FSI) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - /* no need for next statement, already set by default */ - /* dirProps[isolateStartStack[stackLast]]=LRI; */ - flags|=DIRPROP_FLAG(LRI); - } - state=LOOKING_FOR_PDI; - } - lastStrong=L; - continue; - } - if(dirProp==R || dirProp==AL) { - if(state==SEEKING_STRONG_FOR_PARA) { - pBiDi->paras[pBiDi->paraCount-1].level=1; - state=NOT_SEEKING_STRONG; - } - else 
if(state==SEEKING_STRONG_FOR_FSI) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - dirProps[isolateStartStack[stackLast]]=RLI; - flags|=DIRPROP_FLAG(RLI); - } - state=LOOKING_FOR_PDI; - } - lastStrong=R; - if(dirProp==AL) - lastArabicPos=i-1; - continue; - } - if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ - stackLast++; - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - isolateStartStack[stackLast]=i-1; - previousStateStack[stackLast]=state; - } - if(dirProp==FSI) { - dirProps[i-1]=LRI; /* default if no strong char */ - state=SEEKING_STRONG_FOR_FSI; - } - else - state=LOOKING_FOR_PDI; - continue; - } - if(dirProp==PDI) { - if(state==SEEKING_STRONG_FOR_FSI) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - /* no need for next statement, already set by default */ - /* dirProps[isolateStartStack[stackLast]]=LRI; */ - flags|=DIRPROP_FLAG(LRI); - } - } - if(stackLast>=0) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) - state=previousStateStack[stackLast]; - stackLast--; - } - continue; - } - if(dirProp==B) { - if(iparas[pBiDi->paraCount-1].limit=i; - if(isDefaultLevelInverse && lastStrong==R) - pBiDi->paras[pBiDi->paraCount-1].level=1; - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { - /* When streaming, we only process whole paragraphs - thus some updates are only done on paragraph boundaries */ - pBiDi->length=i; /* i is index to next character */ - pBiDi->controlCount=controlCount; - } - if(iparaCount++; - if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ - return FALSE; - if(isDefaultLevel) { - pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; - state=SEEKING_STRONG_FOR_PARA; - lastStrong=defaultParaLevel; - } else { - pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; - state=NOT_SEEKING_STRONG; - } - stackLast=-1; - } - continue; - } - } - /* Ignore still open isolate sequences with overflow */ - if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { - stackLast=UBIDI_MAX_EXPLICIT_LEVEL; - state=SEEKING_STRONG_FOR_FSI; /* to be 
on the safe side */ - } - /* Resolve direction of still unresolved open FSI sequences */ - while(stackLast>=0) { - if(state==SEEKING_STRONG_FOR_FSI) { - /* no need for next statement, already set by default */ - /* dirProps[isolateStartStack[stackLast]]=LRI; */ - flags|=DIRPROP_FLAG(LRI); - break; - } - state=previousStateStack[stackLast]; - stackLast--; - } - /* When streaming, ignore text after the last paragraph separator */ - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { - if(pBiDi->lengthparaCount--; - } else { - pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; - pBiDi->controlCount=controlCount; - } - /* For inverse bidi, default para direction is RTL if there is - a strong R or AL at either end of the paragraph */ - if(isDefaultLevelInverse && lastStrong==R) { - pBiDi->paras[pBiDi->paraCount-1].level=1; - } - if(isDefaultLevel) { - pBiDi->paraLevel=pBiDi->paras[0].level; - } - /* The following is needed to resolve the text direction for default level - paragraphs containing no strong character */ - for(i=0; iparaCount; i++) - flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); - - if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { - flags|=DIRPROP_FLAG(L); - } - pBiDi->flags=flags; - pBiDi->lastArabicPos=lastArabicPos; - return TRUE; -} - -/* determine the paragraph level at position index */ -U_CFUNC UBiDiLevel -ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { - int32_t i; - for(i=0; iparaCount; i++) - if(pindexparas[i].limit) - break; - if(i>=pBiDi->paraCount) - i=pBiDi->paraCount-1; - return (UBiDiLevel)(pBiDi->paras[i].level); -} - -/* Functions for handling paired brackets ----------------------------------- */ - -/* In the isoRuns array, the first entry is used for text outside of any - isolate sequence. Higher entries are used for each more deeply nested - isolate sequence. isoRunLast is the index of the last used entry. 
The - openings array is used to note the data of opening brackets not yet - matched by a closing bracket, or matched but still susceptible to change - level. - Each isoRun entry contains the index of the first and - one-after-last openings entries for pending opening brackets it - contains. The next openings entry to use is the one-after-last of the - most deeply nested isoRun entry. - isoRun entries also contain their current embedding level and the last - encountered strong character, since these will be needed to resolve - the level of paired brackets. */ - -static void -bracketInit(UBiDi *pBiDi, BracketData *bd) { - bd->pBiDi=pBiDi; - bd->isoRunLast=0; - bd->isoRuns[0].start=0; - bd->isoRuns[0].limit=0; - bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); - bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1; - bd->isoRuns[0].contextPos=0; - if(pBiDi->openingsMemory) { - bd->openings=pBiDi->openingsMemory; - bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); - } else { - bd->openings=bd->simpleOpenings; - bd->openingsCount=SIMPLE_OPENINGS_COUNT; - } - bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || - bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; -} - -/* paragraph boundary */ -static void -bracketProcessB(BracketData *bd, UBiDiLevel level) { - bd->isoRunLast=0; - bd->isoRuns[0].limit=0; - bd->isoRuns[0].level=level; - bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=level&1; - bd->isoRuns[0].contextPos=0; -} - -/* LRE, LRO, RLE, RLO, PDF */ -static void -bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, - UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - DirProp *dirProps=bd->pBiDi->dirProps; - if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ - return; - if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */ - 
contextLevel=embeddingLevel; - pLastIsoRun->limit=pLastIsoRun->start; - pLastIsoRun->level=embeddingLevel; - pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=contextLevel&1; - pLastIsoRun->contextPos=lastCcPos; -} - -/* LRI or RLI */ -static void -bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - int16_t lastLimit; - pLastIsoRun->lastBase=ON; - lastLimit=pLastIsoRun->limit; - bd->isoRunLast++; - pLastIsoRun++; - pLastIsoRun->start=pLastIsoRun->limit=lastLimit; - pLastIsoRun->level=level; - pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=level&1; - pLastIsoRun->contextPos=0; -} - -/* PDI */ -static void -bracketProcessPDI(BracketData *bd) { - IsoRun *pLastIsoRun; - bd->isoRunLast--; - pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - pLastIsoRun->lastBase=ON; -} - -/* newly found opening bracket: create an openings entry */ -static UBool /* return TRUE if success */ -bracketAddOpening(BracketData *bd, UChar match, int32_t position) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - Opening *pOpening; - if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ - UBiDi *pBiDi=bd->pBiDi; - if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) - return FALSE; - if(bd->openings==bd->simpleOpenings) - uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, - SIMPLE_OPENINGS_COUNT * sizeof(Opening)); - bd->openings=pBiDi->openingsMemory; /* may have changed */ - bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); - } - pOpening=&bd->openings[pLastIsoRun->limit]; - pOpening->position=position; - pOpening->match=match; - pOpening->contextDir=pLastIsoRun->contextDir; - pOpening->contextPos=pLastIsoRun->contextPos; - pOpening->flags=0; - pLastIsoRun->limit++; - return TRUE; -} - -/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ -static void -fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, 
DirProp newProp) { - /* This function calls itself recursively */ - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - Opening *qOpening; - DirProp *dirProps=bd->pBiDi->dirProps; - int32_t k, openingPosition, closingPosition; - for(k=openingIndex+1, qOpening=&bd->openings[k]; klimit; k++, qOpening++) { - if(qOpening->match>=0) /* not an N0c match */ - continue; - if(newPropPositioncontextPos) - break; - if(newPropPosition>=qOpening->position) - continue; - if(newProp==qOpening->contextDir) - break; - openingPosition=qOpening->position; - dirProps[openingPosition]=newProp; - closingPosition=-(qOpening->match); - dirProps[closingPosition]=newProp; - qOpening->match=0; /* prevent further changes */ - fixN0c(bd, k, openingPosition, newProp); - fixN0c(bd, k, closingPosition, newProp); - } -} - -/* process closing bracket */ -static DirProp /* return L or R if N0b or N0c, ON if N0d */ -bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - Opening *pOpening, *qOpening; - UBiDiDirection direction; - UBool stable; - DirProp newProp; - pOpening=&bd->openings[openIdx]; - direction=pLastIsoRun->level&1; - stable=TRUE; /* assume stable until proved otherwise */ - - /* The stable flag is set when brackets are paired and their - level is resolved and cannot be changed by what will be - found later in the source string. - An unstable match can occur only when applying N0c, where - the resolved level depends on the preceding context, and - this context may be affected by text occurring later. - Example: RTL paragraph containing: abc[(latin) HEBREW] - When the closing parenthesis is encountered, it appears - that N0c1 must be applied since 'abc' sets an opposite - direction context and both parentheses receive level 2. 
- However, when the closing square bracket is processed, - N0b applies because of 'HEBREW' being included within the - brackets, thus the square brackets are treated like R and - receive level 1. However, this changes the preceding - context of the opening parenthesis, and it now appears - that N0c2 must be applied to the parentheses rather than - N0c1. */ - - if((direction==0 && pOpening->flags&FOUND_L) || - (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ - newProp=direction; - } - else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ - /* it is stable if there is no containing pair or in - conditions too complicated and not worth checking */ - stable=(openIdx==pLastIsoRun->start); - if(direction!=pOpening->contextDir) - newProp=pOpening->contextDir; /* N0c1 */ - else - newProp=direction; /* N0c2 */ - } else { - /* forget this and any brackets nested within this pair */ - pLastIsoRun->limit=openIdx; - return ON; /* N0d */ - } - bd->pBiDi->dirProps[pOpening->position]=newProp; - bd->pBiDi->dirProps[position]=newProp; - /* Update nested N0c pairs that may be affected */ - fixN0c(bd, openIdx, pOpening->position, newProp); - if(stable) { - pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */ - /* remove lower located synonyms if any */ - while(pLastIsoRun->limit>pLastIsoRun->start && - bd->openings[pLastIsoRun->limit-1].position==pOpening->position) - pLastIsoRun->limit--; - } else { - int32_t k; - pOpening->match=-position; - /* neutralize lower located synonyms if any */ - k=openIdx-1; - while(k>=pLastIsoRun->start && - bd->openings[k].position==pOpening->position) - bd->openings[k--].match=0; - /* neutralize any unmatched opening between the current pair; - this will also neutralize higher located synonyms if any */ - for(k=openIdx+1; klimit; k++) { - qOpening=&bd->openings[k]; - if(qOpening->position>=position) - break; - if(qOpening->match>0) - qOpening->match=0; - } - } - return newProp; -} - -/* handle strong characters, digits 
and candidates for closing brackets */ -static UBool /* return TRUE if success */ -bracketProcessChar(BracketData *bd, int32_t position) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - DirProp *dirProps, dirProp, newProp; - UBiDiLevel level; - dirProps=bd->pBiDi->dirProps; - dirProp=dirProps[position]; - if(dirProp==ON) { - UChar c, match; - int32_t idx; - /* First see if it is a matching closing bracket. Hopefully, this is - more efficient than checking if it is a closing bracket at all */ - c=bd->pBiDi->text[position]; - for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) { - if(bd->openings[idx].match!=c) - continue; - /* We have a match */ - newProp=bracketProcessClosing(bd, idx, position); - if(newProp==ON) { /* N0d */ - c=0; /* prevent handling as an opening */ - break; - } - pLastIsoRun->lastBase=ON; - pLastIsoRun->contextDir=newProp; - pLastIsoRun->contextPos=position; - level=bd->pBiDi->levels[position]; - if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ - uint16_t flag; - int32_t i; - newProp=level&1; - pLastIsoRun->lastStrong=newProp; - flag=DIRPROP_FLAG(newProp); - for(i=pLastIsoRun->start; iopenings[i].flags|=flag; - /* matching brackets are not overridden by LRO/RLO */ - bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE; - } - /* matching brackets are not overridden by LRO/RLO */ - bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE; - return TRUE; - } - /* We get here only if the ON character is not a matching closing - bracket or it is a case of N0d */ - /* Now see if it is an opening bracket */ - if(c) - match=u_getBidiPairedBracket(c); /* get the matching char */ - else - match=0; - if(match!=c && /* has a matching char */ - ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */ - /* special case: process synonyms - create an opening entry for each synonym */ - if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ - if(!bracketAddOpening(bd, 0x3009, position)) - return FALSE; - } - else 
if(match==0x3009) { /* RIGHT ANGLE BRACKET */ - if(!bracketAddOpening(bd, 0x232A, position)) - return FALSE; - } - if(!bracketAddOpening(bd, match, position)) - return FALSE; - } - } - level=bd->pBiDi->levels[position]; - if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ - newProp=level&1; - if(dirProp!=S && dirProp!=WS && dirProp!=ON) - dirProps[position]=newProp; - pLastIsoRun->lastBase=newProp; - pLastIsoRun->lastStrong=newProp; - pLastIsoRun->contextDir=newProp; - pLastIsoRun->contextPos=position; - } - else if(dirProp<=R || dirProp==AL) { - newProp=DIR_FROM_STRONG(dirProp); - pLastIsoRun->lastBase=dirProp; - pLastIsoRun->lastStrong=dirProp; - pLastIsoRun->contextDir=newProp; - pLastIsoRun->contextPos=position; - } - else if(dirProp==EN) { - pLastIsoRun->lastBase=EN; - if(pLastIsoRun->lastStrong==L) { - newProp=L; /* W7 */ - if(!bd->isNumbersSpecial) - dirProps[position]=ENL; - pLastIsoRun->contextDir=L; - pLastIsoRun->contextPos=position; - } - else { - newProp=R; /* N0 */ - if(pLastIsoRun->lastStrong==AL) - dirProps[position]=AN; /* W2 */ - else - dirProps[position]=ENR; - pLastIsoRun->contextDir=R; - pLastIsoRun->contextPos=position; - } - } - else if(dirProp==AN) { - newProp=R; /* N0 */ - pLastIsoRun->lastBase=AN; - pLastIsoRun->contextDir=R; - pLastIsoRun->contextPos=position; - } - else if(dirProp==NSM) { - /* if the last real char was ON, change NSM to ON so that it - will stay ON even if the last real char is a bracket which - may be changed to L or R */ - newProp=pLastIsoRun->lastBase; - if(newProp==ON) - dirProps[position]=newProp; - } - else { - newProp=dirProp; - pLastIsoRun->lastBase=dirProp; - } - if(newProp<=R || newProp==AL) { - int32_t i; - uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp)); - for(i=pLastIsoRun->start; ilimit; i++) - if(position>bd->openings[i].position) - bd->openings[i].flags|=flag; - } - return TRUE; -} - -/* perform (X1)..(X9) ------------------------------------------------------- */ - -/* determine if the text is 
mixed-directional or single-directional */ -static UBiDiDirection -directionFromFlags(UBiDi *pBiDi) { - Flags flags=pBiDi->flags; - /* if the text contains AN and neutrals, then some neutrals may become RTL */ - if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { - return UBIDI_LTR; - } else if(!(flags&MASK_LTR)) { - return UBIDI_RTL; - } else { - return UBIDI_MIXED; - } -} - -/* - * Resolve the explicit levels as specified by explicit embedding codes. - * Recalculate the flags to have them reflect the real properties - * after taking the explicit embeddings into account. - * - * The BiDi algorithm is designed to result in the same behavior whether embedding - * levels are externally specified (from "styled text", supposedly the preferred - * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. - * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). - * However, in a real implementation, the removal of these codes and their index - * positions in the plain text is undesirable since it would result in - * reallocated, reindexed text. - * Instead, this implementation leaves the codes in there and just ignores them - * in the subsequent processing. - * In order to get the same reordering behavior, positions with a BN or a not-isolate - * explicit embedding code just get the same level assigned as the last "real" - * character. - * - * Some implementations, not this one, then overwrite some of these - * directionality properties at "real" same-level-run boundaries by - * L or R codes so that the resolution of weak types can be performed on the - * entire paragraph at once instead of having to parse it once more and - * perform that resolution on same-level-runs. - * This limits the scope of the implicit rules in effectively - * the same way as the run limits. 
- * - * Instead, this implementation does not modify these codes, except for - * paired brackets whose properties (ON) may be replaced by L or R. - * On one hand, the paragraph has to be scanned for same-level-runs, but - * on the other hand, this saves another loop to reset these codes, - * or saves making and modifying a copy of dirProps[]. - * - * - * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. - * - * - * Handling the stack of explicit levels (Xn): - * - * With the BiDi stack of explicit levels, as pushed with each - * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI, - * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. - * - * In order to have a correct push-pop semantics even in the case of overflows, - * overflow counters and a valid isolate counter are used as described in UAX#9 - * section 3.3.2 "Explicit Levels and Directions". - * - * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. - * - * Returns normally the direction; -1 if there was a memory shortage - * - */ -static UBiDiDirection -resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - const UChar *text=pBiDi->text; - - int32_t i=0, length=pBiDi->length; - Flags flags=pBiDi->flags; /* collect all directionalities in the text */ - DirProp dirProp; - UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); - UBiDiDirection direction; - pBiDi->isolateCount=0; - - if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } - - /* determine if the text is mixed-directional or single-directional */ - direction=directionFromFlags(pBiDi); - - /* we may not need to resolve any explicit levels */ - if((direction!=UBIDI_MIXED)) { - /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ - return direction; - } - if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { - /* inverse BiDi: mixed, but all characters are at the same 
embedding level */ - /* set all levels to the paragraph level */ - int32_t paraIndex, start, limit; - for(paraIndex=0; paraIndexparaCount; paraIndex++) { - if(paraIndex==0) - start=0; - else - start=pBiDi->paras[paraIndex-1].limit; - limit=pBiDi->paras[paraIndex].limit; - level=pBiDi->paras[paraIndex].level; - for(i=start; iparaCount; paraIndex++) { - if(paraIndex==0) - start=0; - else - start=pBiDi->paras[paraIndex-1].limit; - limit=pBiDi->paras[paraIndex].limit; - level=pBiDi->paras[paraIndex].level; - for(i=start; i=UBIDI_MAX_EXPLICIT_LEVEL - but we need one more entry as base */ - uint32_t stackLast=0; - int32_t overflowIsolateCount=0; - int32_t overflowEmbeddingCount=0; - int32_t validIsolateCount=0; - BracketData bracketData; - bracketInit(pBiDi, &bracketData); - stack[0]=level; /* initialize base entry to para level, no override, no isolate */ - - /* recalculate the flags */ - flags=0; - - for(i=0; i0 && stack[stackLast]pBiDi->isolateCount) - pBiDi->isolateCount=validIsolateCount; - embeddingLevel=newLevel; - /* we can increment stackLast without checking because newLevel - will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */ - stackLast++; - stack[stackLast]=embeddingLevel+ISOLATE; - bracketProcessLRI_RLI(&bracketData, embeddingLevel); - } else { - /* make it WS so that it is handled by adjustWSLevels() */ - dirProps[i]=WS; - overflowIsolateCount++; - } - break; - case PDI: - if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { - bracketProcessBoundary(&bracketData, lastCcPos, - previousLevel, embeddingLevel); - flags|=DIRPROP_FLAG_MULTI_RUNS; - } - /* (X6a) */ - if(overflowIsolateCount) { - overflowIsolateCount--; - /* make it WS so that it is handled by adjustWSLevels() */ - dirProps[i]=WS; - } - else if(validIsolateCount) { - flags|=DIRPROP_FLAG(PDI); - lastCcPos=i; - overflowEmbeddingCount=0; - while(stack[stackLast]paraLevel); - if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) - flags|=DIRPROP_FLAG(L); - /* again, 
determine if the text is mixed-directional or single-directional */ - pBiDi->flags=flags; - direction=directionFromFlags(pBiDi); - } - return direction; -} - -/* - * Use a pre-specified embedding levels array: - * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), - * ignore all explicit codes (X9), - * and check all the preset levels. - * - * Recalculate the flags to have them reflect the real properties - * after taking the explicit embeddings into account. - */ -static UBiDiDirection -checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - DirProp *dirProps=pBiDi->dirProps; - DirProp dirProp; - UBiDiLevel *levels=pBiDi->levels; - int32_t isolateCount=0; - - int32_t i, length=pBiDi->length; - Flags flags=0; /* collect all directionalities in the text */ - UBiDiLevel level; - pBiDi->isolateCount=0; - - for(i=0; ipBiDi->isolateCount) - pBiDi->isolateCount=isolateCount; - } - else if(dirProp==PDI) - isolateCount--; - else if(dirProp==B) - isolateCount=0; - if(level&UBIDI_LEVEL_OVERRIDE) { - /* keep the override flag in levels[i] but adjust the flags */ - level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ - flags|=DIRPROP_FLAG_O(level); - } else { - /* set the flags */ - flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); - } - if((levelparaLevel); - /* determine if the text is mixed-directional or single-directional */ - pBiDi->flags=flags; - return directionFromFlags(pBiDi); -} - -/****************************************************************** - The Properties state machine table -******************************************************************* - - All table cells are 8 bits: - bits 0..4: next state - bits 5..7: action to perform (if > 0) - - Cells may be of format "n" where n represents the next state - (except for the rightmost column). - Cells may also be of format "s(x,y)" where x represents an action - to perform and y represents the next state. 
- -******************************************************************* - Definitions and type for properties state table -******************************************************************* -*/ -#define IMPTABPROPS_COLUMNS 16 -#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) -#define GET_STATEPROPS(cell) ((cell)&0x1f) -#define GET_ACTIONPROPS(cell) ((cell)>>5) -#define s(action, newState) ((uint8_t)(newState+(action<<5))) - -static const uint8_t groupProp[] = /* dirProp regrouped */ -{ -/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ - 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 -}; -enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ - -/****************************************************************** - - PROPERTIES STATE TABLE - - In table impTabProps, - - the ON column regroups ON and WS, FSI, RLI, LRI and PDI - - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF - - the Res column is the reduced property assigned to a run - - Action 1: process current run1, init new run1 - 2: init new run2 - 3: process run1, process run2, init new run1 - 4: process run1, set run1=run2, init new run2 - - Notes: - 1) This table is used in resolveImplicitLevels(). - 2) This table triggers actions when there is a change in the Bidi - property of incoming characters (action 1). - 3) Most such property sequences are processed immediately (in - fact, passed to processPropertySeq(). - 4) However, numbers are assembled as one sequence. This means - that undefined situations (like CS following digits, until - it is known if the next char will be a digit) are held until - following chars define them. - Example: digits followed by CS, then comes another CS or ON; - the digits will be processed, then the CS assigned - as the start of an ON sequence (action 3). 
- 5) There are cases where more than one sequence must be - processed, for instance digits followed by CS followed by L: - the digits must be processed as one sequence, and the CS - must be processed as an ON sequence, all this before starting - assembling chars for the opening L sequence. - - -*/ -static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = -{ -/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ -/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, -/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, -/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, -/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, -/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, -/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, -/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, -/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, -/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, -/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, -/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, -/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, -/*12 AN+CS */ { 
s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, -/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, -/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, -/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, -/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, -/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, -/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, -/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, -/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, -/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, -/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, -/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } -}; - -/* we must undef macro s because the levels tables have a different - * structure (4 bits for action and 4 bits for next state. 
- */ -#undef s - -/****************************************************************** - The levels state machine tables -******************************************************************* - - All table cells are 8 bits: - bits 0..3: next state - bits 4..7: action to perform (if > 0) - - Cells may be of format "n" where n represents the next state - (except for the rightmost column). - Cells may also be of format "s(x,y)" where x represents an action - to perform and y represents the next state. - - This format limits each table to 16 states each and to 15 actions. - -******************************************************************* - Definitions and type for levels state tables -******************************************************************* -*/ -#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) -#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) -#define GET_STATE(cell) ((cell)&0x0f) -#define GET_ACTION(cell) ((cell)>>4) -#define s(action, newState) ((uint8_t)(newState+(action<<4))) - -typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; -typedef uint8_t ImpAct[]; - -/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, - * instead of having a pair of ImpTab and a pair of ImpAct. - */ -typedef struct ImpTabPair { - const void * pImpTab[2]; - const void * pImpAct[2]; -} ImpTabPair; - -/****************************************************************** - - LEVELS STATE TABLES - - In all levels state tables, - - state 0 is the initial state - - the Res column is the increment to add to the text level - for this property sequence. - - The impAct arrays for each table of a pair map the local action - numbers of the table to the total list of actions. For instance, - action 2 in a given table corresponds to the action number which - appears in entry [2] of the impAct array for that table. - The first entry of all impAct arrays must be 0. 
- - Action 1: init conditional sequence - 2: prepend conditional sequence to current sequence - 3: set ON sequence to new level - 1 - 4: init EN/AN/ON sequence - 5: fix EN/AN/ON sequence followed by R - 6: set previous level sequence to level 2 - - Notes: - 1) These tables are used in processPropertySeq(). The input - is property sequences as determined by resolveImplicitLevels. - 2) Most such property sequences are processed immediately - (levels are assigned). - 3) However, some sequences cannot be assigned a final level till - one or more following sequences are received. For instance, - ON following an R sequence within an even-level paragraph. - If the following sequence is R, the ON sequence will be - assigned basic run level+1, and so will the R sequence. - 4) S is generally handled like ON, since its level will be fixed - to paragraph level in adjustWSLevels(). - -*/ - -static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, -/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, -/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, -/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 }, -/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 } -}; -static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. 
-*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, -/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, -/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, -/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, -/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } -}; -static const ImpAct impAct0 = {0,1,2,3,4}; -static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, - &impTabR_DEFAULT}, - {&impAct0, &impAct0}}; - -static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, -/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 }, -/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 }, -/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 }, -/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } -}; -static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, - &impTabR_DEFAULT}, - {&impAct0, &impAct0}}; - -static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = -/* In this table, EN/AN+ON sequences receive levels as if associated with R - until proven that there is L or sor/eor on both sides. AN is handled like EN. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, -/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, -/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, -/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, -/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, -/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } -}; -static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = -/* In this table, EN/AN+ON sequences receive levels as if associated with R - until proven that there is L on both sides. AN is handled like EN. 
-*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, -/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, -/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, -/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } -}; -static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { - {&impTabL_GROUP_NUMBERS_WITH_R, - &impTabR_GROUP_NUMBERS_WITH_R}, - {&impAct0, &impAct0}}; - - -static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = -/* This table is identical to the Default LTR table except that EN and AN are - handled like L. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, -/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, -/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, -/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, -/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } -}; -static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = -/* This table is identical to the Default RTL table except that EN and AN are - handled like L. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, -/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, -/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, -/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } -}; -static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { - {&impTabL_INVERSE_NUMBERS_AS_L, - &impTabR_INVERSE_NUMBERS_AS_L}, - {&impAct0, &impAct0}}; - -static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. 
-*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, -/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, -/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, -/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, -/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, -/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, -/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } -}; -static const ImpAct impAct1 = {0,1,13,14}; -/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" - */ -static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { - {&impTabL_DEFAULT, - &impTabR_INVERSE_LIKE_DIRECT}, - {&impAct0, &impAct1}}; - -static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = -/* The case handled in this table is (visually): R EN L -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, -/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, -/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, -/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, -/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, -/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } -}; -static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = -/* The cases handled in this table are (visually): R EN L - R L AN L -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, -/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, -/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, -/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, -/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, -/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , 
s(4,0), s(4,0), 3 } -}; -static const ImpAct impAct2 = {0,1,2,5,6,7,8}; -static const ImpAct impAct3 = {0,1,9,10,11,12}; -static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { - {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, - &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, - {&impAct2, &impAct3}}; - -static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { - {&impTabL_NUMBERS_SPECIAL, - &impTabR_INVERSE_LIKE_DIRECT}, - {&impAct0, &impAct1}}; - -static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = -/* The case handled in this table is (visually): R EN L -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, -/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, -/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, -/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } -}; -static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { - {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, - &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, - {&impAct2, &impAct3}}; - -#undef s - -typedef struct { - const ImpTab * pImpTab; /* level table pointer */ - const ImpAct * pImpAct; /* action map array */ - int32_t startON; /* start of ON sequence */ - int32_t startL2EN; /* start of level 2 sequence */ - int32_t lastStrongRTL; /* index of last found R or AL */ - int32_t state; /* current state */ - int32_t runStart; /* start position of the run */ - UBiDiLevel runLevel; /* run level before implicit solving */ -} LevState; - -/*------------------------------------------------------------------------*/ - -static void -addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) - /* param pos: position where to insert - param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER - */ -{ -#define FIRSTALLOC 10 - Point point; - InsertPoints * pInsertPoints=&(pBiDi->insertPoints); - - if 
(pInsertPoints->capacity == 0) - { - pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); - if (pInsertPoints->points == NULL) - { - pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - pInsertPoints->capacity=FIRSTALLOC; - } - if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ - { - void * savePoints=pInsertPoints->points; - pInsertPoints->points=uprv_realloc(pInsertPoints->points, - pInsertPoints->capacity*2*sizeof(Point)); - if (pInsertPoints->points == NULL) - { - pInsertPoints->points=savePoints; - pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - else pInsertPoints->capacity*=2; - } - point.pos=pos; - point.flag=flag; - pInsertPoints->points[pInsertPoints->size]=point; - pInsertPoints->size++; -#undef FIRSTALLOC -} - -static void -setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level) -{ - DirProp *dirProps=pBiDi->dirProps, dirProp; - UBiDiLevel *levels=pBiDi->levels; - int32_t isolateCount=0, k; - for(k=start; kpImpTab; - const ImpAct * pImpAct=pLevState->pImpAct; - UBiDiLevel * levels=pBiDi->levels; - UBiDiLevel level, addLevel; - InsertPoints * pInsertPoints; - int32_t start0, k; - - start0=start; /* save original start position */ - oldStateSeq=(uint8_t)pLevState->state; - cell=(*pImpTab)[oldStateSeq][_prop]; - pLevState->state=GET_STATE(cell); /* isolate the new state */ - actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ - addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; - - if(actionSeq) { - switch(actionSeq) { - case 1: /* init ON seq */ - pLevState->startON=start0; - break; - - case 2: /* prepend ON seq to current seq */ - start=pLevState->startON; - break; - - case 3: /* EN/AN after R+ON */ - level=pLevState->runLevel+1; - setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); - break; - - case 4: /* EN/AN before R for NUMBERS_SPECIAL */ - level=pLevState->runLevel+2; - setLevelsOutsideIsolates(pBiDi, 
pLevState->startON, start0, level); - break; - - case 5: /* L or S after possible relevant EN/AN */ - /* check if we had EN after R/AL */ - if (pLevState->startL2EN >= 0) { - addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); - } - pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ - /* check if we had any relevant EN/AN after R/AL */ - pInsertPoints=&(pBiDi->insertPoints); - if ((pInsertPoints->capacity == 0) || - (pInsertPoints->size <= pInsertPoints->confirmed)) - { - /* nothing, just clean up */ - pLevState->lastStrongRTL=-1; - /* check if we have a pending conditional segment */ - level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; - if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ - start=pLevState->startON; /* reset to basic run level */ - } - if (_prop == DirProp_S) /* add LRM before S */ - { - addPoint(pBiDi, start0, LRM_BEFORE); - pInsertPoints->confirmed=pInsertPoints->size; - } - break; - } - /* reset previous RTL cont to level for LTR text */ - for (k=pLevState->lastStrongRTL+1; kconfirmed=pInsertPoints->size; - pLevState->lastStrongRTL=-1; - if (_prop == DirProp_S) /* add LRM before S */ - { - addPoint(pBiDi, start0, LRM_BEFORE); - pInsertPoints->confirmed=pInsertPoints->size; - } - break; - - case 6: /* R/AL after possible relevant EN/AN */ - /* just clean up */ - pInsertPoints=&(pBiDi->insertPoints); - if (pInsertPoints->capacity > 0) - /* remove all non confirmed insert points */ - pInsertPoints->size=pInsertPoints->confirmed; - pLevState->startON=-1; - pLevState->startL2EN=-1; - pLevState->lastStrongRTL=limit - 1; - break; - - case 7: /* EN/AN after R/AL + possible cont */ - /* check for real AN */ - if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && - (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) - { - /* real AN */ - if (pLevState->startL2EN == -1) /* if no relevant EN already found */ - { - /* just note the righmost digit as a strong RTL */ - pLevState->lastStrongRTL=limit - 
1; - break; - } - if (pLevState->startL2EN >= 0) /* after EN, no AN */ - { - addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); - pLevState->startL2EN=-2; - } - /* note AN */ - addPoint(pBiDi, start0, LRM_BEFORE); - break; - } - /* if first EN/AN after R/AL */ - if (pLevState->startL2EN == -1) { - pLevState->startL2EN=start0; - } - break; - - case 8: /* note location of latest R/AL */ - pLevState->lastStrongRTL=limit - 1; - pLevState->startON=-1; - break; - - case 9: /* L after R+ON/EN/AN */ - /* include possible adjacent number on the left */ - for (k=start0-1; k>=0 && !(levels[k]&1); k--); - if(k>=0) { - addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ - pInsertPoints=&(pBiDi->insertPoints); - pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ - } - pLevState->startON=start0; - break; - - case 10: /* AN after L */ - /* AN numbers between L text on both sides may be trouble. */ - /* tentatively bracket with LRMs; will be confirmed if followed by L */ - addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ - addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ - break; - - case 11: /* R after L+ON/EN/AN */ - /* false alert, infirm LRMs around previous AN */ - pInsertPoints=&(pBiDi->insertPoints); - pInsertPoints->size=pInsertPoints->confirmed; - if (_prop == DirProp_S) /* add RLM before S */ - { - addPoint(pBiDi, start0, RLM_BEFORE); - pInsertPoints->confirmed=pInsertPoints->size; - } - break; - - case 12: /* L after L+ON/AN */ - level=pLevState->runLevel + addLevel; - for(k=pLevState->startON; kinsertPoints); - pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ - pLevState->startON=start0; - break; - - case 13: /* L after L+ON+EN/AN/ON */ - level=pLevState->runLevel; - for(k=start0-1; k>=pLevState->startON; k--) { - if(levels[k]==level+3) { - while(levels[k]==level+3) { - levels[k--]-=2; - } - while(levels[k]==level) { - k--; - } - } - if(levels[k]==level+2) { - levels[k]=level; - continue; - } - levels[k]=level+1; - } - 
break; - - case 14: /* R after L+ON+EN/AN/ON */ - level=pLevState->runLevel+1; - for(k=start0-1; k>=pLevState->startON; k--) { - if(levels[k]>level) { - levels[k]-=2; - } - } - break; - - default: /* we should never get here */ - U_ASSERT(FALSE); - break; - } - } - if((addLevel) || (start < start0)) { - level=pLevState->runLevel + addLevel; - if(start>=pLevState->runStart) { - for(k=start; kprologue; - int32_t length=pBiDi->proLength; - int32_t i; - UChar32 uchar; - DirProp dirProp; - for(i=length; i>0; ) { - /* i is decremented by U16_PREV */ - U16_PREV(text, 0, i, uchar); - dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); - if(dirProp==L) { - return DirProp_L; - } - if(dirProp==R || dirProp==AL) { - return DirProp_R; - } - if(dirProp==B) { - return DirProp_ON; - } - } - return DirProp_ON; -} - -/** - * Returns the directionality of the first strong character, or digit, in the epilogue, if any. - * Requires epilogue!=null. - */ -static DirProp -firstL_R_AL_EN_AN(UBiDi *pBiDi) { - const UChar *text=pBiDi->epilogue; - int32_t length=pBiDi->epiLength; - int32_t i; - UChar32 uchar; - DirProp dirProp; - for(i=0; idirProps; - DirProp dirProp; - LevState levState; - int32_t i, start1, start2; - uint16_t oldStateImp, stateImp, actionImp; - uint8_t gprop, resProp, cell; - UBool inverseRTL; - DirProp nextStrongProp=R; - int32_t nextStrongPos=-1; - - /* check for RTL inverse BiDi mode */ - /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to - * loop on the text characters from end to start. - * This would need a different properties state table (at least different - * actions) and different levels state tables (maybe very similar to the - * LTR corresponding ones. 
- */ - inverseRTL=(UBool) - ((startlastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && - (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || - pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); - - /* initialize for property and levels state tables */ - levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ - levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ - levState.runStart=start; - levState.runLevel=pBiDi->levels[start]; - levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; - levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; - if(start==0 && pBiDi->proLength>0) { - DirProp lastStrong=lastL_R_AL(pBiDi); - if(lastStrong!=DirProp_ON) { - sor=lastStrong; - } - } - /* The isolates[] entries contain enough information to - resume the bidi algorithm in the same state as it was - when it was interrupted by an isolate sequence. */ - if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) { - levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON; - start1=pBiDi->isolates[pBiDi->isolateCount].start1; - stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; - levState.state=pBiDi->isolates[pBiDi->isolateCount].state; - pBiDi->isolateCount--; - } else { - levState.startON=-1; - start1=start; - if(dirProps[start]==NSM) - stateImp = 1 + sor; - else - stateImp=0; - levState.state=0; - processPropertySeq(pBiDi, &levState, sor, start, start); - } - start2=start; /* to make Java compiler happy */ - - for(i=start; i<=limit; i++) { - if(i>=limit) { - int32_t k; - for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--); - dirProp=dirProps[k]; - if(dirProp==LRI || dirProp==RLI) - break; /* no forced closing for sequence ending with LRI/RLI */ - gprop=eor; - } else { - DirProp prop, prop1; - prop=dirProps[i]; - if(prop==B) { - pBiDi->isolateCount=-1; /* current isolates stack entry == none */ - } - if(inverseRTL) { - 
if(prop==AL) { - /* AL before EN does not make it AN */ - prop=R; - } else if(prop==EN) { - if(nextStrongPos<=i) { - /* look for next strong char (L/R/AL) */ - int32_t j; - nextStrongProp=R; /* set default */ - nextStrongPos=limit; - for(j=i+1; jlength && pBiDi->epiLength>0) { - DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); - if(firstStrong!=DirProp_ON) { - eor=firstStrong; - } - } - - /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */ - for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--); - dirProp=dirProps[i]; - if((dirProp==LRI || dirProp==RLI) && limitlength) { - pBiDi->isolateCount++; - pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; - pBiDi->isolates[pBiDi->isolateCount].state=levState.state; - pBiDi->isolates[pBiDi->isolateCount].start1=start1; - pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON; - } - else - processPropertySeq(pBiDi, &levState, eor, limit, limit); -} - -/* perform (L1) and (X9) ---------------------------------------------------- */ - -/* - * Reset the embedding levels for some non-graphic characters (L1). - * This function also sets appropriate levels for BN, and - * explicit embedding types that are supposed to have been removed - * from the paragraph in (X9). 
- */ -static void -adjustWSLevels(UBiDi *pBiDi) { - const DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - int32_t i; - - if(pBiDi->flags&MASK_WS) { - UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; - Flags flag; - - i=pBiDi->trailingWSStart; - while(i>0) { - /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ - while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) { - if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { - levels[i]=0; - } else { - levels[i]=GET_PARALEVEL(pBiDi, i); - } - } - - /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ - /* here, i+1 is guaranteed to be 0) { - flag=DIRPROP_FLAG(dirProps[--i]); - if(flag&MASK_BN_EXPLICIT) { - levels[i]=levels[i+1]; - } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { - levels[i]=0; - break; - } else if(flag&MASK_B_S) { - levels[i]=GET_PARALEVEL(pBiDi, i); - break; - } - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_setContext(UBiDi *pBiDi, - const UChar *prologue, int32_t proLength, - const UChar *epilogue, int32_t epiLength, - UErrorCode *pErrorCode) { - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(pBiDi==NULL || proLength<-1 || epiLength<-1 || - (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(proLength==-1) { - pBiDi->proLength=u_strlen(prologue); - } else { - pBiDi->proLength=proLength; - } - if(epiLength==-1) { - pBiDi->epiLength=u_strlen(epilogue); - } else { - pBiDi->epiLength=epiLength; - } - pBiDi->prologue=prologue; - pBiDi->epilogue=epilogue; -} - -static void -setParaSuccess(UBiDi *pBiDi) { - pBiDi->proLength=0; /* forget the last context */ - pBiDi->epiLength=0; - pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ -} - -#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) -#define BIDI_ABS(x) ((x)>=0 ? 
(x) : (-(x))) - -static void -setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, - UBiDiLevel paraLevel, UErrorCode *pErrorCode) { - void *runsOnlyMemory = NULL; - int32_t *visualMap; - UChar *visualText; - int32_t saveLength, saveTrailingWSStart; - const UBiDiLevel *levels; - UBiDiLevel *saveLevels; - UBiDiDirection saveDirection; - UBool saveMayAllocateText; - Run *runs; - int32_t visualLength, i, j, visualStart, logicalStart, - runCount, runLength, addedRuns, insertRemove, - start, limit, step, indexOddBit, logicalPos, - index0, index1; - uint32_t saveOptions; - - pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; - if(length==0) { - ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); - goto cleanup3; - } - /* obtain memory for mapping table and visual text */ - runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); - if(runsOnlyMemory==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - goto cleanup3; - } - visualMap=runsOnlyMemory; - visualText=(UChar *)&visualMap[length]; - saveLevels=(UBiDiLevel *)&visualText[length]; - saveOptions=pBiDi->reorderingOptions; - if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { - pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; - pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; - } - paraLevel&=1; /* accept only 0 or 1 */ - ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - goto cleanup3; - } - /* we cannot access directly pBiDi->levels since it is not yet set if - * direction is not MIXED - */ - levels=ubidi_getLevels(pBiDi, pErrorCode); - uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel)); - saveTrailingWSStart=pBiDi->trailingWSStart; - saveLength=pBiDi->length; - saveDirection=pBiDi->direction; - - /* FOOD FOR THOUGHT: instead of writing the visual text, we could use - * the visual map and the dirProps array to drive the second call - * to ubidi_setPara (but must make provision for possible 
removal of - * BiDi controls. Alternatively, only use the dirProps array via - * customized classifier callback. - */ - visualLength=ubidi_writeReordered(pBiDi, visualText, length, - UBIDI_DO_MIRRORING, pErrorCode); - ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - goto cleanup2; - } - pBiDi->reorderingOptions=saveOptions; - - pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; - paraLevel^=1; - /* Because what we did with reorderingOptions, visualText may be shorter - * than the original text. But we don't want the levels memory to be - * reallocated shorter than the original length, since we need to restore - * the levels as after the first call to ubidi_setpara() before returning. - * We will force mayAllocateText to FALSE before the second call to - * ubidi_setpara(), and will restore it afterwards. - */ - saveMayAllocateText=pBiDi->mayAllocateText; - pBiDi->mayAllocateText=FALSE; - ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); - pBiDi->mayAllocateText=saveMayAllocateText; - ubidi_getRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - goto cleanup1; - } - /* check if some runs must be split, count how many splits */ - addedRuns=0; - runCount=pBiDi->runCount; - runs=pBiDi->runs; - visualStart=0; - for(i=0; irunsMemory[0]=runs[0]; - } - runs=pBiDi->runs=pBiDi->runsMemory; - pBiDi->runCount+=addedRuns; - } else { - goto cleanup1; - } - } - /* split runs which are not consecutive in source text */ - for(i=runCount-1; i>=0; i--) { - runLength= i==0 ? 
runs[0].visualLimit : - runs[i].visualLimit-runs[i-1].visualLimit; - logicalStart=runs[i].logicalStart; - indexOddBit=GET_ODD_BIT(logicalStart); - logicalStart=GET_INDEX(logicalStart); - if(runLength<2) { - if(addedRuns) { - runs[i+addedRuns]=runs[i]; - } - logicalPos=visualMap[logicalStart]; - runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, - saveLevels[logicalPos]^indexOddBit); - continue; - } - if(indexOddBit) { - start=logicalStart; - limit=logicalStart+runLength-1; - step=1; - } else { - start=logicalStart+runLength-1; - limit=logicalStart; - step=-1; - } - for(j=start; j!=limit; j+=step) { - index0=visualMap[j]; - index1=visualMap[j+step]; - if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { - logicalPos=BIDI_MIN(visualMap[start], index0); - runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, - saveLevels[logicalPos]^indexOddBit); - runs[i+addedRuns].visualLimit=runs[i].visualLimit; - runs[i].visualLimit-=BIDI_ABS(j-start)+1; - insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); - runs[i+addedRuns].insertRemove=insertRemove; - runs[i].insertRemove&=~insertRemove; - start=j+step; - addedRuns--; - } - } - if(addedRuns) { - runs[i+addedRuns]=runs[i]; - } - logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); - runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, - saveLevels[logicalPos]^indexOddBit); - } - - cleanup1: - /* restore initial paraLevel */ - pBiDi->paraLevel^=1; - cleanup2: - /* restore real text */ - pBiDi->text=text; - pBiDi->length=saveLength; - pBiDi->originalLength=length; - pBiDi->direction=saveDirection; - /* the saved levels should never excess levelsSize, but we check anyway */ - if(saveLength>pBiDi->levelsSize) { - saveLength=pBiDi->levelsSize; - } - uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel)); - pBiDi->trailingWSStart=saveTrailingWSStart; - if(pBiDi->runCount>1) { - pBiDi->direction=UBIDI_MIXED; - } - cleanup3: - /* free memory for 
mapping table and visual text */ - uprv_free(runsOnlyMemory); - - pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; -} - -/* ubidi_setPara ------------------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, - UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, - UErrorCode *pErrorCode) { - UBiDiDirection direction; - DirProp *dirProps; - - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(pBiDi==NULL || text==NULL || length<-1 || - (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevelreorderingMode==UBIDI_REORDER_RUNS_ONLY) { - setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); - return; - } - - /* initialize the UBiDi structure */ - pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ - pBiDi->text=text; - pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; - pBiDi->paraLevel=paraLevel; - pBiDi->direction=paraLevel&1; - pBiDi->paraCount=1; - - pBiDi->dirProps=NULL; - pBiDi->levels=NULL; - pBiDi->runs=NULL; - pBiDi->insertPoints.size=0; /* clean up from last call */ - pBiDi->insertPoints.confirmed=0; /* clean up from last call */ - - /* - * Save the original paraLevel if contextual; otherwise, set to 0. - */ - pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); - - if(length==0) { - /* - * For an empty paragraph, create a UBiDi object with the paraLevel and - * the flags and the direction set but without allocating zero-length arrays. - * There is nothing more to do. 
- */ - if(IS_DEFAULT_LEVEL(paraLevel)) { - pBiDi->paraLevel&=1; - pBiDi->defaultParaLevel=0; - } - pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); - pBiDi->runCount=0; - pBiDi->paraCount=0; - setParaSuccess(pBiDi); /* mark successful setPara */ - return; - } - - pBiDi->runCount=-1; - - /* allocate paras memory */ - if(pBiDi->parasMemory) - pBiDi->paras=pBiDi->parasMemory; - else - pBiDi->paras=pBiDi->simpleParas; - - /* - * Get the directional properties, - * the flags bit-set, and - * determine the paragraph level if necessary. - */ - if(getDirPropsMemory(pBiDi, length)) { - pBiDi->dirProps=pBiDi->dirPropsMemory; - if(!getDirProps(pBiDi)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - dirProps=pBiDi->dirProps; - /* the processed length may have changed if UBIDI_OPTION_STREAMING */ - length= pBiDi->length; - pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ - - /* are explicit levels specified? 
*/ - if(embeddingLevels==NULL) { - /* no: determine explicit levels according to the (Xn) rules */\ - if(getLevelsMemory(pBiDi, length)) { - pBiDi->levels=pBiDi->levelsMemory; - direction=resolveExplicitLevels(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else { - /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ - pBiDi->levels=embeddingLevels; - direction=checkExplicitLevels(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - /* allocate isolate memory */ - if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT) - pBiDi->isolates=pBiDi->simpleIsolates; - else - if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize) - pBiDi->isolates=pBiDi->isolatesMemory; - else { - if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { - pBiDi->isolates=pBiDi->isolatesMemory; - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } - pBiDi->isolateCount=-1; /* current isolates stack entry == none */ - - /* - * The steps after (X9) in the UBiDi algorithm are performed only if - * the paragraph text has mixed directionality! 
- */ - pBiDi->direction=direction; - switch(direction) { - case UBIDI_LTR: - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pBiDi->trailingWSStart=0; - break; - case UBIDI_RTL: - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pBiDi->trailingWSStart=0; - break; - default: - /* - * Choose the right implicit state table - */ - switch(pBiDi->reorderingMode) { - case UBIDI_REORDER_DEFAULT: - pBiDi->pImpTabPair=&impTab_DEFAULT; - break; - case UBIDI_REORDER_NUMBERS_SPECIAL: - pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; - break; - case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: - pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; - break; - case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: - pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; - break; - case UBIDI_REORDER_INVERSE_LIKE_DIRECT: - if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { - pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; - } else { - pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; - } - break; - case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: - if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { - pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; - } else { - pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; - } - break; - default: - /* we should never get here */ - U_ASSERT(FALSE); - break; - } - /* - * If there are no external levels specified and there - * are no significant explicit level codes in the text, - * then we can treat the entire paragraph as one run. - * Otherwise, we need to perform the following rules on runs of - * the text with the same embedding levels. (X10) - * "Significant" explicit level codes are ones that actually - * affect non-BN characters. - * Examples for "insignificant" ones are empty embeddings - * LRE-PDF, LRE-RLE-PDF-PDF, etc. 
- */ - if(embeddingLevels==NULL && pBiDi->paraCount<=1 && - !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { - resolveImplicitLevels(pBiDi, 0, length, - GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), - GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); - } else { - /* sor, eor: start and end types of same-level-run */ - UBiDiLevel *levels=pBiDi->levels; - int32_t start, limit=0; - UBiDiLevel level, nextLevel; - DirProp sor, eor; - - /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ - level=GET_PARALEVEL(pBiDi, 0); - nextLevel=levels[0]; - if(level0) && (dirProps[start-1]==B)) { - /* except if this is a new paragraph, then set sor = para level */ - sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); - } else { - sor=eor; - } - - /* search for the limit of this run */ - while((++limitinsertPoints.errorCode)) - { - *pErrorCode=pBiDi->insertPoints.errorCode; - return; - } - /* reset the embedding levels for some non-graphic characters (L1), (X9) */ - adjustWSLevels(pBiDi); - break; - } - /* add RLM for inverse Bidi with contextual orientation resolving - * to RTL which would not round-trip otherwise - */ - if((pBiDi->defaultParaLevel>0) && - (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && - ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || - (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { - int32_t i, j, start, last; - UBiDiLevel level; - DirProp dirProp; - for(i=0; iparaCount; i++) { - last=(pBiDi->paras[i].limit)-1; - level=pBiDi->paras[i].level; - if(level==0) - continue; /* LTR paragraph */ - start= i==0 ? 
0 : pBiDi->paras[i-1].limit; - for(j=last; j>=start; j--) { - dirProp=dirProps[j]; - if(dirProp==L) { - if(jreorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { - pBiDi->resultLength -= pBiDi->controlCount; - } else { - pBiDi->resultLength += pBiDi->insertPoints.size; - } - setParaSuccess(pBiDi); /* mark successful setPara */ -} - -U_CAPI void U_EXPORT2 -ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { - if(pBiDi!=NULL) { - pBiDi->orderParagraphsLTR=orderParagraphsLTR; - } -} - -U_CAPI UBool U_EXPORT2 -ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->orderParagraphsLTR; - } else { - return FALSE; - } -} - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getDirection(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->direction; - } else { - return UBIDI_LTR; - } -} - -U_CAPI const UChar * U_EXPORT2 -ubidi_getText(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->text; - } else { - return NULL; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getLength(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->originalLength; - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getProcessedLength(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->length; - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getResultLength(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->resultLength; - } else { - return 0; - } -} - -/* paragraphs API functions ------------------------------------------------- */ - -U_CAPI UBiDiLevel U_EXPORT2 -ubidi_getParaLevel(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->paraLevel; - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_countParagraphs(UBiDi *pBiDi) { - if(!IS_VALID_PARA_OR_LINE(pBiDi)) { - return 0; - } else { - return pBiDi->paraCount; - } -} - -U_CAPI void U_EXPORT2 -ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t 
paraIndex, - int32_t *pParaStart, int32_t *pParaLimit, - UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { - int32_t paraStart; - - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); - RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); - - pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ - if(paraIndex) { - paraStart=pBiDi->paras[paraIndex-1].limit; - } else { - paraStart=0; - } - if(pParaStart!=NULL) { - *pParaStart=paraStart; - } - if(pParaLimit!=NULL) { - *pParaLimit=pBiDi->paras[paraIndex].limit; - } - if(pParaLevel!=NULL) { - *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, - int32_t *pParaStart, int32_t *pParaLimit, - UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { - int32_t paraIndex; - - /* check the argument values */ - /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ - RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); - - for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); - ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); - return paraIndex; -} - -U_CAPI void U_EXPORT2 -ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, - const void *newContext, UBiDiClassCallback **oldFn, - const void **oldContext, UErrorCode *pErrorCode) -{ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(pBiDi==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if( oldFn ) - { - *oldFn = pBiDi->fnClassCallback; - } - if( oldContext ) - { - *oldContext = pBiDi->coClassCallback; - } - pBiDi->fnClassCallback = newFn; - pBiDi->coClassCallback = newContext; -} 
- -U_CAPI void U_EXPORT2 -ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) -{ - if(pBiDi==NULL) { - return; - } - if( fn ) - { - *fn = pBiDi->fnClassCallback; - } - if( context ) - { - *context = pBiDi->coClassCallback; - } -} - -U_CAPI UCharDirection U_EXPORT2 -ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) -{ - UCharDirection dir; - - if( pBiDi->fnClassCallback == NULL || - (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) - { - dir = ubidi_getClass(pBiDi->bdp, c); - } - if(dir >= U_CHAR_DIRECTION_COUNT) { - dir = ON; - } - return dir; -} diff --git a/deps/icu-small/source/common/ubidi.cpp b/deps/icu-small/source/common/ubidi.cpp new file mode 100644 index 0000000000..8e2fc36e5f --- /dev/null +++ b/deps/icu-small/source/common/ubidi.cpp @@ -0,0 +1,3042 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidi.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999jul27 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +* +*/ + +#include "cmemory.h" +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/ubidi.h" +#include "unicode/utf16.h" +#include "ubidi_props.h" +#include "ubidiimp.h" +#include "uassert.h" + +/* + * General implementation notes: + * + * Throughout the implementation, there are comments like (W2) that refer to + * rules of the BiDi algorithm, in this example to the second rule of the + * resolution of weak types. 
+ * + * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) + * character according to UTF-16, the second UChar gets the directional property of + * the entire character assigned, while the first one gets a BN, a boundary + * neutral, type, which is ignored by most of the algorithm according to + * rule (X9) and the implementation suggestions of the BiDi algorithm. + * + * Later, adjustWSLevels() will set the level for each BN to that of the + * following character (UChar), which results in surrogate pairs getting the + * same level on each of their surrogates. + * + * In a UTF-8 implementation, the same thing could be done: the last byte of + * a multi-byte sequence would get the "real" property, while all previous + * bytes of that sequence would get BN. + * + * It is not possible to assign all those parts of a character the same real + * property because this would fail in the resolution of weak types with rules + * that look at immediately surrounding types. + * + * As a related topic, this implementation does not remove Boundary Neutral + * types from the input, but ignores them wherever this is relevant. + * For example, the loop for the resolution of the weak types reads + * types until it finds a non-BN. + * Also, explicit embedding codes are neither changed into BN nor removed. + * They are only treated the same way real BNs are. + * As stated before, adjustWSLevels() takes care of them at the end. + * For the purpose of conformance, the levels of all these codes + * do not matter. + * + * Note that this implementation modifies the dirProps + * after the initial setup, when applying X5c (replace FSI by LRI or RLI), + * X6, N0 (replace paired brackets by L or R). + * + * In this implementation, the resolution of weak types (W1 to W6), + * neutrals (N1 and N2), and the assignment of the resolved level (In) + * are all done in one single loop, in resolveImplicitLevels(). 
+ * Changes of dirProp values are done on the fly, without writing + * them back to the dirProps array. + * + * + * This implementation contains code that allows to bypass steps of the + * algorithm that are not needed on the specific paragraph + * in order to speed up the most common cases considerably, + * like text that is entirely LTR, or RTL text without numbers. + * + * Most of this is done by setting a bit for each directional property + * in a flags variable and later checking for whether there are + * any LTR characters or any RTL characters, or both, whether + * there are any explicit embedding codes, etc. + * + * If the (Xn) steps are performed, then the flags are re-evaluated, + * because they will then not contain the embedding codes any more + * and will be adjusted for override codes, so that subsequently + * more bypassing may be possible than what the initial flags suggested. + * + * If the text is not mixed-directional, then the + * algorithm steps for the weak type resolution are not performed, + * and all levels are set to the paragraph level. + * + * If there are no explicit embedding codes, then the (Xn) steps + * are not performed. + * + * If embedding levels are supplied as a parameter, then all + * explicit embedding codes are ignored, and the (Xn) steps + * are not performed. + * + * White Space types could get the level of the run they belong to, + * and are checked with a test of (flags&MASK_EMBEDDING) to + * consider if the paragraph direction should be considered in + * the flags variable. + * + * If there are no White Space types in the paragraph, then + * (L1) is not necessary in adjustWSLevels(). 
+ */ + +/* to avoid some conditional statements, use tiny constant arrays */ +static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; +static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; +static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; + +#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] +#define DIRPROP_FLAG_E(level) flagE[(level)&1] +#define DIRPROP_FLAG_O(level) flagO[(level)&1] + +#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) + +#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE) + +/* UBiDi object management -------------------------------------------------- */ + +U_CAPI UBiDi * U_EXPORT2 +ubidi_open(void) +{ + UErrorCode errorCode=U_ZERO_ERROR; + return ubidi_openSized(0, 0, &errorCode); +} + +U_CAPI UBiDi * U_EXPORT2 +ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { + UBiDi *pBiDi; + + /* check the argument values */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } else if(maxLength<0 || maxRunCount<0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; /* invalid arguments */ + } + + /* allocate memory for the object */ + pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); + if(pBiDi==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ + uprv_memset(pBiDi, 0, sizeof(UBiDi)); + + /* get BiDi properties */ + pBiDi->bdp=ubidi_getSingleton(); + + /* allocate memory for arrays as requested */ + if(maxLength>0) { + if( !getInitialDirPropsMemory(pBiDi, maxLength) || + !getInitialLevelsMemory(pBiDi, maxLength) + ) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + pBiDi->mayAllocateText=TRUE; + } + + if(maxRunCount>0) { + if(maxRunCount==1) { + /* use simpleRuns[] */ + pBiDi->runsSize=sizeof(Run); + } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + pBiDi->mayAllocateRuns=TRUE; + } + + 
if(U_SUCCESS(*pErrorCode)) { + return pBiDi; + } else { + ubidi_close(pBiDi); + return NULL; + } +} + +/* + * We are allowed to allocate memory if memory==NULL or + * mayAllocate==TRUE for each array that we need. + * We also try to grow memory as needed if we + * allocate it. + * + * Assume sizeNeeded>0. + * If *pMemory!=NULL, then assume *pSize>0. + * + * ### this realloc() may unnecessarily copy the old data, + * which we know we don't need any more; + * is this the best way to do this?? + */ +U_CFUNC UBool +ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { + void **pMemory = (void **)bidiMem; + /* check for existing memory */ + if(*pMemory==NULL) { + /* we need to allocate memory */ + if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { + *pSize=sizeNeeded; + return TRUE; + } else { + return FALSE; + } + } else { + if(sizeNeeded<=*pSize) { + /* there is already enough memory */ + return TRUE; + } + else if(!mayAllocate) { + /* not enough memory, and we must not allocate */ + return FALSE; + } else { + /* we try to grow */ + void *memory; + /* in most cases, we do not need the copy-old-data part of + * realloc, but it is needed when adding runs using getRunsMemory() + * in setParaRunsOnly() + */ + if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { + *pMemory=memory; + *pSize=sizeNeeded; + return TRUE; + } else { + /* we failed to grow */ + return FALSE; + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_close(UBiDi *pBiDi) { + if(pBiDi!=NULL) { + pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ + if(pBiDi->dirPropsMemory!=NULL) { + uprv_free(pBiDi->dirPropsMemory); + } + if(pBiDi->levelsMemory!=NULL) { + uprv_free(pBiDi->levelsMemory); + } + if(pBiDi->openingsMemory!=NULL) { + uprv_free(pBiDi->openingsMemory); + } + if(pBiDi->parasMemory!=NULL) { + uprv_free(pBiDi->parasMemory); + } + if(pBiDi->runsMemory!=NULL) { + uprv_free(pBiDi->runsMemory); + } + 
if(pBiDi->isolatesMemory!=NULL) { + uprv_free(pBiDi->isolatesMemory); + } + if(pBiDi->insertPoints.points!=NULL) { + uprv_free(pBiDi->insertPoints.points); + } + + uprv_free(pBiDi); + } +} + +/* set to approximate "inverse BiDi" ---------------------------------------- */ + +U_CAPI void U_EXPORT2 +ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { + if(pBiDi!=NULL) { + pBiDi->isInverse=isInverse; + pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L + : UBIDI_REORDER_DEFAULT; + } +} + +U_CAPI UBool U_EXPORT2 +ubidi_isInverse(UBiDi *pBiDi) { + if(pBiDi!=NULL) { + return pBiDi->isInverse; + } else { + return FALSE; + } +} + +/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of + * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre + * concept of RUNS_ONLY which is a double operation. + * It could be advantageous to divide this into 3 concepts: + * a) Operation: direct / inverse / RUNS_ONLY + * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R + * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL + * This would allow combinations not possible today like RUNS_ONLY with + * NUMBERS_SPECIAL. + * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and + * REMOVE_CONTROLS for the inverse step. + * Not all combinations would be supported, and probably not all do make sense. + * This would need to document which ones are supported and what are the + * fallbacks for unsupported combinations. 
+ */ +U_CAPI void U_EXPORT2 +ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { + if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) + && (reorderingMode < UBIDI_REORDER_COUNT)) { + pBiDi->reorderingMode = reorderingMode; + pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); + } +} + +U_CAPI UBiDiReorderingMode U_EXPORT2 +ubidi_getReorderingMode(UBiDi *pBiDi) { + if (pBiDi!=NULL) { + return pBiDi->reorderingMode; + } else { + return UBIDI_REORDER_DEFAULT; + } +} + +U_CAPI void U_EXPORT2 +ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { + if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { + reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; + } + if (pBiDi!=NULL) { + pBiDi->reorderingOptions=reorderingOptions; + } +} + +U_CAPI uint32_t U_EXPORT2 +ubidi_getReorderingOptions(UBiDi *pBiDi) { + if (pBiDi!=NULL) { + return pBiDi->reorderingOptions; + } else { + return 0; + } +} + +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getBaseDirection(const UChar *text, +int32_t length){ + + int32_t i; + UChar32 uchar; + UCharDirection dir; + + if( text==NULL || length<-1 ){ + return UBIDI_NEUTRAL; + } + + if(length==-1) { + length=u_strlen(text); + } + + for( i = 0 ; i < length; ) { + /* i is incremented by U16_NEXT */ + U16_NEXT(text, i, length, uchar); + dir = u_charDirection(uchar); + if( dir == U_LEFT_TO_RIGHT ) + return UBIDI_LTR; + if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) + return UBIDI_RTL; + } + return UBIDI_NEUTRAL; +} + +/* perform (P2)..(P3) ------------------------------------------------------- */ + +/** + * Returns the directionality of the first strong character + * after the last B in prologue, if any. + * Requires prologue!=null. 
+ */ +static DirProp +firstL_R_AL(UBiDi *pBiDi) { + const UChar *text=pBiDi->prologue; + int32_t length=pBiDi->proLength; + int32_t i; + UChar32 uchar; + DirProp dirProp, result=ON; + for(i=0; iparas + */ +static UBool +checkParaCount(UBiDi *pBiDi) { + int32_t count=pBiDi->paraCount; + if(pBiDi->paras==pBiDi->simpleParas) { + if(count<=SIMPLE_PARAS_COUNT) + return TRUE; + if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2)) + return FALSE; + pBiDi->paras=pBiDi->parasMemory; + uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para)); + return TRUE; + } + if(!getInitialParasMemory(pBiDi, count * 2)) + return FALSE; + pBiDi->paras=pBiDi->parasMemory; + return TRUE; +} + +/* + * Get the directional properties for the text, calculate the flags bit-set, and + * determine the paragraph level if necessary (in pBiDi->paras[i].level). + * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. + * When encountering an FSI, it is initially replaced with an LRI, which is the + * default. Only if a strong R or AL is found within its scope will the LRI be + * replaced by an RLI. 
+ */ +static UBool +getDirProps(UBiDi *pBiDi) { + const UChar *text=pBiDi->text; + DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ + + int32_t i=0, originalLength=pBiDi->originalLength; + Flags flags=0; /* collect all directionalities in the text */ + UChar32 uchar; + DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ + UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); + /* for inverse BiDi, the default para level is set to RTL if there is a + strong R or AL character at either end of the text */ + UBool isDefaultLevelInverse=isDefaultLevel && (UBool) + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || + pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); + int32_t lastArabicPos=-1; + int32_t controlCount=0; + UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & + UBIDI_OPTION_REMOVE_CONTROLS); + + enum State { + NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ + SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ + SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ + LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ + }; + State state; + DirProp lastStrong=ON; /* for default level & inverse BiDi */ + /* The following stacks are used to manage isolate sequences. Those + sequences may be nested, but obviously never more deeply than the + maximum explicit embedding level. + lastStack is the index of the last used entry in the stack. A value of -1 + means that there is no open isolate sequence. + lastStack is reset to -1 on paragraph boundaries. 
*/ + /* The following stack contains the position of the initiator of + each open isolate sequence */ + int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; + /* The following stack contains the last known state before + encountering the initiator of an isolate sequence */ + State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; + int32_t stackLast=-1; + + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) + pBiDi->length=0; + defaultParaLevel=pBiDi->paraLevel&1; + if(isDefaultLevel) { + pBiDi->paras[0].level=defaultParaLevel; + lastStrong=defaultParaLevel; + if(pBiDi->proLength>0 && /* there is a prologue */ + (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ + if(dirProp==L) + pBiDi->paras[0].level=0; /* set the default para level */ + else + pBiDi->paras[0].level=1; /* set the default para level */ + state=NOT_SEEKING_STRONG; + } else { + state=SEEKING_STRONG_FOR_PARA; + } + } else { + pBiDi->paras[0].level=pBiDi->paraLevel; + state=NOT_SEEKING_STRONG; + } + /* count paragraphs and determine the paragraph level (P2..P3) */ + /* + * see comment in ubidi.h: + * the UBIDI_DEFAULT_XXX values are designed so that + * their bit 0 alone yields the intended default + */ + for( /* i=0 above */ ; i0xffff) { /* set the lead surrogate's property to BN */ + flags|=DIRPROP_FLAG(BN); + dirProps[i-2]=BN; + } + if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) + controlCount++; + if(dirProp==L) { + if(state==SEEKING_STRONG_FOR_PARA) { + pBiDi->paras[pBiDi->paraCount-1].level=0; + state=NOT_SEEKING_STRONG; + } + else if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + } + state=LOOKING_FOR_PDI; + } + lastStrong=L; + continue; + } + if(dirProp==R || dirProp==AL) { + if(state==SEEKING_STRONG_FOR_PARA) { + pBiDi->paras[pBiDi->paraCount-1].level=1; + state=NOT_SEEKING_STRONG; + } + else 
if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + dirProps[isolateStartStack[stackLast]]=RLI; + flags|=DIRPROP_FLAG(RLI); + } + state=LOOKING_FOR_PDI; + } + lastStrong=R; + if(dirProp==AL) + lastArabicPos=i-1; + continue; + } + if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ + stackLast++; + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + isolateStartStack[stackLast]=i-1; + previousStateStack[stackLast]=state; + } + if(dirProp==FSI) { + dirProps[i-1]=LRI; /* default if no strong char */ + state=SEEKING_STRONG_FOR_FSI; + } + else + state=LOOKING_FOR_PDI; + continue; + } + if(dirProp==PDI) { + if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + } + } + if(stackLast>=0) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) + state=previousStateStack[stackLast]; + stackLast--; + } + continue; + } + if(dirProp==B) { + if(iparas[pBiDi->paraCount-1].limit=i; + if(isDefaultLevelInverse && lastStrong==R) + pBiDi->paras[pBiDi->paraCount-1].level=1; + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { + /* When streaming, we only process whole paragraphs + thus some updates are only done on paragraph boundaries */ + pBiDi->length=i; /* i is index to next character */ + pBiDi->controlCount=controlCount; + } + if(iparaCount++; + if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ + return FALSE; + if(isDefaultLevel) { + pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; + state=SEEKING_STRONG_FOR_PARA; + lastStrong=defaultParaLevel; + } else { + pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; + state=NOT_SEEKING_STRONG; + } + stackLast=-1; + } + continue; + } + } + /* Ignore still open isolate sequences with overflow */ + if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { + stackLast=UBIDI_MAX_EXPLICIT_LEVEL; + state=SEEKING_STRONG_FOR_FSI; /* to be 
on the safe side */ + } + /* Resolve direction of still unresolved open FSI sequences */ + while(stackLast>=0) { + if(state==SEEKING_STRONG_FOR_FSI) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + break; + } + state=previousStateStack[stackLast]; + stackLast--; + } + /* When streaming, ignore text after the last paragraph separator */ + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { + if(pBiDi->lengthparaCount--; + } else { + pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; + pBiDi->controlCount=controlCount; + } + /* For inverse bidi, default para direction is RTL if there is + a strong R or AL at either end of the paragraph */ + if(isDefaultLevelInverse && lastStrong==R) { + pBiDi->paras[pBiDi->paraCount-1].level=1; + } + if(isDefaultLevel) { + pBiDi->paraLevel=pBiDi->paras[0].level; + } + /* The following is needed to resolve the text direction for default level + paragraphs containing no strong character */ + for(i=0; iparaCount; i++) + flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); + + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { + flags|=DIRPROP_FLAG(L); + } + pBiDi->flags=flags; + pBiDi->lastArabicPos=lastArabicPos; + return TRUE; +} + +/* determine the paragraph level at position index */ +U_CFUNC UBiDiLevel +ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { + int32_t i; + for(i=0; iparaCount; i++) + if(pindexparas[i].limit) + break; + if(i>=pBiDi->paraCount) + i=pBiDi->paraCount-1; + return (UBiDiLevel)(pBiDi->paras[i].level); +} + +/* Functions for handling paired brackets ----------------------------------- */ + +/* In the isoRuns array, the first entry is used for text outside of any + isolate sequence. Higher entries are used for each more deeply nested + isolate sequence. isoRunLast is the index of the last used entry. 
The + openings array is used to note the data of opening brackets not yet + matched by a closing bracket, or matched but still susceptible to change + level. + Each isoRun entry contains the index of the first and + one-after-last openings entries for pending opening brackets it + contains. The next openings entry to use is the one-after-last of the + most deeply nested isoRun entry. + isoRun entries also contain their current embedding level and the last + encountered strong character, since these will be needed to resolve + the level of paired brackets. */ + +static void +bracketInit(UBiDi *pBiDi, BracketData *bd) { + bd->pBiDi=pBiDi; + bd->isoRunLast=0; + bd->isoRuns[0].start=0; + bd->isoRuns[0].limit=0; + bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); + UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1; + bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t; + bd->isoRuns[0].contextDir = (UBiDiDirection)t; + bd->isoRuns[0].contextPos=0; + if(pBiDi->openingsMemory) { + bd->openings=pBiDi->openingsMemory; + bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); + } else { + bd->openings=bd->simpleOpenings; + bd->openingsCount=SIMPLE_OPENINGS_COUNT; + } + bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || + bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; +} + +/* paragraph boundary */ +static void +bracketProcessB(BracketData *bd, UBiDiLevel level) { + bd->isoRunLast=0; + bd->isoRuns[0].limit=0; + bd->isoRuns[0].level=level; + bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1; + bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1); + bd->isoRuns[0].contextPos=0; +} + +/* LRE, LRO, RLE, RLO, PDF */ +static void +bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, + UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + DirProp *dirProps=bd->pBiDi->dirProps; + if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ + return; + 
if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */ + contextLevel=embeddingLevel; + pLastIsoRun->limit=pLastIsoRun->start; + pLastIsoRun->level=embeddingLevel; + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1; + pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1); + pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos; +} + +/* LRI or RLI */ +static void +bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + int16_t lastLimit; + pLastIsoRun->lastBase=ON; + lastLimit=pLastIsoRun->limit; + bd->isoRunLast++; + pLastIsoRun++; + pLastIsoRun->start=pLastIsoRun->limit=lastLimit; + pLastIsoRun->level=level; + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1; + pLastIsoRun->contextDir=(UBiDiDirection)(level&1); + pLastIsoRun->contextPos=0; +} + +/* PDI */ +static void +bracketProcessPDI(BracketData *bd) { + IsoRun *pLastIsoRun; + bd->isoRunLast--; + pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + pLastIsoRun->lastBase=ON; +} + +/* newly found opening bracket: create an openings entry */ +static UBool /* return TRUE if success */ +bracketAddOpening(BracketData *bd, UChar match, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening; + if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ + UBiDi *pBiDi=bd->pBiDi; + if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) + return FALSE; + if(bd->openings==bd->simpleOpenings) + uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, + SIMPLE_OPENINGS_COUNT * sizeof(Opening)); + bd->openings=pBiDi->openingsMemory; /* may have changed */ + bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); + } + pOpening=&bd->openings[pLastIsoRun->limit]; + pOpening->position=position; + pOpening->match=match; + pOpening->contextDir=pLastIsoRun->contextDir; + pOpening->contextPos=pLastIsoRun->contextPos; + pOpening->flags=0; + pLastIsoRun->limit++; + return TRUE; +} + +/* change N0c1 
to N0c2 when a preceding bracket is assigned the embedding level */ +static void +fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { + /* This function calls itself recursively */ + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *qOpening; + DirProp *dirProps=bd->pBiDi->dirProps; + int32_t k, openingPosition, closingPosition; + for(k=openingIndex+1, qOpening=&bd->openings[k]; klimit; k++, qOpening++) { + if(qOpening->match>=0) /* not an N0c match */ + continue; + if(newPropPositioncontextPos) + break; + if(newPropPosition>=qOpening->position) + continue; + if(newProp==qOpening->contextDir) + break; + openingPosition=qOpening->position; + dirProps[openingPosition]=newProp; + closingPosition=-(qOpening->match); + dirProps[closingPosition]=newProp; + qOpening->match=0; /* prevent further changes */ + fixN0c(bd, k, openingPosition, newProp); + fixN0c(bd, k, closingPosition, newProp); + } +} + +/* process closing bracket */ +static DirProp /* return L or R if N0b or N0c, ON if N0d */ +bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening, *qOpening; + UBiDiDirection direction; + UBool stable; + DirProp newProp; + pOpening=&bd->openings[openIdx]; + direction=(UBiDiDirection)(pLastIsoRun->level&1); + stable=TRUE; /* assume stable until proved otherwise */ + + /* The stable flag is set when brackets are paired and their + level is resolved and cannot be changed by what will be + found later in the source string. + An unstable match can occur only when applying N0c, where + the resolved level depends on the preceding context, and + this context may be affected by text occurring later. + Example: RTL paragraph containing: abc[(latin) HEBREW] + When the closing parenthesis is encountered, it appears + that N0c1 must be applied since 'abc' sets an opposite + direction context and both parentheses receive level 2. 
+ However, when the closing square bracket is processed, + N0b applies because of 'HEBREW' being included within the + brackets, thus the square brackets are treated like R and + receive level 1. However, this changes the preceding + context of the opening parenthesis, and it now appears + that N0c2 must be applied to the parentheses rather than + N0c1. */ + + if((direction==0 && pOpening->flags&FOUND_L) || + (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ + newProp=direction; + } + else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ + /* it is stable if there is no containing pair or in + conditions too complicated and not worth checking */ + stable=(openIdx==pLastIsoRun->start); + if(direction!=pOpening->contextDir) + newProp=pOpening->contextDir; /* N0c1 */ + else + newProp=direction; /* N0c2 */ + } else { + /* forget this and any brackets nested within this pair */ + pLastIsoRun->limit=openIdx; + return ON; /* N0d */ + } + bd->pBiDi->dirProps[pOpening->position]=newProp; + bd->pBiDi->dirProps[position]=newProp; + /* Update nested N0c pairs that may be affected */ + fixN0c(bd, openIdx, pOpening->position, newProp); + if(stable) { + pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */ + /* remove lower located synonyms if any */ + while(pLastIsoRun->limit>pLastIsoRun->start && + bd->openings[pLastIsoRun->limit-1].position==pOpening->position) + pLastIsoRun->limit--; + } else { + int32_t k; + pOpening->match=-position; + /* neutralize lower located synonyms if any */ + k=openIdx-1; + while(k>=pLastIsoRun->start && + bd->openings[k].position==pOpening->position) + bd->openings[k--].match=0; + /* neutralize any unmatched opening between the current pair; + this will also neutralize higher located synonyms if any */ + for(k=openIdx+1; klimit; k++) { + qOpening=&bd->openings[k]; + if(qOpening->position>=position) + break; + if(qOpening->match>0) + qOpening->match=0; + } + } + return newProp; +} + +/* handle strong characters, digits 
and candidates for closing brackets */
/*
 * Process the character at text index position for the paired-bracket
 * logic of the current isolate run.
 *
 * For an ON character, the pending openings of the run are searched from
 * the most recent one for an entry expecting this character as its closing
 * bracket. On a match that is not N0d, the pair is resolved by
 * bracketProcessClosing(), the run context is updated and the function
 * returns. Otherwise, if the character is an opening bracket, an openings
 * entry is added for it (plus one for the canonically equivalent closing
 * angle bracket, where applicable).
 *
 * Then, for every character that did not return early, the run's
 * lastBase/lastStrong/context are updated according to its property (or to
 * the override direction if the level carries UBIDI_LEVEL_OVERRIDE), some
 * properties are rewritten in dirProps (EN to ENL/ENR/AN, NSM after ON to
 * ON), and, if the resulting property is strong, its direction is recorded
 * in the flags of every pending opening located before position.
 *
 * Returns TRUE on success, FALSE if an openings entry could not be added.
 */
static UBool                            /* return TRUE if success */
bracketProcessChar(BracketData *bd, int32_t position) {
    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    DirProp *dirProps, dirProp, newProp;
    UBiDiLevel level;
    dirProps=bd->pBiDi->dirProps;
    dirProp=dirProps[position];
    if(dirProp==ON) {
        UChar c, match;
        int32_t idx;
        /* First see if it is a matching closing bracket. Hopefully, this is
           more efficient than checking if it is a closing bracket at all */
        c=bd->pBiDi->text[position];
        for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
            if(bd->openings[idx].match!=c)
                continue;
            /* We have a match */
            newProp=bracketProcessClosing(bd, idx, position);
            if(newProp==ON) {           /* N0d */
                c=0;        /* prevent handling as an opening */
                break;
            }
            pLastIsoRun->lastBase=ON;
            pLastIsoRun->contextDir=(UBiDiDirection)newProp;
            pLastIsoRun->contextPos=position;
            level=bd->pBiDi->levels[position];
            if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
                uint16_t flag;
                int32_t i;
                newProp=level&1;
                pLastIsoRun->lastStrong=newProp;
                flag=DIRPROP_FLAG(newProp);
                /* the overridden direction counts as strong content for
                   all pending openings below the one just closed */
                for(i=pLastIsoRun->start; i<idx; i++)
                    bd->openings[i].flags|=flag;
                /* matching brackets are not overridden by LRO/RLO */
                bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
            }
            /* matching brackets are not overridden by LRO/RLO */
            bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
            return TRUE;
        }
        /* We get here only if the ON character is not a matching closing
           bracket or it is a case of N0d */
        /* Now see if it is an opening bracket */
        if(c)
            match=u_getBidiPairedBracket(c);    /* get the matching char */
        else
            match=0;
        if(match!=c &&                  /* has a matching char */
           ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
            /* special case: process synonyms
               create an opening entry for each synonym */
            if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
                if(!bracketAddOpening(bd, 0x3009, position))
                    return FALSE;
            }
            else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
                if(!bracketAddOpening(bd, 0x232A, position))
                    return FALSE;
            }
            if(!bracketAddOpening(bd, match, position))
                return FALSE;
        }
    }
    level=bd->pBiDi->levels[position];
    if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
        newProp=level&1;
        if(dirProp!=S && dirProp!=WS && dirProp!=ON)
            dirProps[position]=newProp;
        pLastIsoRun->lastBase=newProp;
        pLastIsoRun->lastStrong=newProp;
        pLastIsoRun->contextDir=(UBiDiDirection)newProp;
        pLastIsoRun->contextPos=position;
    }
    else if(dirProp<=R || dirProp==AL) {
        newProp=DIR_FROM_STRONG(dirProp);
        pLastIsoRun->lastBase=dirProp;
        pLastIsoRun->lastStrong=dirProp;
        pLastIsoRun->contextDir=(UBiDiDirection)newProp;
        pLastIsoRun->contextPos=position;
    }
    else if(dirProp==EN) {
        pLastIsoRun->lastBase=EN;
        if(pLastIsoRun->lastStrong==L) {
            newProp=L;                  /* W7 */
            if(!bd->isNumbersSpecial)
                dirProps[position]=ENL;
            pLastIsoRun->contextDir=(UBiDiDirection)L;
            pLastIsoRun->contextPos=position;
        }
        else {
            newProp=R;                  /* N0 */
            if(pLastIsoRun->lastStrong==AL)
                dirProps[position]=AN;  /* W2 */
            else
                dirProps[position]=ENR;
            pLastIsoRun->contextDir=(UBiDiDirection)R;
            pLastIsoRun->contextPos=position;
        }
    }
    else if(dirProp==AN) {
        newProp=R;                      /* N0 */
        pLastIsoRun->lastBase=AN;
        pLastIsoRun->contextDir=(UBiDiDirection)R;
        pLastIsoRun->contextPos=position;
    }
    else if(dirProp==NSM) {
        /* if the last real char was ON, change NSM to ON so that it
           will stay ON even if the last real char is a bracket which
           may be changed to L or R */
        newProp=pLastIsoRun->lastBase;
        if(newProp==ON)
            dirProps[position]=newProp;
    }
    else {
        newProp=dirProp;
        pLastIsoRun->lastBase=dirProp;
    }
    if(newProp<=R || newProp==AL) {
        int32_t i;
        uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
        /* note the strong direction in every pending opening located
           before this character (used later to choose N0b/N0c/N0d) */
        for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
            if(position>bd->openings[i].position)
                bd->openings[i].flags|=flag;
    }
    return TRUE;
}

/* perform (X1)..(X9)
------------------------------------------------------- */

/*
 * Classify the text from the directional flags collected in pBiDi->flags:
 * UBIDI_LTR if nothing in it can end up right-to-left, UBIDI_RTL if nothing
 * in it can end up left-to-right, UBIDI_MIXED otherwise.
 */
static UBiDiDirection
directionFromFlags(UBiDi *pBiDi) {
    const Flags seen=pBiDi->flags;
    const UBool hasRTL=(seen&MASK_RTL)!=0;
    /* if the text contains AN and neutrals, then some neutrals may become RTL */
    const UBool neutralsMayTurnRTL=
        (seen&DIRPROP_FLAG(AN))!=0 && (seen&MASK_POSSIBLE_N)!=0;

    if(!hasRTL && !neutralsMayTurnRTL) {
        return UBIDI_LTR;
    }
    if((seen&MASK_LTR)==0) {
        return UBIDI_RTL;
    }
    return UBIDI_MIXED;
}

/*
 * Resolve the explicit levels as specified by explicit embedding codes.
 * Recalculate the flags to have them reflect the real properties
 * after taking the explicit embeddings into account.
 *
 * The BiDi algorithm is designed to result in the same behavior whether embedding
 * levels are externally specified (from "styled text", supposedly the preferred
 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
 * However, in a real implementation, the removal of these codes and their index
 * positions in the plain text is undesirable since it would result in
 * reallocated, reindexed text.
 * Instead, this implementation leaves the codes in there and just ignores them
 * in the subsequent processing.
 * In order to get the same reordering behavior, positions with a BN or a not-isolate
 * explicit embedding code just get the same level assigned as the last "real"
 * character.
 *
 * Some implementations, not this one, then overwrite some of these
 * directionality properties at "real" same-level-run boundaries by
 * L or R codes so that the resolution of weak types can be performed on the
 * entire paragraph at once instead of having to parse it once more and
 * perform that resolution on same-level-runs.
 * This limits the scope of the implicit rules in effectively
 * the same way as the run limits.
+ * + * Instead, this implementation does not modify these codes, except for + * paired brackets whose properties (ON) may be replaced by L or R. + * On one hand, the paragraph has to be scanned for same-level-runs, but + * on the other hand, this saves another loop to reset these codes, + * or saves making and modifying a copy of dirProps[]. + * + * + * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. + * + * + * Handling the stack of explicit levels (Xn): + * + * With the BiDi stack of explicit levels, as pushed with each + * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI, + * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. + * + * In order to have a correct push-pop semantics even in the case of overflows, + * overflow counters and a valid isolate counter are used as described in UAX#9 + * section 3.3.2 "Explicit Levels and Directions". + * + * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. + * + * Returns normally the direction; -1 if there was a memory shortage + * + */ +static UBiDiDirection +resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + const UChar *text=pBiDi->text; + + int32_t i=0, length=pBiDi->length; + Flags flags=pBiDi->flags; /* collect all directionalities in the text */ + DirProp dirProp; + UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); + UBiDiDirection direction; + pBiDi->isolateCount=0; + + if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } + + /* determine if the text is mixed-directional or single-directional */ + direction=directionFromFlags(pBiDi); + + /* we may not need to resolve any explicit levels */ + if((direction!=UBIDI_MIXED)) { + /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ + return direction; + } + if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { + /* inverse BiDi: mixed, but all characters are at the same 
embedding level */ + /* set all levels to the paragraph level */ + int32_t paraIndex, start, limit; + for(paraIndex=0; paraIndexparaCount; paraIndex++) { + if(paraIndex==0) + start=0; + else + start=pBiDi->paras[paraIndex-1].limit; + limit=pBiDi->paras[paraIndex].limit; + level=pBiDi->paras[paraIndex].level; + for(i=start; iparaCount; paraIndex++) { + if(paraIndex==0) + start=0; + else + start=pBiDi->paras[paraIndex-1].limit; + limit=pBiDi->paras[paraIndex].limit; + level=pBiDi->paras[paraIndex].level; + for(i=start; i=UBIDI_MAX_EXPLICIT_LEVEL + but we need one more entry as base */ + uint32_t stackLast=0; + int32_t overflowIsolateCount=0; + int32_t overflowEmbeddingCount=0; + int32_t validIsolateCount=0; + BracketData bracketData; + bracketInit(pBiDi, &bracketData); + stack[0]=level; /* initialize base entry to para level, no override, no isolate */ + + /* recalculate the flags */ + flags=0; + + for(i=0; i0 && stack[stackLast]pBiDi->isolateCount) + pBiDi->isolateCount=validIsolateCount; + embeddingLevel=newLevel; + /* we can increment stackLast without checking because newLevel + will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */ + stackLast++; + stack[stackLast]=embeddingLevel+ISOLATE; + bracketProcessLRI_RLI(&bracketData, embeddingLevel); + } else { + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + overflowIsolateCount++; + } + break; + case PDI: + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { + bracketProcessBoundary(&bracketData, lastCcPos, + previousLevel, embeddingLevel); + flags|=DIRPROP_FLAG_MULTI_RUNS; + } + /* (X6a) */ + if(overflowIsolateCount) { + overflowIsolateCount--; + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + } + else if(validIsolateCount) { + flags|=DIRPROP_FLAG(PDI); + lastCcPos=i; + overflowEmbeddingCount=0; + while(stack[stackLast]paraLevel); + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) + flags|=DIRPROP_FLAG(L); + /* again, 
determine if the text is mixed-directional or single-directional */ + pBiDi->flags=flags; + direction=directionFromFlags(pBiDi); + } + return direction; +} + +/* + * Use a pre-specified embedding levels array: + * + * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + * ignore all explicit codes (X9), + * and check all the preset levels. + * + * Recalculate the flags to have them reflect the real properties + * after taking the explicit embeddings into account. + */ +static UBiDiDirection +checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + int32_t isolateCount=0; + + int32_t length=pBiDi->length; + Flags flags=0; /* collect all directionalities in the text */ + pBiDi->isolateCount=0; + + int32_t currentParaIndex = 0; + int32_t currentParaLimit = pBiDi->paras[0].limit; + int32_t currentParaLevel = pBiDi->paraLevel; + + for(int32_t i=0; ipBiDi->isolateCount) + pBiDi->isolateCount=isolateCount; + } + else if(dirProp==PDI) + isolateCount--; + else if(dirProp==B) + isolateCount=0; + + // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i); + if (pBiDi->defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) { + currentParaLevel = pBiDi->paras[++currentParaIndex].level; + currentParaLimit = pBiDi->paras[currentParaIndex].limit; + } + + UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE; + level &= ~UBIDI_LEVEL_OVERRIDE; + if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. 
+ level = (UBiDiLevel)currentParaLevel; + levels[i] = level | overrideFlag; + } + } else { + // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level + /* level out of bounds */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UBIDI_LTR; + } + } + if (overrideFlag != 0) { + /* keep the override flag in levels[i] but adjust the flags */ + flags|=DIRPROP_FLAG_O(level); + } else { + /* set the flags */ + flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); + } + } + if(flags&MASK_EMBEDDING) + flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + /* determine if the text is mixed-directional or single-directional */ + pBiDi->flags=flags; + return directionFromFlags(pBiDi); +} + +/****************************************************************** + The Properties state machine table +******************************************************************* + + All table cells are 8 bits: + bits 0..4: next state + bits 5..7: action to perform (if > 0) + + Cells may be of format "n" where n represents the next state + (except for the rightmost column). + Cells may also be of format "s(x,y)" where x represents an action + to perform and y represents the next state. 
+ +******************************************************************* + Definitions and type for properties state table +******************************************************************* +*/ +#define IMPTABPROPS_COLUMNS 16 +#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) +#define GET_STATEPROPS(cell) ((cell)&0x1f) +#define GET_ACTIONPROPS(cell) ((cell)>>5) +#define s(action, newState) ((uint8_t)(newState+(action<<5))) + +static const uint8_t groupProp[] = /* dirProp regrouped */ +{ +/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ + 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 +}; +enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ + +/****************************************************************** + + PROPERTIES STATE TABLE + + In table impTabProps, + - the ON column regroups ON and WS, FSI, RLI, LRI and PDI + - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF + - the Res column is the reduced property assigned to a run + + Action 1: process current run1, init new run1 + 2: init new run2 + 3: process run1, process run2, init new run1 + 4: process run1, set run1=run2, init new run2 + + Notes: + 1) This table is used in resolveImplicitLevels(). + 2) This table triggers actions when there is a change in the Bidi + property of incoming characters (action 1). + 3) Most such property sequences are processed immediately (in + fact, passed to processPropertySeq(). + 4) However, numbers are assembled as one sequence. This means + that undefined situations (like CS following digits, until + it is known if the next char will be a digit) are held until + following chars define them. + Example: digits followed by CS, then comes another CS or ON; + the digits will be processed, then the CS assigned + as the start of an ON sequence (action 3). 
+ 5) There are cases where more than one sequence must be + processed, for instance digits followed by CS followed by L: + the digits must be processed as one sequence, and the CS + must be processed as an ON sequence, all this before starting + assembling chars for the opening L sequence. + + +*/ +static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = +{ +/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ +/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, +/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, +/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, +/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, +/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, +/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, +/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, +/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, +/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, +/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, +/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, +/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, +/*12 AN+CS */ { 
s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, +/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, +/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, +/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, +/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, +/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, +/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, +/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, +/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, +/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, +/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, +/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } +}; + +/* we must undef macro s because the levels tables have a different + * structure (4 bits for action and 4 bits for next state. 
+ */ +#undef s + +/****************************************************************** + The levels state machine tables +******************************************************************* + + All table cells are 8 bits: + bits 0..3: next state + bits 4..7: action to perform (if > 0) + + Cells may be of format "n" where n represents the next state + (except for the rightmost column). + Cells may also be of format "s(x,y)" where x represents an action + to perform and y represents the next state. + + This format limits each table to 16 states each and to 15 actions. + +******************************************************************* + Definitions and type for levels state tables +******************************************************************* +*/ +#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) +#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) +#define GET_STATE(cell) ((cell)&0x0f) +#define GET_ACTION(cell) ((cell)>>4) +#define s(action, newState) ((uint8_t)(newState+(action<<4))) + +typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; +typedef uint8_t ImpAct[]; + +/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, + * instead of having a pair of ImpTab and a pair of ImpAct. + */ +typedef struct ImpTabPair { + const void * pImpTab[2]; + const void * pImpAct[2]; +} ImpTabPair; + +/****************************************************************** + + LEVELS STATE TABLES + + In all levels state tables, + - state 0 is the initial state + - the Res column is the increment to add to the text level + for this property sequence. + + The impAct arrays for each table of a pair map the local action + numbers of the table to the total list of actions. For instance, + action 2 in a given table corresponds to the action number which + appears in entry [2] of the impAct array for that table. + The first entry of all impAct arrays must be 0. 
+ + Action 1: init conditional sequence + 2: prepend conditional sequence to current sequence + 3: set ON sequence to new level - 1 + 4: init EN/AN/ON sequence + 5: fix EN/AN/ON sequence followed by R + 6: set previous level sequence to level 2 + + Notes: + 1) These tables are used in processPropertySeq(). The input + is property sequences as determined by resolveImplicitLevels. + 2) Most such property sequences are processed immediately + (levels are assigned). + 3) However, some sequences cannot be assigned a final level till + one or more following sequences are received. For instance, + ON following an R sequence within an even-level paragraph. + If the following sequence is R, the ON sequence will be + assigned basic run level+1, and so will the R sequence. + 4) S is generally handled like ON, since its level will be fixed + to paragraph level in adjustWSLevels(). + +*/ + +static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, +/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, +/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, +/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 }, +/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 } +}; +static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. 
+*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, +/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, +/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, +/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, +/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } +}; +static const ImpAct impAct0 = {0,1,2,3,4}; +static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, + &impTabR_DEFAULT}, + {&impAct0, &impAct0}}; + +static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, +/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 }, +/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 }, +/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 }, +/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } +}; +static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, + &impTabR_DEFAULT}, + {&impAct0, &impAct0}}; + +static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = +/* In this table, EN/AN+ON sequences receive levels as if associated with R + until proven that there is L or sor/eor on both sides. AN is handled like EN. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, +/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, +/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, +/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, +/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, +/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } +}; +static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = +/* In this table, EN/AN+ON sequences receive levels as if associated with R + until proven that there is L on both sides. AN is handled like EN. 
+*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, +/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, +/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, +/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } +}; +static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { + {&impTabL_GROUP_NUMBERS_WITH_R, + &impTabR_GROUP_NUMBERS_WITH_R}, + {&impAct0, &impAct0}}; + + +static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = +/* This table is identical to the Default LTR table except that EN and AN are + handled like L. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, +/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, +/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, +/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, +/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } +}; +static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = +/* This table is identical to the Default RTL table except that EN and AN are + handled like L. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, +/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, +/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, +/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } +}; +static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { + {&impTabL_INVERSE_NUMBERS_AS_L, + &impTabR_INVERSE_NUMBERS_AS_L}, + {&impAct0, &impAct0}}; + +static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. 
+*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, +/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, +/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, +/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, +/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, +/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, +/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } +}; +static const ImpAct impAct1 = {0,1,13,14}; +/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" + */ +static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { + {&impTabL_DEFAULT, + &impTabR_INVERSE_LIKE_DIRECT}, + {&impAct0, &impAct1}}; + +static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = +/* The case handled in this table is (visually): R EN L +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, +/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, +/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, +/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, +/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, +/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } +}; +static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = +/* The cases handled in this table are (visually): R EN L + R L AN L +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, +/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, +/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, +/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, +/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, +/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , 
s(4,0), s(4,0), 3 } +}; +static const ImpAct impAct2 = {0,1,2,5,6,7,8}; +static const ImpAct impAct3 = {0,1,9,10,11,12}; +static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { + {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, + &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, + {&impAct2, &impAct3}}; + +static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { + {&impTabL_NUMBERS_SPECIAL, + &impTabR_INVERSE_LIKE_DIRECT}, + {&impAct0, &impAct1}}; + +static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = +/* The case handled in this table is (visually): R EN L +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, +/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, +/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, +/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } +}; +static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { + {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, + &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, + {&impAct2, &impAct3}}; + +#undef s + +typedef struct { + const ImpTab * pImpTab; /* level table pointer */ + const ImpAct * pImpAct; /* action map array */ + int32_t startON; /* start of ON sequence */ + int32_t startL2EN; /* start of level 2 sequence */ + int32_t lastStrongRTL; /* index of last found R or AL */ + int32_t state; /* current state */ + int32_t runStart; /* start position of the run */ + UBiDiLevel runLevel; /* run level before implicit solving */ +} LevState; + +/*------------------------------------------------------------------------*/ + +static void +addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) + /* param pos: position where to insert + param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER + */ +{ +#define FIRSTALLOC 10 + Point point; + InsertPoints * pInsertPoints=&(pBiDi->insertPoints); + + if 
(pInsertPoints->capacity == 0) + { + pInsertPoints->points=static_cast(uprv_malloc(sizeof(Point)*FIRSTALLOC)); + if (pInsertPoints->points == NULL) + { + pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + pInsertPoints->capacity=FIRSTALLOC; + } + if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ + { + Point * savePoints=pInsertPoints->points; + pInsertPoints->points=static_cast(uprv_realloc(pInsertPoints->points, + pInsertPoints->capacity*2*sizeof(Point))); + if (pInsertPoints->points == NULL) + { + pInsertPoints->points=savePoints; + pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + else pInsertPoints->capacity*=2; + } + point.pos=pos; + point.flag=flag; + pInsertPoints->points[pInsertPoints->size]=point; + pInsertPoints->size++; +#undef FIRSTALLOC +} + +static void +setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level) +{ + DirProp *dirProps=pBiDi->dirProps, dirProp; + UBiDiLevel *levels=pBiDi->levels; + int32_t isolateCount=0, k; + for(k=start; kpImpTab; + const ImpAct * pImpAct=pLevState->pImpAct; + UBiDiLevel * levels=pBiDi->levels; + UBiDiLevel level, addLevel; + InsertPoints * pInsertPoints; + int32_t start0, k; + + start0=start; /* save original start position */ + oldStateSeq=(uint8_t)pLevState->state; + cell=(*pImpTab)[oldStateSeq][_prop]; + pLevState->state=GET_STATE(cell); /* isolate the new state */ + actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ + addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; + + if(actionSeq) { + switch(actionSeq) { + case 1: /* init ON seq */ + pLevState->startON=start0; + break; + + case 2: /* prepend ON seq to current seq */ + start=pLevState->startON; + break; + + case 3: /* EN/AN after R+ON */ + level=pLevState->runLevel+1; + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); + break; + + case 4: /* EN/AN before R for NUMBERS_SPECIAL */ + level=pLevState->runLevel+2; + 
setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); + break; + + case 5: /* L or S after possible relevant EN/AN */ + /* check if we had EN after R/AL */ + if (pLevState->startL2EN >= 0) { + addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); + } + pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ + /* check if we had any relevant EN/AN after R/AL */ + pInsertPoints=&(pBiDi->insertPoints); + if ((pInsertPoints->capacity == 0) || + (pInsertPoints->size <= pInsertPoints->confirmed)) + { + /* nothing, just clean up */ + pLevState->lastStrongRTL=-1; + /* check if we have a pending conditional segment */ + level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; + if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ + start=pLevState->startON; /* reset to basic run level */ + } + if (_prop == DirProp_S) /* add LRM before S */ + { + addPoint(pBiDi, start0, LRM_BEFORE); + pInsertPoints->confirmed=pInsertPoints->size; + } + break; + } + /* reset previous RTL cont to level for LTR text */ + for (k=pLevState->lastStrongRTL+1; kconfirmed=pInsertPoints->size; + pLevState->lastStrongRTL=-1; + if (_prop == DirProp_S) /* add LRM before S */ + { + addPoint(pBiDi, start0, LRM_BEFORE); + pInsertPoints->confirmed=pInsertPoints->size; + } + break; + + case 6: /* R/AL after possible relevant EN/AN */ + /* just clean up */ + pInsertPoints=&(pBiDi->insertPoints); + if (pInsertPoints->capacity > 0) + /* remove all non confirmed insert points */ + pInsertPoints->size=pInsertPoints->confirmed; + pLevState->startON=-1; + pLevState->startL2EN=-1; + pLevState->lastStrongRTL=limit - 1; + break; + + case 7: /* EN/AN after R/AL + possible cont */ + /* check for real AN */ + if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && + (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) + { + /* real AN */ + if (pLevState->startL2EN == -1) /* if no relevant EN already found */ + { + /* just note the righmost digit as a strong RTL */ + 
pLevState->lastStrongRTL=limit - 1; + break; + } + if (pLevState->startL2EN >= 0) /* after EN, no AN */ + { + addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); + pLevState->startL2EN=-2; + } + /* note AN */ + addPoint(pBiDi, start0, LRM_BEFORE); + break; + } + /* if first EN/AN after R/AL */ + if (pLevState->startL2EN == -1) { + pLevState->startL2EN=start0; + } + break; + + case 8: /* note location of latest R/AL */ + pLevState->lastStrongRTL=limit - 1; + pLevState->startON=-1; + break; + + case 9: /* L after R+ON/EN/AN */ + /* include possible adjacent number on the left */ + for (k=start0-1; k>=0 && !(levels[k]&1); k--); + if(k>=0) { + addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ + pInsertPoints=&(pBiDi->insertPoints); + pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ + } + pLevState->startON=start0; + break; + + case 10: /* AN after L */ + /* AN numbers between L text on both sides may be trouble. */ + /* tentatively bracket with LRMs; will be confirmed if followed by L */ + addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ + addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ + break; + + case 11: /* R after L+ON/EN/AN */ + /* false alert, infirm LRMs around previous AN */ + pInsertPoints=&(pBiDi->insertPoints); + pInsertPoints->size=pInsertPoints->confirmed; + if (_prop == DirProp_S) /* add RLM before S */ + { + addPoint(pBiDi, start0, RLM_BEFORE); + pInsertPoints->confirmed=pInsertPoints->size; + } + break; + + case 12: /* L after L+ON/AN */ + level=pLevState->runLevel + addLevel; + for(k=pLevState->startON; kinsertPoints); + pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ + pLevState->startON=start0; + break; + + case 13: /* L after L+ON+EN/AN/ON */ + level=pLevState->runLevel; + for(k=start0-1; k>=pLevState->startON; k--) { + if(levels[k]==level+3) { + while(levels[k]==level+3) { + levels[k--]-=2; + } + while(levels[k]==level) { + k--; + } + } + if(levels[k]==level+2) { + levels[k]=level; + continue; + 
} + levels[k]=level+1; + } + break; + + case 14: /* R after L+ON+EN/AN/ON */ + level=pLevState->runLevel+1; + for(k=start0-1; k>=pLevState->startON; k--) { + if(levels[k]>level) { + levels[k]-=2; + } + } + break; + + default: /* we should never get here */ + U_ASSERT(FALSE); + break; + } + } + if((addLevel) || (start < start0)) { + level=pLevState->runLevel + addLevel; + if(start>=pLevState->runStart) { + for(k=start; kprologue; + int32_t length=pBiDi->proLength; + int32_t i; + UChar32 uchar; + DirProp dirProp; + for(i=length; i>0; ) { + /* i is decremented by U16_PREV */ + U16_PREV(text, 0, i, uchar); + dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); + if(dirProp==L) { + return DirProp_L; + } + if(dirProp==R || dirProp==AL) { + return DirProp_R; + } + if(dirProp==B) { + return DirProp_ON; + } + } + return DirProp_ON; +} + +/** + * Returns the directionality of the first strong character, or digit, in the epilogue, if any. + * Requires epilogue!=null. + */ +static DirProp +firstL_R_AL_EN_AN(UBiDi *pBiDi) { + const UChar *text=pBiDi->epilogue; + int32_t length=pBiDi->epiLength; + int32_t i; + UChar32 uchar; + DirProp dirProp; + for(i=0; idirProps; + DirProp dirProp; + LevState levState; + int32_t i, start1, start2; + uint16_t oldStateImp, stateImp, actionImp; + uint8_t gprop, resProp, cell; + UBool inverseRTL; + DirProp nextStrongProp=R; + int32_t nextStrongPos=-1; + + /* check for RTL inverse BiDi mode */ + /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to + * loop on the text characters from end to start. + * This would need a different properties state table (at least different + * actions) and different levels state tables (maybe very similar to the + * LTR corresponding ones. 
+ */ + inverseRTL=(UBool) + ((startlastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || + pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); + + /* initialize for property and levels state tables */ + levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ + levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ + levState.runStart=start; + levState.runLevel=pBiDi->levels[start]; + levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; + levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; + if(start==0 && pBiDi->proLength>0) { + DirProp lastStrong=lastL_R_AL(pBiDi); + if(lastStrong!=DirProp_ON) { + sor=lastStrong; + } + } + /* The isolates[] entries contain enough information to + resume the bidi algorithm in the same state as it was + when it was interrupted by an isolate sequence. */ + if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) { + levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON; + start1=pBiDi->isolates[pBiDi->isolateCount].start1; + stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; + levState.state=pBiDi->isolates[pBiDi->isolateCount].state; + pBiDi->isolateCount--; + } else { + levState.startON=-1; + start1=start; + if(dirProps[start]==NSM) + stateImp = 1 + sor; + else + stateImp=0; + levState.state=0; + processPropertySeq(pBiDi, &levState, sor, start, start); + } + start2=start; /* to make Java compiler happy */ + + for(i=start; i<=limit; i++) { + if(i>=limit) { + int32_t k; + for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--); + dirProp=dirProps[k]; + if(dirProp==LRI || dirProp==RLI) + break; /* no forced closing for sequence ending with LRI/RLI */ + gprop=eor; + } else { + DirProp prop, prop1; + prop=dirProps[i]; + if(prop==B) { + pBiDi->isolateCount=-1; /* current isolates stack entry == none */ + } + if(inverseRTL) { + 
if(prop==AL) { + /* AL before EN does not make it AN */ + prop=R; + } else if(prop==EN) { + if(nextStrongPos<=i) { + /* look for next strong char (L/R/AL) */ + int32_t j; + nextStrongProp=R; /* set default */ + nextStrongPos=limit; + for(j=i+1; jlength && pBiDi->epiLength>0) { + DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); + if(firstStrong!=DirProp_ON) { + eor=firstStrong; + } + } + + /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */ + for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--); + dirProp=dirProps[i]; + if((dirProp==LRI || dirProp==RLI) && limitlength) { + pBiDi->isolateCount++; + pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; + pBiDi->isolates[pBiDi->isolateCount].state=levState.state; + pBiDi->isolates[pBiDi->isolateCount].start1=start1; + pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON; + } + else + processPropertySeq(pBiDi, &levState, eor, limit, limit); +} + +/* perform (L1) and (X9) ---------------------------------------------------- */ + +/* + * Reset the embedding levels for some non-graphic characters (L1). + * This function also sets appropriate levels for BN, and + * explicit embedding types that are supposed to have been removed + * from the paragraph in (X9). 
+ */ +static void +adjustWSLevels(UBiDi *pBiDi) { + const DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + int32_t i; + + if(pBiDi->flags&MASK_WS) { + UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; + Flags flag; + + i=pBiDi->trailingWSStart; + while(i>0) { + /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ + while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) { + if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { + levels[i]=0; + } else { + levels[i]=GET_PARALEVEL(pBiDi, i); + } + } + + /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ + /* here, i+1 is guaranteed to be 0) { + flag=DIRPROP_FLAG(dirProps[--i]); + if(flag&MASK_BN_EXPLICIT) { + levels[i]=levels[i+1]; + } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { + levels[i]=0; + break; + } else if(flag&MASK_B_S) { + levels[i]=GET_PARALEVEL(pBiDi, i); + break; + } + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_setContext(UBiDi *pBiDi, + const UChar *prologue, int32_t proLength, + const UChar *epilogue, int32_t epiLength, + UErrorCode *pErrorCode) { + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(pBiDi==NULL || proLength<-1 || epiLength<-1 || + (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(proLength==-1) { + pBiDi->proLength=u_strlen(prologue); + } else { + pBiDi->proLength=proLength; + } + if(epiLength==-1) { + pBiDi->epiLength=u_strlen(epilogue); + } else { + pBiDi->epiLength=epiLength; + } + pBiDi->prologue=prologue; + pBiDi->epilogue=epilogue; +} + +static void +setParaSuccess(UBiDi *pBiDi) { + pBiDi->proLength=0; /* forget the last context */ + pBiDi->epiLength=0; + pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ +} + +#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) +#define BIDI_ABS(x) ((x)>=0 ? 
(x) : (-(x))) + +static void +setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, + UBiDiLevel paraLevel, UErrorCode *pErrorCode) { + int32_t *runsOnlyMemory = NULL; + int32_t *visualMap; + UChar *visualText; + int32_t saveLength, saveTrailingWSStart; + const UBiDiLevel *levels; + UBiDiLevel *saveLevels; + UBiDiDirection saveDirection; + UBool saveMayAllocateText; + Run *runs; + int32_t visualLength, i, j, visualStart, logicalStart, + runCount, runLength, addedRuns, insertRemove, + start, limit, step, indexOddBit, logicalPos, + index0, index1; + uint32_t saveOptions; + + pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; + if(length==0) { + ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); + goto cleanup3; + } + /* obtain memory for mapping table and visual text */ + runsOnlyMemory=static_cast(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)))); + if(runsOnlyMemory==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + goto cleanup3; + } + visualMap=runsOnlyMemory; + visualText=(UChar *)&visualMap[length]; + saveLevels=(UBiDiLevel *)&visualText[length]; + saveOptions=pBiDi->reorderingOptions; + if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { + pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; + pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; + } + paraLevel&=1; /* accept only 0 or 1 */ + ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup3; + } + /* we cannot access directly pBiDi->levels since it is not yet set if + * direction is not MIXED + */ + levels=ubidi_getLevels(pBiDi, pErrorCode); + uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel)); + saveTrailingWSStart=pBiDi->trailingWSStart; + saveLength=pBiDi->length; + saveDirection=pBiDi->direction; + + /* FOOD FOR THOUGHT: instead of writing the visual text, we could use + * the visual map and the dirProps array to drive the second call + * to ubidi_setPara (but must make provision for 
possible removal of + * BiDi controls. Alternatively, only use the dirProps array via + * customized classifier callback. + */ + visualLength=ubidi_writeReordered(pBiDi, visualText, length, + UBIDI_DO_MIRRORING, pErrorCode); + ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup2; + } + pBiDi->reorderingOptions=saveOptions; + + pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; + paraLevel^=1; + /* Because what we did with reorderingOptions, visualText may be shorter + * than the original text. But we don't want the levels memory to be + * reallocated shorter than the original length, since we need to restore + * the levels as after the first call to ubidi_setpara() before returning. + * We will force mayAllocateText to FALSE before the second call to + * ubidi_setpara(), and will restore it afterwards. + */ + saveMayAllocateText=pBiDi->mayAllocateText; + pBiDi->mayAllocateText=FALSE; + ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); + pBiDi->mayAllocateText=saveMayAllocateText; + ubidi_getRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup1; + } + /* check if some runs must be split, count how many splits */ + addedRuns=0; + runCount=pBiDi->runCount; + runs=pBiDi->runs; + visualStart=0; + for(i=0; irunsMemory[0]=runs[0]; + } + runs=pBiDi->runs=pBiDi->runsMemory; + pBiDi->runCount+=addedRuns; + } else { + goto cleanup1; + } + } + /* split runs which are not consecutive in source text */ + for(i=runCount-1; i>=0; i--) { + runLength= i==0 ? 
runs[0].visualLimit : + runs[i].visualLimit-runs[i-1].visualLimit; + logicalStart=runs[i].logicalStart; + indexOddBit=GET_ODD_BIT(logicalStart); + logicalStart=GET_INDEX(logicalStart); + if(runLength<2) { + if(addedRuns) { + runs[i+addedRuns]=runs[i]; + } + logicalPos=visualMap[logicalStart]; + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, + saveLevels[logicalPos]^indexOddBit); + continue; + } + if(indexOddBit) { + start=logicalStart; + limit=logicalStart+runLength-1; + step=1; + } else { + start=logicalStart+runLength-1; + limit=logicalStart; + step=-1; + } + for(j=start; j!=limit; j+=step) { + index0=visualMap[j]; + index1=visualMap[j+step]; + if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { + logicalPos=BIDI_MIN(visualMap[start], index0); + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, + saveLevels[logicalPos]^indexOddBit); + runs[i+addedRuns].visualLimit=runs[i].visualLimit; + runs[i].visualLimit-=BIDI_ABS(j-start)+1; + insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); + runs[i+addedRuns].insertRemove=insertRemove; + runs[i].insertRemove&=~insertRemove; + start=j+step; + addedRuns--; + } + } + if(addedRuns) { + runs[i+addedRuns]=runs[i]; + } + logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, + saveLevels[logicalPos]^indexOddBit); + } + + cleanup1: + /* restore initial paraLevel */ + pBiDi->paraLevel^=1; + cleanup2: + /* restore real text */ + pBiDi->text=text; + pBiDi->length=saveLength; + pBiDi->originalLength=length; + pBiDi->direction=saveDirection; + /* the saved levels should never excess levelsSize, but we check anyway */ + if(saveLength>pBiDi->levelsSize) { + saveLength=pBiDi->levelsSize; + } + uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel)); + pBiDi->trailingWSStart=saveTrailingWSStart; + if(pBiDi->runCount>1) { + pBiDi->direction=UBIDI_MIXED; + } + cleanup3: + /* free memory for 
mapping table and visual text */ + uprv_free(runsOnlyMemory); + + pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; +} + +/* ubidi_setPara ------------------------------------------------------------ */ + +U_CAPI void U_EXPORT2 +ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, + UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, + UErrorCode *pErrorCode) { + UBiDiDirection direction; + DirProp *dirProps; + + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(pBiDi==NULL || text==NULL || length<-1 || + (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevelreorderingMode==UBIDI_REORDER_RUNS_ONLY) { + setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); + return; + } + + /* initialize the UBiDi structure */ + pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ + pBiDi->text=text; + pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; + pBiDi->paraLevel=paraLevel; + pBiDi->direction=(UBiDiDirection)(paraLevel&1); + pBiDi->paraCount=1; + + pBiDi->dirProps=NULL; + pBiDi->levels=NULL; + pBiDi->runs=NULL; + pBiDi->insertPoints.size=0; /* clean up from last call */ + pBiDi->insertPoints.confirmed=0; /* clean up from last call */ + + /* + * Save the original paraLevel if contextual; otherwise, set to 0. + */ + pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); + + if(length==0) { + /* + * For an empty paragraph, create a UBiDi object with the paraLevel and + * the flags and the direction set but without allocating zero-length arrays. + * There is nothing more to do. 
+ */ + if(IS_DEFAULT_LEVEL(paraLevel)) { + pBiDi->paraLevel&=1; + pBiDi->defaultParaLevel=0; + } + pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); + pBiDi->runCount=0; + pBiDi->paraCount=0; + setParaSuccess(pBiDi); /* mark successful setPara */ + return; + } + + pBiDi->runCount=-1; + + /* allocate paras memory */ + if(pBiDi->parasMemory) + pBiDi->paras=pBiDi->parasMemory; + else + pBiDi->paras=pBiDi->simpleParas; + + /* + * Get the directional properties, + * the flags bit-set, and + * determine the paragraph level if necessary. + */ + if(getDirPropsMemory(pBiDi, length)) { + pBiDi->dirProps=pBiDi->dirPropsMemory; + if(!getDirProps(pBiDi)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + dirProps=pBiDi->dirProps; + /* the processed length may have changed if UBIDI_OPTION_STREAMING */ + length= pBiDi->length; + pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ + + /* are explicit levels specified? 
*/ + if(embeddingLevels==NULL) { + /* no: determine explicit levels according to the (Xn) rules */\ + if(getLevelsMemory(pBiDi, length)) { + pBiDi->levels=pBiDi->levelsMemory; + direction=resolveExplicitLevels(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } else { + /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ + pBiDi->levels=embeddingLevels; + direction=checkExplicitLevels(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + /* allocate isolate memory */ + if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT) + pBiDi->isolates=pBiDi->simpleIsolates; + else + if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize) + pBiDi->isolates=pBiDi->isolatesMemory; + else { + if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { + pBiDi->isolates=pBiDi->isolatesMemory; + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } + pBiDi->isolateCount=-1; /* current isolates stack entry == none */ + + /* + * The steps after (X9) in the UBiDi algorithm are performed only if + * the paragraph text has mixed directionality! 
+ */ + pBiDi->direction=direction; + switch(direction) { + case UBIDI_LTR: + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pBiDi->trailingWSStart=0; + break; + case UBIDI_RTL: + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pBiDi->trailingWSStart=0; + break; + default: + /* + * Choose the right implicit state table + */ + switch(pBiDi->reorderingMode) { + case UBIDI_REORDER_DEFAULT: + pBiDi->pImpTabPair=&impTab_DEFAULT; + break; + case UBIDI_REORDER_NUMBERS_SPECIAL: + pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; + break; + case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: + pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; + break; + case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: + pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; + break; + case UBIDI_REORDER_INVERSE_LIKE_DIRECT: + if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { + pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; + } else { + pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; + } + break; + case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: + if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { + pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; + } else { + pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; + } + break; + default: + /* we should never get here */ + U_ASSERT(FALSE); + break; + } + /* + * If there are no external levels specified and there + * are no significant explicit level codes in the text, + * then we can treat the entire paragraph as one run. + * Otherwise, we need to perform the following rules on runs of + * the text with the same embedding levels. (X10) + * "Significant" explicit level codes are ones that actually + * affect non-BN characters. + * Examples for "insignificant" ones are empty embeddings + * LRE-PDF, LRE-RLE-PDF-PDF, etc. 
+ */ + if(embeddingLevels==NULL && pBiDi->paraCount<=1 && + !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { + resolveImplicitLevels(pBiDi, 0, length, + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); + } else { + /* sor, eor: start and end types of same-level-run */ + UBiDiLevel *levels=pBiDi->levels; + int32_t start, limit=0; + UBiDiLevel level, nextLevel; + DirProp sor, eor; + + /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ + level=GET_PARALEVEL(pBiDi, 0); + nextLevel=levels[0]; + if(level0) && (dirProps[start-1]==B)) { + /* except if this is a new paragraph, then set sor = para level */ + sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); + } else { + sor=eor; + } + + /* search for the limit of this run */ + while((++limitinsertPoints.errorCode)) + { + *pErrorCode=pBiDi->insertPoints.errorCode; + return; + } + /* reset the embedding levels for some non-graphic characters (L1), (X9) */ + adjustWSLevels(pBiDi); + break; + } + /* add RLM for inverse Bidi with contextual orientation resolving + * to RTL which would not round-trip otherwise + */ + if((pBiDi->defaultParaLevel>0) && + (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && + ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { + int32_t i, j, start, last; + UBiDiLevel level; + DirProp dirProp; + for(i=0; iparaCount; i++) { + last=(pBiDi->paras[i].limit)-1; + level=pBiDi->paras[i].level; + if(level==0) + continue; /* LTR paragraph */ + start= i==0 ? 
0 : pBiDi->paras[i-1].limit; + for(j=last; j>=start; j--) { + dirProp=dirProps[j]; + if(dirProp==L) { + if(jreorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { + pBiDi->resultLength -= pBiDi->controlCount; + } else { + pBiDi->resultLength += pBiDi->insertPoints.size; + } + setParaSuccess(pBiDi); /* mark successful setPara */ +} + +U_CAPI void U_EXPORT2 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { + if(pBiDi!=NULL) { + pBiDi->orderParagraphsLTR=orderParagraphsLTR; + } +} + +U_CAPI UBool U_EXPORT2 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { + if(pBiDi!=NULL) { + return pBiDi->orderParagraphsLTR; + } else { + return FALSE; + } +} + +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getDirection(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->direction; + } else { + return UBIDI_LTR; + } +} + +U_CAPI const UChar * U_EXPORT2 +ubidi_getText(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->text; + } else { + return NULL; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getLength(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->originalLength; + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getProcessedLength(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->length; + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getResultLength(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->resultLength; + } else { + return 0; + } +} + +/* paragraphs API functions ------------------------------------------------- */ + +U_CAPI UBiDiLevel U_EXPORT2 +ubidi_getParaLevel(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->paraLevel; + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_countParagraphs(UBiDi *pBiDi) { + if(!IS_VALID_PARA_OR_LINE(pBiDi)) { + return 0; + } else { + return pBiDi->paraCount; + } +} + +U_CAPI void U_EXPORT2 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t 
paraIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { + int32_t paraStart; + + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); + RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); + + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ + if(paraIndex) { + paraStart=pBiDi->paras[paraIndex-1].limit; + } else { + paraStart=0; + } + if(pParaStart!=NULL) { + *pParaStart=paraStart; + } + if(pParaLimit!=NULL) { + *pParaLimit=pBiDi->paras[paraIndex].limit; + } + if(pParaLevel!=NULL) { + *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { + int32_t paraIndex; + + /* check the argument values */ + /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ + RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); + + for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); + ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); + return paraIndex; +} + +U_CAPI void U_EXPORT2 +ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, + const void *newContext, UBiDiClassCallback **oldFn, + const void **oldContext, UErrorCode *pErrorCode) +{ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(pBiDi==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if( oldFn ) + { + *oldFn = pBiDi->fnClassCallback; + } + if( oldContext ) + { + *oldContext = pBiDi->coClassCallback; + } + pBiDi->fnClassCallback = newFn; + pBiDi->coClassCallback = newContext; +} 
+ +U_CAPI void U_EXPORT2 +ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) +{ + if(pBiDi==NULL) { + return; + } + if( fn ) + { + *fn = pBiDi->fnClassCallback; + } + if( context ) + { + *context = pBiDi->coClassCallback; + } +} + +U_CAPI UCharDirection U_EXPORT2 +ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) +{ + UCharDirection dir; + + if( pBiDi->fnClassCallback == NULL || + (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) + { + dir = ubidi_getClass(pBiDi->bdp, c); + } + if(dir >= U_CHAR_DIRECTION_COUNT) { + dir = (UCharDirection)ON; + } + return dir; +} diff --git a/deps/icu-small/source/common/ubidi_props.c b/deps/icu-small/source/common/ubidi_props.c deleted file mode 100644 index cba13ad6ea..0000000000 --- a/deps/icu-small/source/common/ubidi_props.c +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ubidi_props.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004dec30 -* created by: Markus W. Scherer -* -* Low-level Unicode bidi/shaping properties access. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "unicode/udata.h" /* UDataInfo */ -#include "ucmndata.h" /* DataHeader */ -#include "udatamem.h" -#include "uassert.h" -#include "cmemory.h" -#include "utrie2.h" -#include "ubidi_props.h" -#include "ucln_cmn.h" - -struct UBiDiProps { - UDataMemory *mem; - const int32_t *indexes; - const uint32_t *mirrors; - const uint8_t *jgArray; - const uint8_t *jgArray2; - - UTrie2 trie; - uint8_t formatVersion[4]; -}; - -/* ubidi_props_data.h is machine-generated by genbidi --csource */ -#define INCLUDED_FROM_UBIDI_PROPS_C -#include "ubidi_props_data.h" - -/* UBiDiProps singleton ----------------------------------------------------- */ - -U_CFUNC const UBiDiProps * -ubidi_getSingleton() { - return &ubidi_props_singleton; -} - -/* set of property starts for UnicodeSet ------------------------------------ */ - -static UBool U_CALLCONV -_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - return TRUE; -} - -U_CFUNC void -ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) { - int32_t i, length; - UChar32 c, start, limit; - - const uint8_t *jgArray; - uint8_t prev, jg; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of the trie */ - utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa); - - /* add the code points from the bidi mirroring table */ - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; - for(i=0; imirrors[i]); - sa->addRange(sa->set, c, c+1); - } - - /* add the code points from the Joining_Group array where the value changes */ - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; - jgArray=bdp->jgArray; - for(;;) { - prev=0; - while(startadd(sa->set, start); - prev=jg; - } - ++start; - } - if(prev!=0) { - /* add the 
limit code point if the last value was not 0 (it is now start==limit) */ - sa->add(sa->set, limit); - } - if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) { - /* switch to the second Joining_Group range */ - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; - jgArray=bdp->jgArray2; - } else { - break; - } - } - - /* add code points with hardcoded properties, plus the ones following them */ - - /* (none right now) */ -} - -/* property access functions ------------------------------------------------ */ - -U_CFUNC int32_t -ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { - int32_t max; - - if(bdp==NULL) { - return -1; - } - - max=bdp->indexes[UBIDI_MAX_VALUES_INDEX]; - switch(which) { - case UCHAR_BIDI_CLASS: - return (max&UBIDI_CLASS_MASK); - case UCHAR_JOINING_GROUP: - return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT; - case UCHAR_JOINING_TYPE: - return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT; - case UCHAR_BIDI_PAIRED_BRACKET_TYPE: - return (max&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT; - default: - return -1; /* undefined */ - } -} - -U_CAPI UCharDirection -ubidi_getClass(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return (UCharDirection)UBIDI_GET_CLASS(props); -} - -U_CFUNC UBool -ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); -} - -static UChar32 -getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { - int32_t delta=UBIDI_GET_MIRROR_DELTA(props); - if(delta!=UBIDI_ESC_MIRROR_DELTA) { - return c+delta; - } else { - /* look for mirror code point in the mirrors[] table */ - const uint32_t *mirrors; - uint32_t m; - int32_t i, length; - UChar32 c2; - - mirrors=bdp->mirrors; - length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; - - /* linear search */ - for(i=0; itrie, c); - return getMirror(bdp, c, props); -} - -U_CFUNC UBool -ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) { - 
uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); -} - -U_CFUNC UBool -ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); -} - -U_CFUNC UJoiningType -ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); -} - -U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) { - UChar32 start, limit; - - start=bdp->indexes[UBIDI_IX_JG_START]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; - if(start<=c && cjgArray[c-start]; - } - start=bdp->indexes[UBIDI_IX_JG_START2]; - limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; - if(start<=c && cjgArray2[c-start]; - } - return U_JG_NO_JOINING_GROUP; -} - -U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); -} - -U_CFUNC UChar32 -ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&bdp->trie, c); - if((props&UBIDI_BPT_MASK)==0) { - return c; - } else { - return getMirror(bdp, c, props); - } -} - -/* public API (see uchar.h) ------------------------------------------------- */ - -U_CFUNC UCharDirection -u_charDirection(UChar32 c) { - return ubidi_getClass(&ubidi_props_singleton, c); -} - -U_CFUNC UBool -u_isMirrored(UChar32 c) { - return ubidi_isMirrored(&ubidi_props_singleton, c); -} - -U_CFUNC UChar32 -u_charMirror(UChar32 c) { - return ubidi_getMirror(&ubidi_props_singleton, c); -} - -U_STABLE UChar32 U_EXPORT2 -u_getBidiPairedBracket(UChar32 c) { - return ubidi_getPairedBracket(&ubidi_props_singleton, c); -} diff --git a/deps/icu-small/source/common/ubidi_props.cpp b/deps/icu-small/source/common/ubidi_props.cpp new file mode 100644 index 
0000000000..dcfb52c897 --- /dev/null +++ b/deps/icu-small/source/common/ubidi_props.cpp @@ -0,0 +1,267 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ubidi_props.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004dec30 +* created by: Markus W. Scherer +* +* Low-level Unicode bidi/shaping properties access. +*/ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "unicode/udata.h" /* UDataInfo */ +#include "ucmndata.h" /* DataHeader */ +#include "udatamem.h" +#include "uassert.h" +#include "cmemory.h" +#include "utrie2.h" +#include "ubidi_props.h" +#include "ucln_cmn.h" + +struct UBiDiProps { + UDataMemory *mem; + const int32_t *indexes; + const uint32_t *mirrors; + const uint8_t *jgArray; + const uint8_t *jgArray2; + + UTrie2 trie; + uint8_t formatVersion[4]; +}; + +/* ubidi_props_data.h is machine-generated by genbidi --csource */ +#define INCLUDED_FROM_UBIDI_PROPS_C +#include "ubidi_props_data.h" + +/* UBiDiProps singleton ----------------------------------------------------- */ + +U_CFUNC const UBiDiProps * +ubidi_getSingleton() { + return &ubidi_props_singleton; +} + +/* set of property starts for UnicodeSet ------------------------------------ */ + +static UBool U_CALLCONV +_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + (void)end; + (void)value; + /* add the start code point to the USet */ + const USetAdder *sa=(const USetAdder *)context; + sa->add(sa->set, start); + return TRUE; +} + +U_CFUNC void +ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode) { + int32_t i, 
length; + UChar32 c, start, limit; + + const uint8_t *jgArray; + uint8_t prev, jg; + + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of the trie */ + utrie2_enum(&bdp->trie, NULL, _enumPropertyStartsRange, sa); + + /* add the code points from the bidi mirroring table */ + length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + for(i=0; imirrors[i]); + sa->addRange(sa->set, c, c+1); + } + + /* add the code points from the Joining_Group array where the value changes */ + start=bdp->indexes[UBIDI_IX_JG_START]; + limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; + jgArray=bdp->jgArray; + for(;;) { + prev=0; + while(startadd(sa->set, start); + prev=jg; + } + ++start; + } + if(prev!=0) { + /* add the limit code point if the last value was not 0 (it is now start==limit) */ + sa->add(sa->set, limit); + } + if(limit==bdp->indexes[UBIDI_IX_JG_LIMIT]) { + /* switch to the second Joining_Group range */ + start=bdp->indexes[UBIDI_IX_JG_START2]; + limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; + jgArray=bdp->jgArray2; + } else { + break; + } + } + + /* add code points with hardcoded properties, plus the ones following them */ + + /* (none right now) */ +} + +/* property access functions ------------------------------------------------ */ + +U_CFUNC int32_t +ubidi_getMaxValue(const UBiDiProps *bdp, UProperty which) { + int32_t max; + + if(bdp==NULL) { + return -1; + } + + max=bdp->indexes[UBIDI_MAX_VALUES_INDEX]; + switch(which) { + case UCHAR_BIDI_CLASS: + return (max&UBIDI_CLASS_MASK); + case UCHAR_JOINING_GROUP: + return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT; + case UCHAR_JOINING_TYPE: + return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT; + case UCHAR_BIDI_PAIRED_BRACKET_TYPE: + return (max&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT; + default: + return -1; /* undefined */ + } +} + +U_CAPI UCharDirection +ubidi_getClass(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + return (UCharDirection)UBIDI_GET_CLASS(props); +} + +U_CFUNC 
UBool +ubidi_isMirrored(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); +} + +static UChar32 +getMirror(const UBiDiProps *bdp, UChar32 c, uint16_t props) { + int32_t delta=UBIDI_GET_MIRROR_DELTA(props); + if(delta!=UBIDI_ESC_MIRROR_DELTA) { + return c+delta; + } else { + /* look for mirror code point in the mirrors[] table */ + const uint32_t *mirrors; + uint32_t m; + int32_t i, length; + UChar32 c2; + + mirrors=bdp->mirrors; + length=bdp->indexes[UBIDI_IX_MIRROR_LENGTH]; + + /* linear search */ + for(i=0; itrie, c); + return getMirror(bdp, c, props); +} + +U_CFUNC UBool +ubidi_isBidiControl(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); +} + +U_CFUNC UBool +ubidi_isJoinControl(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); +} + +U_CFUNC UJoiningType +ubidi_getJoiningType(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); +} + +U_CFUNC UJoiningGroup +ubidi_getJoiningGroup(const UBiDiProps *bdp, UChar32 c) { + UChar32 start, limit; + + start=bdp->indexes[UBIDI_IX_JG_START]; + limit=bdp->indexes[UBIDI_IX_JG_LIMIT]; + if(start<=c && cjgArray[c-start]; + } + start=bdp->indexes[UBIDI_IX_JG_START2]; + limit=bdp->indexes[UBIDI_IX_JG_LIMIT2]; + if(start<=c && cjgArray2[c-start]; + } + return U_JG_NO_JOINING_GROUP; +} + +U_CFUNC UBidiPairedBracketType +ubidi_getPairedBracketType(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); +} + +U_CFUNC UChar32 +ubidi_getPairedBracket(const UBiDiProps *bdp, UChar32 c) { + uint16_t props=UTRIE2_GET16(&bdp->trie, c); + 
if((props&UBIDI_BPT_MASK)==0) { + return c; + } else { + return getMirror(bdp, c, props); + } +} + +/* public API (see uchar.h) ------------------------------------------------- */ + +U_CFUNC UCharDirection +u_charDirection(UChar32 c) { + return ubidi_getClass(&ubidi_props_singleton, c); +} + +U_CFUNC UBool +u_isMirrored(UChar32 c) { + return ubidi_isMirrored(&ubidi_props_singleton, c); +} + +U_CFUNC UChar32 +u_charMirror(UChar32 c) { + return ubidi_getMirror(&ubidi_props_singleton, c); +} + +U_STABLE UChar32 U_EXPORT2 +u_getBidiPairedBracket(UChar32 c) { + return ubidi_getPairedBracket(&ubidi_props_singleton, c); +} diff --git a/deps/icu-small/source/common/ubidi_props.h b/deps/icu-small/source/common/ubidi_props.h index 4312230bc0..69e8853e69 100644 --- a/deps/icu-small/source/common/ubidi_props.h +++ b/deps/icu-small/source/common/ubidi_props.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ubidi_props.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ubidi_props_data.h b/deps/icu-small/source/common/ubidi_props_data.h index 685d2b1e84..8d6856d371 100644 --- a/deps/icu-small/source/common/ubidi_props_data.h +++ b/deps/icu-small/source/common/ubidi_props_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 1999-2016, International Business Machines @@ -13,35 +13,35 @@ static const UVersionInfo ubidi_props_dataVersion={9,0,0,0}; -static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x5df0,0x5a78,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x5802b6}; +static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6060,0x5ce8,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x5802b6}; -static const uint16_t ubidi_props_trieIndex[11572]={ +static const uint16_t ubidi_props_trieIndex[11884]={ 0x36a,0x372,0x37a,0x382,0x39a,0x3a2,0x3aa,0x3b2,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x38a,0x392, 0x38a,0x392,0x38a,0x392,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,0x3dc,0x3e4,0x3ec,0x3f4,0x3ef,0x3f7, 0x38a,0x392,0x38a,0x392,0x3ff,0x407,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x40d,0x415,0x41d,0x425, 0x42d,0x435,0x43d,0x445,0x44b,0x453,0x45b,0x463,0x46b,0x473,0x479,0x481,0x489,0x491,0x499,0x4a1, -0x4ad,0x4a9,0x4b5,0x41f,0x41f,0x4c5,0x4cd,0x4bd,0x4d5,0x4d7,0x4df,0x4e7,0x4ef,0x4f0,0x4f8,0x500, -0x508,0x4f0,0x510,0x515,0x508,0x4f0,0x51d,0x525,0x4ef,0x52a,0x532,0x4e7,0x537,0x38a,0x53f,0x543, -0x54b,0x54c,0x554,0x55c,0x4ef,0x564,0x56c,0x4e7,0x4ef,0x38a,0x4f8,0x4e7,0x38a,0x38a,0x572,0x38a, -0x38a,0x578,0x580,0x38a,0x38a,0x584,0x58c,0x38a,0x590,0x597,0x38a,0x59f,0x5a7,0x5ae,0x536,0x38a, -0x38a,0x5b6,0x5be,0x5c6,0x5ce,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5d6,0x38a,0x5de,0x38a,0x38a,0x38a, -0x5e6,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x5ee,0x38a,0x38a,0x38a,0x5f6,0x5f6,0x4fc,0x4fc,0x38a,0x5fc,0x604,0x5de, -0x61a,0x60c,0x60c,0x622,0x629,0x612,0x38a,0x38a,0x38a,0x631,0x639,0x38a,0x38a,0x38a,0x63b,0x643, -0x64b,0x38a,0x652,0x65a,0x38a,0x662,0x38a,0x38a,0x66a,0x66d,0x537,0x675,0x401,0x67d,0x38a,0x684, 
-0x38a,0x689,0x38a,0x38a,0x38a,0x38a,0x68f,0x697,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x69f, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6a7,0x6af,0x6b3, -0x6cb,0x6d1,0x6bb,0x6c3,0x6d9,0x6e1,0x6e5,0x5b1,0x6ed,0x6f5,0x6fd,0x38a,0x705,0x643,0x643,0x643, -0x715,0x71d,0x725,0x72d,0x732,0x73a,0x742,0x70d,0x74a,0x752,0x38a,0x758,0x75f,0x643,0x643,0x765, -0x643,0x562,0x76a,0x643,0x772,0x38a,0x38a,0x640,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643, -0x643,0x643,0x643,0x643,0x643,0x77a,0x643,0x643,0x643,0x643,0x643,0x780,0x643,0x643,0x788,0x790, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x643,0x643,0x643,0x643,0x7a0,0x7a7,0x7af,0x798, -0x7bf,0x7c7,0x7cf,0x7d6,0x7de,0x7e6,0x7ed,0x7b7,0x643,0x643,0x643,0x7f5,0x7fb,0x801,0x809,0x80e, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x815,0x38a,0x38a,0x38a,0x81d,0x38a,0x38a,0x38a,0x3d8, -0x825,0x82d,0x834,0x38a,0x83c,0x643,0x643,0x646,0x643,0x643,0x643,0x643,0x643,0x643,0x843,0x849, -0x859,0x851,0x38a,0x38a,0x861,0x5e6,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x643,0x81c, -0x3bf,0x38a,0x838,0x869,0x38a,0x871,0x80e,0x38a,0x38a,0x38a,0x38a,0x879,0x38a,0x38a,0x63b,0x3b0, +0x4ad,0x4a9,0x4b5,0x4bd,0x41f,0x4cd,0x4d5,0x4c5,0x4dd,0x4df,0x4e7,0x4ef,0x4f7,0x4f8,0x500,0x508, +0x510,0x4f8,0x518,0x51d,0x510,0x4f8,0x525,0x52d,0x4f7,0x535,0x53d,0x4ef,0x542,0x38a,0x54a,0x54e, +0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x57f,0x581,0x500,0x4ef,0x38a,0x38a,0x589,0x38a, +0x38a,0x58f,0x597,0x38a,0x38a,0x59b,0x5a3,0x38a,0x5a7,0x5ae,0x38a,0x5b6,0x5be,0x5c5,0x541,0x38a, +0x38a,0x5cd,0x5d5,0x5dd,0x5e5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5ed,0x38a,0x5f5,0x38a,0x38a,0x38a, +0x5fd,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x605,0x38a,0x38a,0x38a,0x60d,0x60d,0x504,0x504,0x38a,0x613,0x61b,0x5f5, 
+0x631,0x623,0x623,0x639,0x640,0x629,0x38a,0x38a,0x38a,0x648,0x650,0x38a,0x38a,0x38a,0x652,0x65a, +0x662,0x38a,0x669,0x671,0x38a,0x679,0x38a,0x38a,0x681,0x684,0x542,0x68c,0x401,0x694,0x38a,0x69b, +0x38a,0x6a0,0x38a,0x38a,0x38a,0x38a,0x6a6,0x6ae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6b6, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6be,0x6c6,0x6ca, +0x6e2,0x6e8,0x6d2,0x6da,0x6f0,0x6f8,0x6fc,0x5c8,0x704,0x70c,0x714,0x38a,0x71c,0x65a,0x65a,0x65a, +0x72c,0x734,0x73c,0x744,0x749,0x751,0x759,0x724,0x761,0x769,0x38a,0x76f,0x776,0x65a,0x65a,0x65a, +0x65a,0x56d,0x77c,0x65a,0x784,0x38a,0x38a,0x657,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, +0x65a,0x65a,0x65a,0x65a,0x65a,0x78c,0x65a,0x65a,0x65a,0x65a,0x65a,0x792,0x65a,0x65a,0x79a,0x7a2, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a,0x65a,0x65a,0x7b2,0x7b9,0x7c1,0x7aa, +0x7d1,0x7d9,0x7e1,0x7e8,0x7f0,0x7f8,0x7ff,0x7c9,0x65a,0x65a,0x65a,0x807,0x80d,0x813,0x81b,0x820, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x827,0x38a,0x38a,0x38a,0x82f,0x38a,0x38a,0x38a,0x3d8, +0x837,0x83f,0x76c,0x38a,0x842,0x65a,0x65a,0x65d,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x849,0x84f, +0x85f,0x857,0x38a,0x38a,0x867,0x5fd,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x82e, +0x3bf,0x38a,0x86f,0x877,0x38a,0x87f,0x820,0x38a,0x38a,0x38a,0x38a,0x887,0x38a,0x38a,0x652,0x3b0, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, @@ -54,7 +54,7 @@ static const uint16_t ubidi_props_trieIndex[11572]={ 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x643,0x643, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, @@ -98,10 +98,10 @@ static const uint16_t ubidi_props_trieIndex[11572]={ 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x838,0x643,0x562,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x880,0x38a,0x38a,0x885,0x54c,0x38a,0x38a,0x592,0x643,0x63a,0x38a,0x38a,0x88d,0x38a,0x38a,0x38a, -0x895,0x89c,0x60c,0x8a4,0x38a,0x38a,0x8ab,0x8b3,0x38a,0x8ba,0x8c1,0x38a,0x4d5,0x8c6,0x38a,0x4ee, -0x38a,0x8ce,0x8d6,0x4f0,0x38a,0x8da,0x4ef,0x8e2,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8e9, +0x38a,0x38a,0x38a,0x38a,0x86f,0x65a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x88e,0x38a,0x38a,0x893,0x557,0x38a,0x38a,0x5a9,0x65a,0x651,0x38a,0x38a,0x89b,0x38a,0x38a,0x38a, +0x8a3,0x8aa,0x623,0x8b2,0x38a,0x38a,0x8b9,0x8c1,0x38a,0x8c8,0x8cf,0x38a,0x4dd,0x8d4,0x38a,0x4f6, +0x38a,0x8dc,0x8e4,0x4f8,0x38a,0x8e8,0x4f7,0x8f0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8f7, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, @@ -141,100 +141,100 @@ static const uint16_t ubidi_props_trieIndex[11572]={ 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8fd,0x8f1,0x8f5,0x489,0x489,0x489,0x489,0x489, -0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x905,0x489,0x489,0x489,0x489,0x90d,0x911, -0x919,0x921,0x925,0x92d,0x489,0x489,0x489,0x931,0x939,0x37a,0x941,0x949,0x38a,0x38a,0x38a,0x951, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x90b,0x8ff,0x903,0x489,0x489,0x489,0x489,0x489, +0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x913,0x489,0x489,0x489,0x489,0x91b,0x91f, +0x927,0x92f,0x933,0x93b,0x489,0x489,0x489,0x93f,0x947,0x37a,0x94f,0x957,0x38a,0x38a,0x38a,0x95f, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0xe28,0xe28,0xe68,0xea8,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xee0,0xf20,0xf60,0xf70,0xfb0,0xfbc, 0xe28,0xe28,0xffc,0xe28,0xe28,0xe28,0x1034,0x1074,0x10b4,0x10f4,0x112c,0x116c,0x11ac,0x11e4,0x1224,0x1264, -0xa40,0xa80,0xac0,0xafa,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb23,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb60,0x1a0,0x1a0,0xb95,0xbd5,0xc15,0xc55,0xc95,0xcd5, +0xa40,0xa80,0xac0,0xafa,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb25,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb62,0x1a0,0x1a0,0xb97,0xbd7,0xc17,0xc57,0xc97,0xcd7, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, 
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, -0xd55,0xd65,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, +0xd57,0xd67,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, 
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd15, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x959,0x38a,0x643,0x643,0x961,0x5e6,0x38a,0x4e8, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x969,0x38a,0x38a,0x38a,0x970,0x38a,0x38a,0x38a,0x38a, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x65a,0x65a,0x96f,0x5fd,0x38a,0x4f0, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x977,0x38a,0x38a,0x38a,0x97e,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x978,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, -0x980,0x984,0x41f,0x41f,0x41f,0x41f,0x994,0x98c,0x41f,0x99c,0x41f,0x41f,0x9a4,0x9aa,0x41f,0x41f, +0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x986,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, +0x98e,0x992,0x41f,0x41f,0x41f,0x41f,0x9a2,0x99a,0x41f,0x9aa,0x41f,0x41f,0x9b2,0x9b8,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, -0x41f,0x41f,0x41f,0x9b2,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, -0x4ef,0x8ad,0x9ba,0x9c1,0x401,0x9c4,0x38a,0x38a,0x4d5,0x9cc,0x38a,0x9d2,0x401,0x9d7,0x5f8,0x38a, -0x38a,0x9df,0x38a,0x38a,0x38a,0x38a,0x81d,0x9e7,0x401,0x4f0,0x54b,0x9ee,0x38a,0x38a,0x38a,0x38a, -0x38a,0x8ad,0x9f6,0x38a,0x38a,0x9fa,0xa02,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa06,0xa0e,0x38a, 
-0x38a,0xa16,0x54b,0x832,0x38a,0xa1e,0x38a,0x38a,0x5d6,0xa26,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x41f,0x41f,0x41f,0x9c0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, +0x4f7,0x8bb,0x9c8,0x9cf,0x401,0x9d2,0x38a,0x38a,0x4dd,0x9da,0x38a,0x9e0,0x401,0x9e5,0x60f,0x38a, +0x38a,0x9ed,0x38a,0x38a,0x38a,0x38a,0x82f,0x9f5,0x401,0x4f8,0x556,0x9fc,0x38a,0x38a,0x38a,0x38a, +0x38a,0x8bb,0xa04,0x38a,0x38a,0xa08,0xa10,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa14,0xa1c,0x38a, +0x38a,0xa24,0x556,0xa2c,0x38a,0xa32,0x38a,0x38a,0x5ed,0xa3a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa42,0xa46,0xa4e,0x38a,0xa55,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa5c,0x38a,0x38a,0xa64,0xa6a, +0x38a,0x38a,0x38a,0xa70,0xa78,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa7c,0x38a,0xa82,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa2a,0x38a,0x38a,0xa32,0xa38, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa3e,0x38a,0xa44,0x38a,0x38a,0x38a, +0x38a,0xa88,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa4a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa90,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0xa97,0xa9f,0xaa5,0x38a,0x38a,0x65a,0x65a,0xaad,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a, 
+0x65a,0xab5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xabb,0x38a,0xac2, +0x38a,0xabe,0x38a,0xac5,0x38a,0xacd,0xad1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad9,0x3d8,0xae0,0xae7,0xaef,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x509,0xa52,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0xa59,0xa61,0xa67,0x38a,0x38a,0x643,0x643,0xa6f,0x38a,0x38a,0x38a,0x38a,0x38a,0x643,0x643,0x767, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf7,0xaff,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa71,0x38a,0xa78,0x38a,0xa74, -0x38a,0xa7b,0x38a,0xa83,0xa87,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xa8f,0x3d8,0xa96,0xa9d,0xaa5,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0xaad,0xab5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xabd,0x41f,0xac5,0xac5,0xacc, 
+0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb07,0x41f,0xb0f, +0xb0f,0xb16,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, -0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, -0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xad4,0x41f,0x41f,0x41f, -0x41f,0x41f,0x41f,0x41f,0x41f,0x643,0xadc,0x643,0x643,0x646,0xae1,0xae5,0x843,0xaed,0x38a,0x38a, -0xaf3,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x643,0x643,0x643, -0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643, -0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x643,0x768,0xafb,0x643,0x643,0x643, -0x646,0x643,0x643,0x830,0x38a,0xadc,0x643,0xb03,0x643,0xb0b,0x845,0x38a,0x38a,0xb1b,0xb23,0xb2b, -0x38a,0x844,0x38a,0x5e6,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb1e,0x41f, +0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x65a,0xb26,0x65a,0x65a,0x65d,0xb2b,0xb2f,0x849,0xb37, +0x38a,0x38a,0xb3d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x76d,0x38a,0x38a,0x38a,0x38a,0x65a, +0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, +0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0xb45,0xb4d,0x65a, +0x65a,0x65a,0x65d,0x65a,0x65a,0xb45,0x38a,0xb26,0x65a,0xb55,0x65a,0xb5d,0x84b,0x38a,0x38a,0xb26, +0xb61,0xb69,0x65f,0x65c,0x38a,0xb71,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 
-0x38a,0x38a,0x38a,0x38a,0xb13,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, -0x38a,0x38a,0x38a,0x38a,0xb13,0xb3b,0xb33,0xb33,0xb33,0xb3c,0xb3c,0xb3c,0xb3c,0x3d8,0x3d8,0x3d8, -0x3d8,0x3d8,0x3d8,0x3d8,0xb44,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c, -0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c, -0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c, -0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0xb3c, -0xb3c,0xb3c,0xb3c,0xb3c,0xb3c,0x369,0x369,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0xb89,0xb81,0xb81,0xb81,0xb8a,0xb8a,0xb8a,0xb8a,0x3d8, +0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb92,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, +0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, +0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, +0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, +0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,4,4,0xa,0xa, 0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,2,2,2,2, @@ -319,6 +319,8 @@ static const uint16_t ubidi_props_trieIndex[11572]={ 1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1, 
0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41, 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1,1,0xb1,0xb1,0xb1, +1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d, @@ -346,400 +348,417 @@ static const uint16_t ubidi_props_trieIndex[11572]={ 0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0, 0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,4,0,0,0,0,0,0,0,0,0x11,0x11, +0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0, -0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0xb1,0, +0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa, -0xa,0xa,0xa,0xa,0xa,4,0xa,0,0,0,0,0,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, -0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0, -0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0xa0, -0,0,0,0,0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0, +0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa, +0xa,4,0xa,0,0,0,0,0,0xb1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0, +0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, +0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,4, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,0,0,0,0, +0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x11,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x11, +0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1, +0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1, +0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0, -0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0, +0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, +0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0, +0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, +0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0, +0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x310a, +0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, -0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,4,0,0xb1,0,0, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0xb1,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a, -0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, +0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 
+0,0,0,0,0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40, 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0,0,0,0,0,0,0,0,0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1, +0xb1,0xb1,0x12,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0, +0,0,0,0,0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0, -0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1, -0,0,0,0,0xa,0,0,0,0xa,0xa,0,0,0,0,0,0, +0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0, +0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0, +0xa,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, +0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0xb1,0,0,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, 
-0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0, -0xb1,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0, +0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa, -6,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2, -0xb2,0x12,0x814,0x815,0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0, -2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9, -9,9,9,9,9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae, -0x8b0,0x8ac,0x8af,6,4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa, -0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,3,3, -0xa,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 
-4,4,4,4,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0, -0,0,0,0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0, -0,0,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0, -0,0,4,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa, -0,0,0,0,0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa, -0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a, -0x900a,0x900a,0x900a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a, -0xb00a,0xb00a,0xa,0xa,0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a, -0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a, -0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a, -0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, -0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0x100a,0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, -0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a, -0x900a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a, 
-0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, +0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0, +0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x11,0x11, +0x11,0x11,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815, +0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2, +2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9, +9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6, +4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, +2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa, +0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0, +0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0, 
+0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a, -0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa, -0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0xa, -0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a, +0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa, +0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a, +0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa, +0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a, +0x100a,0x100a,0x100a,0xa,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a, +0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a, +0x100a,0x100a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa, +0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a, +0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a, +0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, 
+0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a, +0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa, +0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a, +0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, +0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa, -0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a, -0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a, -0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a, -0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa, -0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a, -0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0x100a, -0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, -0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa, -0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, -0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa, 
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa, -0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa, -0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a, -0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, -0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a, -0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a, -0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a, -0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, -0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa, +0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa, +0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 
+0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa, +0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a, +0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a, +0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a, +0xf20a,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, +0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa, +0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a, +0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa, +0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a, +0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a, +0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, +0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a, +0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa, +0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a, +0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a, +0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a, +0x100a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a, 
+0xf00a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, -0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0, -0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa, -0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa, +0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a, +0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0xb1,0xb1, -0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0,0,0, -0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a, -0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xa, -0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0, +0,0,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa, +0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, +0,0,0,0,0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0,0,0, +0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0,0,0xb1, +0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0, 
+0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0, +0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, +0,0,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,0,0,0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1, -0xb1,0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0xb1,0xb1,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0, -0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,0xb1, -0xb1,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0, -0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0,0,0, -0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,1,1,1,1,1,1,1,1,3,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,0xb1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, 
+0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, +0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0, +0,0xb1,0,0,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, +1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,1,0xb1,1,0xd,0xd,0xd,0xd, 0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xa,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, -0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,6,0xa,6,0,0xa,6,0xa,0xa,0xa,0x310a,0xf20a,0x310a, -0xf20a,0x310a,0xf20a,4,0xa,0xa,3,3,0x300a,0xf00a,0xa,0,0xa,4,4,0xa, -0,0,0,0,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xd,0xd,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, 
+0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,6,0xa,6,0, +0xa,6,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,4,0xa,0xa,3,3, +0x300a,0xf00a,0xa,0,0xa,4,4,0xa,0,0,0,0,0xd,0xd,0xd,0xd, 0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xb2,0,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3, -6,3,6,6,2,2,2,2,2,2,2,2,2,2,6,0xa, -0x500a,0xa,0xd00a,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x510a, -0xa,0xd20a,0xa,0x310a,0xf20a,0xa,0x310a,0xf20a,0xa,0xa,0,0,0,0,0,0, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb2,0,0xa,0xa,4, +4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2, +2,2,2,2,2,2,6,0xa,0x500a,0xa,0xd00a,0xa,0xa,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,4,4,0xa,0xa,0xa,4,4,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xaa,0xaa,0xaa, -0xa,0xa,0x12,0x12,0,0xa,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0x310a,0xf20a,0xa,0x310a,0xf20a, +0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,0xa,0xa, +0xa,4,4,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0xaa,0xaa,0xaa,0xa,0xa,0x12,0x12,0,0xa,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0,0,0,0,0xb1,2,2,2,2,2,2,2,2,2,2,2, +0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, +2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0,0,0,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xa, -1,0xb1,0xb1,0xb1,1,0xb1,0xb1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1, +1,1,1,1,1,1,1,0xa,1,0xb1,0xb1,0xb1,1,0xb1,0xb1,1, 
+1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0xb1, -0x41,0x81,1,1,0x81,0xb1,0xb1,1,1,1,1,0x41,0x41,0x41,0x41,0x81, +0xb1,0xb1,0xb1,1,1,1,1,0xb1,0x41,0x81,1,1,0x81,0xb1,0xb1,1, +1,1,1,0x41,0x41,0x41,0x41,0x81,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41,0x41,0x81,1,0x81, +1,0x81,0x81,1,1,0x61,0x81,0x81,0x81,0x81,0x81,0x41,0x41,0x41,0x41,0x61, +0x41,0x41,0x41,0x41,0x41,0x81,0x41,0x41,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x41,0x41,0x41,0x41,0x41,0x81,1,0x81,1,0x81,0x81,1,1,0x61,0x81,0x81, -0x81,0x81,0x81,0x41,0x41,0x41,0x41,0x61,0x41,0x41,0x41,0x41,0x41,0x81,0x41,0x41, +1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x41,0x81,0x41,0x81,0x81,0x81,0x41,0x41, +0x41,0x81,0x41,0x41,0x81,0x41,0x81,0x81,0x41,0x81,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x81,0x81,0x81,0x81,0x41,0x41,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0x41,0x81,0x41,0x81,0x81,0x81,0x41,0x41,0x41,0x81,0x41,0x41,0x81,0x41,0x81,0x81, -0x41,0x81,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0x81,0x81,0x81,0x81,0x41,0x41,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0, +0,0xb1,0xb1,0,0,0xa0,0,0,0,0,0,0,0,0,0,0xb1, +0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0xa0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1, +0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+0,0,0,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0, +0,0,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0, -0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1, +0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0, -0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0,0,0,0,0,0,0,0, 0,0,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1, 0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1, 0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, 
+0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0, +0,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11, +0x11,0,0,0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0x11, +0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0, +0,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0x11,0x11, +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0x11,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0,0,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0, -0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xb1,0xb1, -0xb1,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a, +0,0x11,0x11,0x11,0x11,0x11,0x11,0,0,0,0x11,0,0x11,0x11,0,0x11, +0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a, -0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0, -0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 
-0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1, -1,1,1,1,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, +0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2, +0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0, +0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, +0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1, +0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1, 
+0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41, 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, -0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2, +2,2,2,2,2,2,2,0xa,0xa,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,2,0xa, -0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, 
-0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x12,0x12,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0,0,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xb2,0xb2,0xb2,0xb2, +0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, 0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, -0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0,0,0,0 +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0 }; static const uint32_t ubidi_props_mirrors[26]={ @@ -809,13 +828,13 @@ static const UBiDiProps ubidi_props_singleton={ ubidi_props_trieIndex+3496, NULL, 3496, - 8076, + 8388, 0x1a0, 0xe28, 0x0, 0x0, 0x110000, - 0x2d30, + 0x2e68, 
NULL, 0, FALSE, FALSE, 0, NULL }, { 2,2,0,0 } diff --git a/deps/icu-small/source/common/ubidiimp.h b/deps/icu-small/source/common/ubidiimp.h index a62d8b259e..fd64fac34d 100644 --- a/deps/icu-small/source/common/ubidiimp.h +++ b/deps/icu-small/source/common/ubidiimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ubidiimp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ubidiln.c b/deps/icu-small/source/common/ubidiln.c deleted file mode 100644 index 688ca4c31e..0000000000 --- a/deps/icu-small/source/common/ubidiln.c +++ /dev/null @@ -1,1349 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidiln.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999aug06 -* created by: Markus W. 
Scherer, updated by Matitiahu Allouche -*/ - -#include "cmemory.h" -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/uchar.h" -#include "unicode/ubidi.h" -#include "ubidiimp.h" -#include "uassert.h" - -/* - * General remarks about the functions in this file: - * - * These functions deal with the aspects of potentially mixed-directional - * text in a single paragraph or in a line of a single paragraph - * which has already been processed according to - * the Unicode 6.3 BiDi algorithm as defined in - * http://www.unicode.org/unicode/reports/tr9/ , version 28, - * also described in The Unicode Standard, Version 6.3.0 . - * - * This means that there is a UBiDi object with a levels - * and a dirProps array. - * paraLevel and direction are also set. - * Only if the length of the text is zero, then levels==dirProps==NULL. - * - * The overall directionality of the paragraph - * or line is used to bypass the reordering steps if possible. - * Even purely RTL text does not need reordering there because - * the ubidi_getLogical/VisualIndex() functions can compute the - * index on the fly in such a case. - * - * The implementation of the access to same-level-runs and of the reordering - * do attempt to provide better performance and less memory usage compared to - * a direct implementation of especially rule (L2) with an array of - * one (32-bit) integer per text character. - * - * Here, the levels array is scanned as soon as necessary, and a vector of - * same-level-runs is created. Reordering then is done on this vector. - * For each run of text positions that were resolved to the same level, - * only 8 bytes are stored: the first text position of the run and the visual - * position behind the run after reordering. - * One sign bit is used to hold the directionality of the run. - * This is inefficient if there are many very short runs. If the average run - * length is <2, then this uses more memory. 
- * - * In a further attempt to save memory, the levels array is never changed - * after all the resolution rules (Xn, Wn, Nn, In). - * Many functions have to consider the field trailingWSStart: - * if it is less than length, then there is an implicit trailing run - * at the paraLevel, - * which is not reflected in the levels array. - * This allows a line UBiDi object to use the same levels array as - * its paragraph parent object. - * - * When a UBiDi object is created for a line of a paragraph, then the - * paragraph's levels and dirProps arrays are reused by way of setting - * a pointer into them, not by copying. This again saves memory and forbids to - * change the now shared levels for (L1). - */ - -/* handle trailing WS (L1) -------------------------------------------------- */ - -/* - * setTrailingWSStart() sets the start index for a trailing - * run of WS in the line. This is necessary because we do not modify - * the paragraph's levels array that we just point into. - * Using trailingWSStart is another form of performing (L1). - * - * To make subsequent operations easier, we also include the run - * before the WS if it is at the paraLevel - we merge the two here. - * - * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is - * set correctly for the line even when contextual multiple paragraphs. - */ -static void -setTrailingWSStart(UBiDi *pBiDi) { - /* pBiDi->direction!=UBIDI_MIXED */ - - const DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - int32_t start=pBiDi->length; - UBiDiLevel paraLevel=pBiDi->paraLevel; - - /* If the line is terminated by a block separator, all preceding WS etc... - are already set to paragraph level. - Setting trailingWSStart to pBidi->length will avoid changing the - level of B chars from 0 to paraLevel in ubidi_getLevels when - orderParagraphsLTR==TRUE. 
- */ - if(dirProps[start-1]==B) { - pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ - return; - } - /* go backwards across all WS, BN, explicit codes */ - while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { - --start; - } - - /* if the WS run can be merged with the previous run then do so here */ - while(start>0 && levels[start-1]==paraLevel) { - --start; - } - - pBiDi->trailingWSStart=start; -} - -/* ubidi_setLine ------------------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_setLine(const UBiDi *pParaBiDi, - int32_t start, int32_t limit, - UBiDi *pLineBiDi, - UErrorCode *pErrorCode) { - int32_t length; - - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode); - RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); - RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); - if(pLineBiDi==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != - ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { - /* the line crosses a paragraph boundary */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* set the values in pLineBiDi from its pParaBiDi parent */ - pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ - pLineBiDi->text=pParaBiDi->text+start; - length=pLineBiDi->length=limit-start; - pLineBiDi->resultLength=pLineBiDi->originalLength=length; - pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); - pLineBiDi->paraCount=pParaBiDi->paraCount; - pLineBiDi->runs=NULL; - pLineBiDi->flags=0; - pLineBiDi->reorderingMode=pParaBiDi->reorderingMode; - pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions; - pLineBiDi->controlCount=0; - if(pParaBiDi->controlCount>0) { - int32_t j; - for(j=start; jtext[j])) { - pLineBiDi->controlCount++; - } - } - pLineBiDi->resultLength-=pLineBiDi->controlCount; - } - - 
pLineBiDi->dirProps=pParaBiDi->dirProps+start; - pLineBiDi->levels=pParaBiDi->levels+start; - pLineBiDi->runCount=-1; - - if(pParaBiDi->direction!=UBIDI_MIXED) { - /* the parent is already trivial */ - pLineBiDi->direction=pParaBiDi->direction; - - /* - * The parent's levels are all either - * implicitly or explicitly ==paraLevel; - * do the same here. - */ - if(pParaBiDi->trailingWSStart<=start) { - pLineBiDi->trailingWSStart=0; - } else if(pParaBiDi->trailingWSStarttrailingWSStart=pParaBiDi->trailingWSStart-start; - } else { - pLineBiDi->trailingWSStart=length; - } - } else { - const UBiDiLevel *levels=pLineBiDi->levels; - int32_t i, trailingWSStart; - UBiDiLevel level; - - setTrailingWSStart(pLineBiDi); - trailingWSStart=pLineBiDi->trailingWSStart; - - /* recalculate pLineBiDi->direction */ - if(trailingWSStart==0) { - /* all levels are at paraLevel */ - pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); - } else { - /* get the level of the first character */ - level=(UBiDiLevel)(levels[0]&1); - - /* if there is anything of a different level, then the line is mixed */ - if(trailingWSStartparaLevel&1)!=level) { - /* the trailing WS is at paraLevel, which differs from levels[0] */ - pLineBiDi->direction=UBIDI_MIXED; - } else { - /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ - i=1; - for(;;) { - if(i==trailingWSStart) { - /* the direction values match those in level */ - pLineBiDi->direction=(UBiDiDirection)level; - break; - } else if((levels[i]&1)!=level) { - pLineBiDi->direction=UBIDI_MIXED; - break; - } - ++i; - } - } - } - - switch(pLineBiDi->direction) { - case UBIDI_LTR: - /* make sure paraLevel is even */ - pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pLineBiDi->trailingWSStart=0; - break; - case UBIDI_RTL: - /* make sure paraLevel is odd */ - pLineBiDi->paraLevel|=1; - - /* all levels are 
implicitly at paraLevel (important for ubidi_getLevels()) */ - pLineBiDi->trailingWSStart=0; - break; - default: - break; - } - } - pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ - return; -} - -U_CAPI UBiDiLevel U_EXPORT2 -ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { - /* return paraLevel if in the trailing WS run, otherwise the real level */ - if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { - return 0; - } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { - return GET_PARALEVEL(pBiDi, charIndex); - } else { - return pBiDi->levels[charIndex]; - } -} - -U_CAPI const UBiDiLevel * U_EXPORT2 -ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - int32_t start, length; - - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); - if((length=pBiDi->length)<=0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - if((start=pBiDi->trailingWSStart)==length) { - /* the current levels array reflects the WS run */ - return pBiDi->levels; - } - - /* - * After the previous if(), we know that the levels array - * has an implicit trailing WS run and therefore does not fully - * reflect itself all the levels. - * This must be a UBiDi object for a line, and - * we need to create a new levels array. 
- */ - if(getLevelsMemory(pBiDi, length)) { - UBiDiLevel *levels=pBiDi->levelsMemory; - - if(start>0 && levels!=pBiDi->levels) { - uprv_memcpy(levels, pBiDi->levels, start); - } - /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, - since pBidi is a line object */ - uprv_memset(levels+start, pBiDi->paraLevel, length-start); - - /* this new levels array is set for the line and reflects the WS run */ - pBiDi->trailingWSStart=length; - return pBiDi->levels=levels; - } else { - /* out of memory */ - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } -} - -U_CAPI void U_EXPORT2 -ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, - int32_t *pLogicalLimit, UBiDiLevel *pLevel) { - UErrorCode errorCode; - int32_t runCount, visualStart, logicalLimit, logicalFirst, i; - Run iRun; - - errorCode=U_ZERO_ERROR; - RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode); - /* ubidi_countRuns will check VALID_PARA_OR_LINE */ - runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); - if(U_FAILURE(errorCode)) { - return; - } - /* this is done based on runs rather than on levels since levels have - a special interpretation when UBIDI_REORDER_RUNS_ONLY - */ - visualStart=logicalLimit=0; - iRun=pBiDi->runs[0]; - - for(i=0; iruns[i]; - logicalFirst=GET_INDEX(iRun.logicalStart); - logicalLimit=logicalFirst+iRun.visualLimit-visualStart; - if((logicalPosition>=logicalFirst) && - (logicalPositionreorderingMode==UBIDI_REORDER_RUNS_ONLY) { - *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart); - } - else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) { - *pLevel=GET_PARALEVEL(pBiDi, logicalPosition); - } else { - *pLevel=pBiDi->levels[logicalPosition]; - } - } -} - -/* runs API functions ------------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - 
RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - ubidi_getRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return -1; - } - return pBiDi->runCount; -} - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, - int32_t *pLogicalStart, int32_t *pLength) -{ - int32_t start; - UErrorCode errorCode = U_ZERO_ERROR; - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); - ubidi_getRuns(pBiDi, &errorCode); - if(U_FAILURE(errorCode)) { - return UBIDI_LTR; - } - RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); - - start=pBiDi->runs[runIndex].logicalStart; - if(pLogicalStart!=NULL) { - *pLogicalStart=GET_INDEX(start); - } - if(pLength!=NULL) { - if(runIndex>0) { - *pLength=pBiDi->runs[runIndex].visualLimit- - pBiDi->runs[runIndex-1].visualLimit; - } else { - *pLength=pBiDi->runs[0].visualLimit; - } - } - return (UBiDiDirection)GET_ODD_BIT(start); -} - -/* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ -static void -getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { - /* simple, single-run case */ - pBiDi->runs=pBiDi->simpleRuns; - pBiDi->runCount=1; - - /* fill and reorder the single run */ - pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); - pBiDi->runs[0].visualLimit=pBiDi->length; - pBiDi->runs[0].insertRemove=0; -} - -/* reorder the runs array (L2) ---------------------------------------------- */ - -/* - * Reorder the same-level runs in the runs array. - * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. - * All the visualStart fields=logical start before reordering. - * The "odd" bits are not set yet. - * - * Reordering with this data structure lends itself to some handy shortcuts: - * - * Since each run is moved but not modified, and since at the initial maxLevel - * each sequence of same-level runs consists of only one run each, we - * don't need to do anything there and can predecrement maxLevel. 
- * In many simple cases, the reordering is thus done entirely in the - * index mapping. - * Also, reordering occurs only down to the lowest odd level that occurs, - * which is minLevel|1. However, if the lowest level itself is odd, then - * in the last reordering the sequence of the runs at this level or higher - * will be all runs, and we don't need the elaborate loop to search for them. - * This is covered by ++minLevel instead of minLevel|=1 followed - * by an extra reorder-all after the reorder-some loop. - * About a trailing WS run: - * Such a run would need special treatment because its level is not - * reflected in levels[] if this is not a paragraph object. - * Instead, all characters from trailingWSStart on are implicitly at - * paraLevel. - * However, for all maxLevel>paraLevel, this run will never be reordered - * and does not need to be taken into account. maxLevel==paraLevel is only reordered - * if minLevel==paraLevel is odd, which is done in the extra segment. - * This means that for the main reordering loop we don't need to consider - * this run and can --runCount. If it is later part of the all-runs - * reordering, then runCount is adjusted accordingly. - */ -static void -reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { - Run *runs, tempRun; - UBiDiLevel *levels; - int32_t firstRun, endRun, limitRun, runCount; - - /* nothing to do? */ - if(maxLevel<=(minLevel|1)) { - return; - } - - /* - * Reorder only down to the lowest odd level - * and reorder at an odd minLevel in a separate, simpler loop. - * See comments above for why minLevel is always incremented. 
- */ - ++minLevel; - - runs=pBiDi->runs; - levels=pBiDi->levels; - runCount=pBiDi->runCount; - - /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ - if(pBiDi->trailingWSStartlength) { - --runCount; - } - - while(--maxLevel>=minLevel) { - firstRun=0; - - /* loop for all sequences of runs */ - for(;;) { - /* look for a sequence of runs that are all at >=maxLevel */ - /* look for the first run of such a sequence */ - while(firstRun=runCount) { - break; /* no more such runs */ - } - - /* look for the limit run of such a sequence (the run behind it) */ - for(limitRun=firstRun; ++limitRun=maxLevel;) {} - - /* Swap the entire sequence of runs from firstRun to limitRun-1. */ - endRun=limitRun-1; - while(firstRuntrailingWSStart==pBiDi->length) { - --runCount; - } - - /* Swap the entire sequence of all runs. (endRun==runCount) */ - while(firstRunruns; - int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; - - for(i=0; i=logicalStart) && (logicalIndex<(logicalStart+length))) { - return i; - } - visualStart+=length; - } - /* we should never get here */ - U_ASSERT(FALSE); - *pErrorCode = U_INVALID_STATE_ERROR; - return 0; -} - -/* - * Compute the runs array from the levels array. - * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 - * and the runs are reordered. - * Odd-level runs have visualStart on their visual right edge and - * they progress visually to the left. - * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the - * sum of appropriate LRM/RLM_BEFORE/AFTER flags. - * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the - * negative number of BiDi control characters within this run. - */ -U_CFUNC UBool -ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { - /* - * This method returns immediately if the runs are already set. This - * includes the case of length==0 (handled in setPara).. 
- */ - if (pBiDi->runCount>=0) { - return TRUE; - } - - if(pBiDi->direction!=UBIDI_MIXED) { - /* simple, single-run case - this covers length==0 */ - /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ - getSingleRun(pBiDi, pBiDi->paraLevel); - } else /* UBIDI_MIXED, length>0 */ { - /* mixed directionality */ - int32_t length=pBiDi->length, limit; - UBiDiLevel *levels=pBiDi->levels; - int32_t i, runCount; - UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ - /* - * If there are WS characters at the end of the line - * and the run preceding them has a level different from - * paraLevel, then they will form their own run at paraLevel (L1). - * Count them separately. - * We need some special treatment for this in order to not - * modify the levels array which a line UBiDi object shares - * with its paragraph parent and its other line siblings. - * In other words, for the trailing WS, it may be - * levels[]!=paraLevel but we have to treat it like it were so. - */ - limit=pBiDi->trailingWSStart; - /* count the runs, there is at least one non-WS run, and limit>0 */ - runCount=0; - for(i=0; i1 || limit1 */ - if(getRunsMemory(pBiDi, runCount)) { - runs=pBiDi->runsMemory; - } else { - return FALSE; - } - - /* set the runs */ - /* FOOD FOR THOUGHT: this could be optimized, e.g.: - * 464->444, 484->444, 575->555, 595->555 - * However, that would take longer. Check also how it would - * interact with BiDi control removal and inserting Marks. - */ - runIndex=0; - - /* search for the run limits and initialize visualLimit values with the run lengths */ - i=0; - do { - /* prepare this run */ - start=i; - level=levels[i]; - if(levelmaxLevel) { - maxLevel=level; - } - - /* look for the run limit */ - while(++iparaLevel is ok even - if contextual multiple paragraphs. 
*/ - if(pBiDi->paraLevelparaLevel; - } - } - - /* set the object fields */ - pBiDi->runs=runs; - pBiDi->runCount=runCount; - - reorderLine(pBiDi, minLevel, maxLevel); - - /* now add the direction flags and adjust the visualLimit's to be just that */ - /* this loop will also handle the trailing WS run */ - limit=0; - for(i=0; iparaLevel is ok even if - contextual multiple paragraphs. */ - if(runIndexparaLevel & 1) != 0)? 0 : runIndex; - - ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); - } - } - } - - /* handle insert LRM/RLM BEFORE/AFTER run */ - if(pBiDi->insertPoints.size>0) { - Point *point, *start=pBiDi->insertPoints.points, - *limit=start+pBiDi->insertPoints.size; - int32_t runIndex; - for(point=start; pointpos, pErrorCode); - pBiDi->runs[runIndex].insertRemove|=point->flag; - } - } - - /* handle remove BiDi control characters */ - if(pBiDi->controlCount>0) { - int32_t runIndex; - const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu; - for(pu=start; puruns[runIndex].insertRemove--; - } - } - } - - return TRUE; -} - -static UBool -prepareReorder(const UBiDiLevel *levels, int32_t length, - int32_t *indexMap, - UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { - int32_t start; - UBiDiLevel level, minLevel, maxLevel; - - if(levels==NULL || length<=0) { - return FALSE; - } - - /* determine minLevel and maxLevel */ - minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; - maxLevel=0; - for(start=length; start>0;) { - level=levels[--start]; - if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { - return FALSE; - } - if(levelmaxLevel) { - maxLevel=level; - } - } - *pMinLevel=minLevel; - *pMaxLevel=maxLevel; - - /* initialize the index map */ - for(start=length; start>0;) { - --start; - indexMap[start]=start; - } - - return TRUE; -} - -/* reorder a line based on a levels array (L2) ------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { - int32_t start, limit, sumOfSosEos; - 
UBiDiLevel minLevel = 0, maxLevel = 0; - - if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { - return; - } - - /* nothing to do? */ - if(minLevel==maxLevel && (minLevel&1)==0) { - return; - } - - /* reorder only down to the lowest odd level */ - minLevel|=1; - - /* loop maxLevel..minLevel */ - do { - start=0; - - /* loop for all sequences of levels to reorder at the current maxLevel */ - for(;;) { - /* look for a sequence of levels that are all at >=maxLevel */ - /* look for the first index of such a sequence */ - while(start=length) { - break; /* no more such sequences */ - } - - /* look for the limit of such a sequence (the index behind it) */ - for(limit=start; ++limit=maxLevel;) {} - - /* - * sos=start of sequence, eos=end of sequence - * - * The closed (inclusive) interval from sos to eos includes all the logical - * and visual indexes within this sequence. They are logically and - * visually contiguous and in the same range. - * - * For each run, the new visual index=sos+eos-old visual index; - * we pre-add sos+eos into sumOfSosEos -> - * new visual index=sumOfSosEos-old visual index; - */ - sumOfSosEos=start+limit-1; - - /* reorder each index in the sequence */ - do { - indexMap[start]=sumOfSosEos-indexMap[start]; - } while(++start=minLevel); -} - -U_CAPI void U_EXPORT2 -ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { - int32_t start, end, limit, temp; - UBiDiLevel minLevel = 0, maxLevel = 0; - - if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { - return; - } - - /* nothing to do? 
*/ - if(minLevel==maxLevel && (minLevel&1)==0) { - return; - } - - /* reorder only down to the lowest odd level */ - minLevel|=1; - - /* loop maxLevel..minLevel */ - do { - start=0; - - /* loop for all sequences of levels to reorder at the current maxLevel */ - for(;;) { - /* look for a sequence of levels that are all at >=maxLevel */ - /* look for the first index of such a sequence */ - while(start=length) { - break; /* no more such runs */ - } - - /* look for the limit of such a sequence (the index behind it) */ - for(limit=start; ++limit=maxLevel;) {} - - /* - * Swap the entire interval of indexes from start to limit-1. - * We don't need to swap the levels for the purpose of this - * algorithm: the sequence of levels that we look at does not - * move anyway. - */ - end=limit-1; - while(start=minLevel); -} - -/* API functions for logical<->visual mapping ------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { - int32_t visualIndex=UBIDI_MAP_NOWHERE; - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); - - /* we can do the trivial cases without the runs array */ - switch(pBiDi->direction) { - case UBIDI_LTR: - visualIndex=logicalIndex; - break; - case UBIDI_RTL: - visualIndex=pBiDi->length-logicalIndex-1; - break; - default: - if(!ubidi_getRuns(pBiDi, pErrorCode)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return -1; - } else { - Run *runs=pBiDi->runs; - int32_t i, visualStart=0, offset, length; - - /* linear search for the run, search on the visual runs */ - for(i=0; irunCount; ++i) { - length=runs[i].visualLimit-visualStart; - offset=logicalIndex-GET_INDEX(runs[i].logicalStart); - if(offset>=0 && offset=pBiDi->runCount) { - return UBIDI_MAP_NOWHERE; - } - } - } - - if(pBiDi->insertPoints.size>0) { - /* add the number of added marks until the 
calculated visual index */ - Run *runs=pBiDi->runs; - int32_t i, length, insertRemove; - int32_t visualStart=0, markFound=0; - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) { - markFound++; - } - /* is it the run containing the visual index? */ - if(visualIndexcontrolCount>0) { - /* subtract the number of controls until the calculated visual index */ - Run *runs=pBiDi->runs; - int32_t i, j, start, limit, length, insertRemove; - int32_t visualStart=0, controlFound=0; - UChar uchar=pBiDi->text[logicalIndex]; - /* is the logical index pointing to a control ? */ - if(IS_BIDI_CONTROL_CHAR(uchar)) { - return UBIDI_MAP_NOWHERE; - } - /* loop on runs */ - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - /* calculated visual index is beyond this run? */ - if(visualIndex>=runs[i].visualLimit) { - controlFound-=insertRemove; - continue; - } - /* calculated visual index must be within current run */ - if(insertRemove==0) { - return visualIndex-controlFound; - } - if(IS_EVEN_RUN(runs[i].logicalStart)) { - /* LTR: check from run start to logical index */ - start=runs[i].logicalStart; - limit=logicalIndex; - } else { - /* RTL: check from logical index to run end */ - start=logicalIndex+1; - limit=GET_INDEX(runs[i].logicalStart)+length; - } - for(j=start; jtext[j]; - if(IS_BIDI_CONTROL_CHAR(uchar)) { - controlFound++; - } - } - return visualIndex-controlFound; - } - } - - return visualIndex; -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { - Run *runs; - int32_t i, runCount, start; - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); - /* we can do the trivial cases without the runs array */ - 
if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { - if(pBiDi->direction==UBIDI_LTR) { - return visualIndex; - } - else if(pBiDi->direction==UBIDI_RTL) { - return pBiDi->length-visualIndex-1; - } - } - if(!ubidi_getRuns(pBiDi, pErrorCode)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return -1; - } - - runs=pBiDi->runs; - runCount=pBiDi->runCount; - if(pBiDi->insertPoints.size>0) { - /* handle inserted LRM/RLM */ - int32_t markFound=0, insertRemove; - int32_t visualStart=0, length; - runs=pBiDi->runs; - /* subtract number of marks until visual index */ - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { - if(visualIndex<=(visualStart+markFound)) { - return UBIDI_MAP_NOWHERE; - } - markFound++; - } - /* is adjusted visual index within this run? */ - if(visualIndex<(runs[i].visualLimit+markFound)) { - visualIndex-=markFound; - break; - } - if(insertRemove&(LRM_AFTER|RLM_AFTER)) { - if(visualIndex==(visualStart+length+markFound)) { - return UBIDI_MAP_NOWHERE; - } - markFound++; - } - } - } - else if(pBiDi->controlCount>0) { - /* handle removed BiDi control characters */ - int32_t controlFound=0, insertRemove, length; - int32_t logicalStart, logicalEnd, visualStart=0, j, k; - UChar uchar; - UBool evenRun; - /* add number of controls until visual index */ - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - /* is adjusted visual index beyond current run? 
*/ - if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) { - controlFound-=insertRemove; - continue; - } - /* adjusted visual index is within current run */ - if(insertRemove==0) { - visualIndex+=controlFound; - break; - } - /* count non-control chars until visualIndex */ - logicalStart=runs[i].logicalStart; - evenRun=IS_EVEN_RUN(logicalStart); - REMOVE_ODD_BIT(logicalStart); - logicalEnd=logicalStart+length-1; - for(j=0; jtext[k]; - if(IS_BIDI_CONTROL_CHAR(uchar)) { - controlFound++; - } - if((visualIndex+controlFound)==(visualStart+j)) { - break; - } - } - visualIndex+=controlFound; - break; - } - } - /* handle all cases */ - if(runCount<=10) { - /* linear search for the run */ - for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} - } else { - /* binary search for the run */ - int32_t begin=0, limit=runCount; - - /* the middle if() is guaranteed to find the run, we don't need a loop limit */ - for(;;) { - i=(begin+limit)/2; - if(visualIndex>=runs[i].visualLimit) { - begin=i+1; - } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { - break; - } else { - limit=i; - } - } - } - - start=runs[i].logicalStart; - if(IS_EVEN_RUN(start)) { - /* LTR */ - /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ - if(i>0) { - visualIndex-=runs[i-1].visualLimit; - } - return start+visualIndex; - } else { - /* RTL */ - return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; - } -} - -U_CAPI void U_EXPORT2 -ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ - ubidi_countRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - /* no op */ - } else if(indexMap==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else { - /* fill a logical-to-visual index map using the runs[] */ - int32_t visualStart, visualLimit, i, j, k; - int32_t logicalStart, logicalLimit; - Run *runs=pBiDi->runs; - if (pBiDi->length<=0) { - 
return; - } - if (pBiDi->length>pBiDi->resultLength) { - uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); - } - - visualStart=0; - for(j=0; jrunCount; ++j) { - logicalStart=GET_INDEX(runs[j].logicalStart); - visualLimit=runs[j].visualLimit; - if(IS_EVEN_RUN(runs[j].logicalStart)) { - do { /* LTR */ - indexMap[logicalStart++]=visualStart++; - } while(visualStartinsertPoints.size>0) { - int32_t markFound=0, runCount=pBiDi->runCount; - int32_t length, insertRemove; - visualStart=0; - /* add number of marks found until each index */ - for(i=0; i0) { - logicalStart=GET_INDEX(runs[i].logicalStart); - logicalLimit=logicalStart+length; - for(j=logicalStart; jcontrolCount>0) { - int32_t controlFound=0, runCount=pBiDi->runCount; - int32_t length, insertRemove; - UBool evenRun; - UChar uchar; - visualStart=0; - /* subtract number of controls found until each index */ - for(i=0; itext[k]; - if(IS_BIDI_CONTROL_CHAR(uchar)) { - controlFound++; - indexMap[k]=UBIDI_MAP_NOWHERE; - continue; - } - indexMap[k]-=controlFound; - } - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(indexMap==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ - ubidi_countRuns(pBiDi, pErrorCode); - if(U_SUCCESS(*pErrorCode)) { - /* fill a visual-to-logical index map using the runs[] */ - Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; - int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; - - if (pBiDi->resultLength<=0) { - return; - } - visualStart=0; - for(; runslogicalStart; - visualLimit=runs->visualLimit; - if(IS_EVEN_RUN(logicalStart)) { - do { /* LTR */ - *pi++ = logicalStart++; - } while(++visualStartinsertPoints.size>0) { - int32_t markFound=0, runCount=pBiDi->runCount; - int32_t insertRemove, i, j, k; - runs=pBiDi->runs; - /* count all inserted marks */ - for(i=0; 
iresultLength; - for(i=runCount-1; i>=0 && markFound>0; i--) { - insertRemove=runs[i].insertRemove; - if(insertRemove&(LRM_AFTER|RLM_AFTER)) { - indexMap[--k]= UBIDI_MAP_NOWHERE; - markFound--; - } - visualStart= i>0 ? runs[i-1].visualLimit : 0; - for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { - indexMap[--k]=indexMap[j]; - } - if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { - indexMap[--k]= UBIDI_MAP_NOWHERE; - markFound--; - } - } - } - else if(pBiDi->controlCount>0) { - int32_t runCount=pBiDi->runCount, logicalEnd; - int32_t insertRemove, length, i, j, k, m; - UChar uchar; - UBool evenRun; - runs=pBiDi->runs; - visualStart=0; - /* move forward indexes by number of preceding controls */ - k=0; - for(i=0; itext[m]; - if(!IS_BIDI_CONTROL_CHAR(uchar)) { - indexMap[k++]=m; - } - } - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { - if(srcMap!=NULL && destMap!=NULL && length>0) { - const int32_t *pi; - int32_t destLength=-1, count=0; - /* find highest value and count positive indexes in srcMap */ - pi=srcMap+length; - while(pi>srcMap) { - if(*--pi>destLength) { - destLength=*pi; - } - if(*pi>=0) { - count++; - } - } - destLength++; /* add 1 for origin 0 */ - if(count0) { - if(*--pi>=0) { - destMap[*pi]=--length; - } else { - --length; - } - } - } -} diff --git a/deps/icu-small/source/common/ubidiln.cpp b/deps/icu-small/source/common/ubidiln.cpp new file mode 100644 index 0000000000..71c581fe1c --- /dev/null +++ b/deps/icu-small/source/common/ubidiln.cpp @@ -0,0 +1,1349 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +****************************************************************************** +* file name: ubidiln.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ + +#include "cmemory.h" +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/ubidi.h" +#include "ubidiimp.h" +#include "uassert.h" + +/* + * General remarks about the functions in this file: + * + * These functions deal with the aspects of potentially mixed-directional + * text in a single paragraph or in a line of a single paragraph + * which has already been processed according to + * the Unicode 6.3 BiDi algorithm as defined in + * http://www.unicode.org/unicode/reports/tr9/ , version 28, + * also described in The Unicode Standard, Version 6.3.0 . + * + * This means that there is a UBiDi object with a levels + * and a dirProps array. + * paraLevel and direction are also set. + * Only if the length of the text is zero, then levels==dirProps==NULL. + * + * The overall directionality of the paragraph + * or line is used to bypass the reordering steps if possible. + * Even purely RTL text does not need reordering there because + * the ubidi_getLogical/VisualIndex() functions can compute the + * index on the fly in such a case. + * + * The implementation of the access to same-level-runs and of the reordering + * do attempt to provide better performance and less memory usage compared to + * a direct implementation of especially rule (L2) with an array of + * one (32-bit) integer per text character. + * + * Here, the levels array is scanned as soon as necessary, and a vector of + * same-level-runs is created. Reordering then is done on this vector. + * For each run of text positions that were resolved to the same level, + * only 8 bytes are stored: the first text position of the run and the visual + * position behind the run after reordering. 
+ * One sign bit is used to hold the directionality of the run. + * This is inefficient if there are many very short runs. If the average run + * length is <2, then this uses more memory. + * + * In a further attempt to save memory, the levels array is never changed + * after all the resolution rules (Xn, Wn, Nn, In). + * Many functions have to consider the field trailingWSStart: + * if it is less than length, then there is an implicit trailing run + * at the paraLevel, + * which is not reflected in the levels array. + * This allows a line UBiDi object to use the same levels array as + * its paragraph parent object. + * + * When a UBiDi object is created for a line of a paragraph, then the + * paragraph's levels and dirProps arrays are reused by way of setting + * a pointer into them, not by copying. This again saves memory and forbids to + * change the now shared levels for (L1). + */ + +/* handle trailing WS (L1) -------------------------------------------------- */ + +/* + * setTrailingWSStart() sets the start index for a trailing + * run of WS in the line. This is necessary because we do not modify + * the paragraph's levels array that we just point into. + * Using trailingWSStart is another form of performing (L1). + * + * To make subsequent operations easier, we also include the run + * before the WS if it is at the paraLevel - we merge the two here. + * + * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is + * set correctly for the line even when contextual multiple paragraphs. + */ +static void +setTrailingWSStart(UBiDi *pBiDi) { + /* pBiDi->direction!=UBIDI_MIXED */ + + const DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + int32_t start=pBiDi->length; + UBiDiLevel paraLevel=pBiDi->paraLevel; + + /* If the line is terminated by a block separator, all preceding WS etc... + are already set to paragraph level. 
+ Setting trailingWSStart to pBidi->length will avoid changing the + level of B chars from 0 to paraLevel in ubidi_getLevels when + orderParagraphsLTR==TRUE. + */ + if(dirProps[start-1]==B) { + pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ + return; + } + /* go backwards across all WS, BN, explicit codes */ + while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { + --start; + } + + /* if the WS run can be merged with the previous run then do so here */ + while(start>0 && levels[start-1]==paraLevel) { + --start; + } + + pBiDi->trailingWSStart=start; +} + +/* ubidi_setLine ------------------------------------------------------------ */ + +U_CAPI void U_EXPORT2 +ubidi_setLine(const UBiDi *pParaBiDi, + int32_t start, int32_t limit, + UBiDi *pLineBiDi, + UErrorCode *pErrorCode) { + int32_t length; + + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode); + RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); + RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); + if(pLineBiDi==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != + ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { + /* the line crosses a paragraph boundary */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* set the values in pLineBiDi from its pParaBiDi parent */ + pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ + pLineBiDi->text=pParaBiDi->text+start; + length=pLineBiDi->length=limit-start; + pLineBiDi->resultLength=pLineBiDi->originalLength=length; + pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); + pLineBiDi->paraCount=pParaBiDi->paraCount; + pLineBiDi->runs=NULL; + pLineBiDi->flags=0; + pLineBiDi->reorderingMode=pParaBiDi->reorderingMode; + pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions; + pLineBiDi->controlCount=0; + 
if(pParaBiDi->controlCount>0) { + int32_t j; + for(j=start; jtext[j])) { + pLineBiDi->controlCount++; + } + } + pLineBiDi->resultLength-=pLineBiDi->controlCount; + } + + pLineBiDi->dirProps=pParaBiDi->dirProps+start; + pLineBiDi->levels=pParaBiDi->levels+start; + pLineBiDi->runCount=-1; + + if(pParaBiDi->direction!=UBIDI_MIXED) { + /* the parent is already trivial */ + pLineBiDi->direction=pParaBiDi->direction; + + /* + * The parent's levels are all either + * implicitly or explicitly ==paraLevel; + * do the same here. + */ + if(pParaBiDi->trailingWSStart<=start) { + pLineBiDi->trailingWSStart=0; + } else if(pParaBiDi->trailingWSStarttrailingWSStart=pParaBiDi->trailingWSStart-start; + } else { + pLineBiDi->trailingWSStart=length; + } + } else { + const UBiDiLevel *levels=pLineBiDi->levels; + int32_t i, trailingWSStart; + UBiDiLevel level; + + setTrailingWSStart(pLineBiDi); + trailingWSStart=pLineBiDi->trailingWSStart; + + /* recalculate pLineBiDi->direction */ + if(trailingWSStart==0) { + /* all levels are at paraLevel */ + pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); + } else { + /* get the level of the first character */ + level=(UBiDiLevel)(levels[0]&1); + + /* if there is anything of a different level, then the line is mixed */ + if(trailingWSStartparaLevel&1)!=level) { + /* the trailing WS is at paraLevel, which differs from levels[0] */ + pLineBiDi->direction=UBIDI_MIXED; + } else { + /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ + i=1; + for(;;) { + if(i==trailingWSStart) { + /* the direction values match those in level */ + pLineBiDi->direction=(UBiDiDirection)level; + break; + } else if((levels[i]&1)!=level) { + pLineBiDi->direction=UBIDI_MIXED; + break; + } + ++i; + } + } + } + + switch(pLineBiDi->direction) { + case UBIDI_LTR: + /* make sure paraLevel is even */ + pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); + + /* all levels are implicitly at paraLevel (important for 
ubidi_getLevels()) */ + pLineBiDi->trailingWSStart=0; + break; + case UBIDI_RTL: + /* make sure paraLevel is odd */ + pLineBiDi->paraLevel|=1; + + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pLineBiDi->trailingWSStart=0; + break; + default: + break; + } + } + pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ + return; +} + +U_CAPI UBiDiLevel U_EXPORT2 +ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { + /* return paraLevel if in the trailing WS run, otherwise the real level */ + if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { + return 0; + } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { + return GET_PARALEVEL(pBiDi, charIndex); + } else { + return pBiDi->levels[charIndex]; + } +} + +U_CAPI const UBiDiLevel * U_EXPORT2 +ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + int32_t start, length; + + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); + if((length=pBiDi->length)<=0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if((start=pBiDi->trailingWSStart)==length) { + /* the current levels array reflects the WS run */ + return pBiDi->levels; + } + + /* + * After the previous if(), we know that the levels array + * has an implicit trailing WS run and therefore does not fully + * reflect itself all the levels. + * This must be a UBiDi object for a line, and + * we need to create a new levels array. 
+ */ + if(getLevelsMemory(pBiDi, length)) { + UBiDiLevel *levels=pBiDi->levelsMemory; + + if(start>0 && levels!=pBiDi->levels) { + uprv_memcpy(levels, pBiDi->levels, start); + } + /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, + since pBidi is a line object */ + uprv_memset(levels+start, pBiDi->paraLevel, length-start); + + /* this new levels array is set for the line and reflects the WS run */ + pBiDi->trailingWSStart=length; + return pBiDi->levels=levels; + } else { + /* out of memory */ + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } +} + +U_CAPI void U_EXPORT2 +ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, + int32_t *pLogicalLimit, UBiDiLevel *pLevel) { + UErrorCode errorCode; + int32_t runCount, visualStart, logicalLimit, logicalFirst, i; + Run iRun; + + errorCode=U_ZERO_ERROR; + RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode); + /* ubidi_countRuns will check VALID_PARA_OR_LINE */ + runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + /* this is done based on runs rather than on levels since levels have + a special interpretation when UBIDI_REORDER_RUNS_ONLY + */ + visualStart=logicalLimit=0; + iRun=pBiDi->runs[0]; + + for(i=0; iruns[i]; + logicalFirst=GET_INDEX(iRun.logicalStart); + logicalLimit=logicalFirst+iRun.visualLimit-visualStart; + if((logicalPosition>=logicalFirst) && + (logicalPositionreorderingMode==UBIDI_REORDER_RUNS_ONLY) { + *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart); + } + else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) { + *pLevel=GET_PARALEVEL(pBiDi, logicalPosition); + } else { + *pLevel=pBiDi->levels[logicalPosition]; + } + } +} + +/* runs API functions ------------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + 
RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + ubidi_getRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return -1; + } + return pBiDi->runCount; +} + +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, + int32_t *pLogicalStart, int32_t *pLength) +{ + int32_t start; + UErrorCode errorCode = U_ZERO_ERROR; + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); + ubidi_getRuns(pBiDi, &errorCode); + if(U_FAILURE(errorCode)) { + return UBIDI_LTR; + } + RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); + + start=pBiDi->runs[runIndex].logicalStart; + if(pLogicalStart!=NULL) { + *pLogicalStart=GET_INDEX(start); + } + if(pLength!=NULL) { + if(runIndex>0) { + *pLength=pBiDi->runs[runIndex].visualLimit- + pBiDi->runs[runIndex-1].visualLimit; + } else { + *pLength=pBiDi->runs[0].visualLimit; + } + } + return (UBiDiDirection)GET_ODD_BIT(start); +} + +/* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ +static void +getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { + /* simple, single-run case */ + pBiDi->runs=pBiDi->simpleRuns; + pBiDi->runCount=1; + + /* fill and reorder the single run */ + pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); + pBiDi->runs[0].visualLimit=pBiDi->length; + pBiDi->runs[0].insertRemove=0; +} + +/* reorder the runs array (L2) ---------------------------------------------- */ + +/* + * Reorder the same-level runs in the runs array. + * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. + * All the visualStart fields=logical start before reordering. + * The "odd" bits are not set yet. + * + * Reordering with this data structure lends itself to some handy shortcuts: + * + * Since each run is moved but not modified, and since at the initial maxLevel + * each sequence of same-level runs consists of only one run each, we + * don't need to do anything there and can predecrement maxLevel. 
+ * In many simple cases, the reordering is thus done entirely in the + * index mapping. + * Also, reordering occurs only down to the lowest odd level that occurs, + * which is minLevel|1. However, if the lowest level itself is odd, then + * in the last reordering the sequence of the runs at this level or higher + * will be all runs, and we don't need the elaborate loop to search for them. + * This is covered by ++minLevel instead of minLevel|=1 followed + * by an extra reorder-all after the reorder-some loop. + * About a trailing WS run: + * Such a run would need special treatment because its level is not + * reflected in levels[] if this is not a paragraph object. + * Instead, all characters from trailingWSStart on are implicitly at + * paraLevel. + * However, for all maxLevel>paraLevel, this run will never be reordered + * and does not need to be taken into account. maxLevel==paraLevel is only reordered + * if minLevel==paraLevel is odd, which is done in the extra segment. + * This means that for the main reordering loop we don't need to consider + * this run and can --runCount. If it is later part of the all-runs + * reordering, then runCount is adjusted accordingly. + */ +static void +reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { + Run *runs, tempRun; + UBiDiLevel *levels; + int32_t firstRun, endRun, limitRun, runCount; + + /* nothing to do? */ + if(maxLevel<=(minLevel|1)) { + return; + } + + /* + * Reorder only down to the lowest odd level + * and reorder at an odd minLevel in a separate, simpler loop. + * See comments above for why minLevel is always incremented. 
+ */ + ++minLevel; + + runs=pBiDi->runs; + levels=pBiDi->levels; + runCount=pBiDi->runCount; + + /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ + if(pBiDi->trailingWSStartlength) { + --runCount; + } + + while(--maxLevel>=minLevel) { + firstRun=0; + + /* loop for all sequences of runs */ + for(;;) { + /* look for a sequence of runs that are all at >=maxLevel */ + /* look for the first run of such a sequence */ + while(firstRun=runCount) { + break; /* no more such runs */ + } + + /* look for the limit run of such a sequence (the run behind it) */ + for(limitRun=firstRun; ++limitRun=maxLevel;) {} + + /* Swap the entire sequence of runs from firstRun to limitRun-1. */ + endRun=limitRun-1; + while(firstRuntrailingWSStart==pBiDi->length) { + --runCount; + } + + /* Swap the entire sequence of all runs. (endRun==runCount) */ + while(firstRunruns; + int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; + + for(i=0; i=logicalStart) && (logicalIndex<(logicalStart+length))) { + return i; + } + visualStart+=length; + } + /* we should never get here */ + U_ASSERT(FALSE); + *pErrorCode = U_INVALID_STATE_ERROR; + return 0; +} + +/* + * Compute the runs array from the levels array. + * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 + * and the runs are reordered. + * Odd-level runs have visualStart on their visual right edge and + * they progress visually to the left. + * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the + * sum of appropriate LRM/RLM_BEFORE/AFTER flags. + * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the + * negative number of BiDi control characters within this run. + */ +U_CFUNC UBool +ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { + /* + * This method returns immediately if the runs are already set. This + * includes the case of length==0 (handled in setPara).. 
+ */ + if (pBiDi->runCount>=0) { + return TRUE; + } + + if(pBiDi->direction!=UBIDI_MIXED) { + /* simple, single-run case - this covers length==0 */ + /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ + getSingleRun(pBiDi, pBiDi->paraLevel); + } else /* UBIDI_MIXED, length>0 */ { + /* mixed directionality */ + int32_t length=pBiDi->length, limit; + UBiDiLevel *levels=pBiDi->levels; + int32_t i, runCount; + UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ + /* + * If there are WS characters at the end of the line + * and the run preceding them has a level different from + * paraLevel, then they will form their own run at paraLevel (L1). + * Count them separately. + * We need some special treatment for this in order to not + * modify the levels array which a line UBiDi object shares + * with its paragraph parent and its other line siblings. + * In other words, for the trailing WS, it may be + * levels[]!=paraLevel but we have to treat it like it were so. + */ + limit=pBiDi->trailingWSStart; + /* count the runs, there is at least one non-WS run, and limit>0 */ + runCount=0; + for(i=0; i1 || limit1 */ + if(getRunsMemory(pBiDi, runCount)) { + runs=pBiDi->runsMemory; + } else { + return FALSE; + } + + /* set the runs */ + /* FOOD FOR THOUGHT: this could be optimized, e.g.: + * 464->444, 484->444, 575->555, 595->555 + * However, that would take longer. Check also how it would + * interact with BiDi control removal and inserting Marks. + */ + runIndex=0; + + /* search for the run limits and initialize visualLimit values with the run lengths */ + i=0; + do { + /* prepare this run */ + start=i; + level=levels[i]; + if(levelmaxLevel) { + maxLevel=level; + } + + /* look for the run limit */ + while(++iparaLevel is ok even + if contextual multiple paragraphs. 
*/ + if(pBiDi->paraLevelparaLevel; + } + } + + /* set the object fields */ + pBiDi->runs=runs; + pBiDi->runCount=runCount; + + reorderLine(pBiDi, minLevel, maxLevel); + + /* now add the direction flags and adjust the visualLimit's to be just that */ + /* this loop will also handle the trailing WS run */ + limit=0; + for(i=0; iparaLevel is ok even if + contextual multiple paragraphs. */ + if(runIndexparaLevel & 1) != 0)? 0 : runIndex; + + ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); + } + } + } + + /* handle insert LRM/RLM BEFORE/AFTER run */ + if(pBiDi->insertPoints.size>0) { + Point *point, *start=pBiDi->insertPoints.points, + *limit=start+pBiDi->insertPoints.size; + int32_t runIndex; + for(point=start; pointpos, pErrorCode); + pBiDi->runs[runIndex].insertRemove|=point->flag; + } + } + + /* handle remove BiDi control characters */ + if(pBiDi->controlCount>0) { + int32_t runIndex; + const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu; + for(pu=start; puruns[runIndex].insertRemove--; + } + } + } + + return TRUE; +} + +static UBool +prepareReorder(const UBiDiLevel *levels, int32_t length, + int32_t *indexMap, + UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { + int32_t start; + UBiDiLevel level, minLevel, maxLevel; + + if(levels==NULL || length<=0) { + return FALSE; + } + + /* determine minLevel and maxLevel */ + minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; + maxLevel=0; + for(start=length; start>0;) { + level=levels[--start]; + if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { + return FALSE; + } + if(levelmaxLevel) { + maxLevel=level; + } + } + *pMinLevel=minLevel; + *pMaxLevel=maxLevel; + + /* initialize the index map */ + for(start=length; start>0;) { + --start; + indexMap[start]=start; + } + + return TRUE; +} + +/* reorder a line based on a levels array (L2) ------------------------------ */ + +U_CAPI void U_EXPORT2 +ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { + int32_t start, limit, sumOfSosEos; + 
UBiDiLevel minLevel = 0, maxLevel = 0; + + if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { + return; + } + + /* nothing to do? */ + if(minLevel==maxLevel && (minLevel&1)==0) { + return; + } + + /* reorder only down to the lowest odd level */ + minLevel|=1; + + /* loop maxLevel..minLevel */ + do { + start=0; + + /* loop for all sequences of levels to reorder at the current maxLevel */ + for(;;) { + /* look for a sequence of levels that are all at >=maxLevel */ + /* look for the first index of such a sequence */ + while(start=length) { + break; /* no more such sequences */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for(limit=start; ++limit=maxLevel;) {} + + /* + * sos=start of sequence, eos=end of sequence + * + * The closed (inclusive) interval from sos to eos includes all the logical + * and visual indexes within this sequence. They are logically and + * visually contiguous and in the same range. + * + * For each run, the new visual index=sos+eos-old visual index; + * we pre-add sos+eos into sumOfSosEos -> + * new visual index=sumOfSosEos-old visual index; + */ + sumOfSosEos=start+limit-1; + + /* reorder each index in the sequence */ + do { + indexMap[start]=sumOfSosEos-indexMap[start]; + } while(++start=minLevel); +} + +U_CAPI void U_EXPORT2 +ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { + int32_t start, end, limit, temp; + UBiDiLevel minLevel = 0, maxLevel = 0; + + if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { + return; + } + + /* nothing to do? 
*/ + if(minLevel==maxLevel && (minLevel&1)==0) { + return; + } + + /* reorder only down to the lowest odd level */ + minLevel|=1; + + /* loop maxLevel..minLevel */ + do { + start=0; + + /* loop for all sequences of levels to reorder at the current maxLevel */ + for(;;) { + /* look for a sequence of levels that are all at >=maxLevel */ + /* look for the first index of such a sequence */ + while(start=length) { + break; /* no more such runs */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for(limit=start; ++limit=maxLevel;) {} + + /* + * Swap the entire interval of indexes from start to limit-1. + * We don't need to swap the levels for the purpose of this + * algorithm: the sequence of levels that we look at does not + * move anyway. + */ + end=limit-1; + while(start=minLevel); +} + +/* API functions for logical<->visual mapping ------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { + int32_t visualIndex=UBIDI_MAP_NOWHERE; + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); + + /* we can do the trivial cases without the runs array */ + switch(pBiDi->direction) { + case UBIDI_LTR: + visualIndex=logicalIndex; + break; + case UBIDI_RTL: + visualIndex=pBiDi->length-logicalIndex-1; + break; + default: + if(!ubidi_getRuns(pBiDi, pErrorCode)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return -1; + } else { + Run *runs=pBiDi->runs; + int32_t i, visualStart=0, offset, length; + + /* linear search for the run, search on the visual runs */ + for(i=0; irunCount; ++i) { + length=runs[i].visualLimit-visualStart; + offset=logicalIndex-GET_INDEX(runs[i].logicalStart); + if(offset>=0 && offset=pBiDi->runCount) { + return UBIDI_MAP_NOWHERE; + } + } + } + + if(pBiDi->insertPoints.size>0) { + /* add the number of added marks until the 
calculated visual index */ + Run *runs=pBiDi->runs; + int32_t i, length, insertRemove; + int32_t visualStart=0, markFound=0; + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) { + markFound++; + } + /* is it the run containing the visual index? */ + if(visualIndexcontrolCount>0) { + /* subtract the number of controls until the calculated visual index */ + Run *runs=pBiDi->runs; + int32_t i, j, start, limit, length, insertRemove; + int32_t visualStart=0, controlFound=0; + UChar uchar=pBiDi->text[logicalIndex]; + /* is the logical index pointing to a control ? */ + if(IS_BIDI_CONTROL_CHAR(uchar)) { + return UBIDI_MAP_NOWHERE; + } + /* loop on runs */ + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + /* calculated visual index is beyond this run? */ + if(visualIndex>=runs[i].visualLimit) { + controlFound-=insertRemove; + continue; + } + /* calculated visual index must be within current run */ + if(insertRemove==0) { + return visualIndex-controlFound; + } + if(IS_EVEN_RUN(runs[i].logicalStart)) { + /* LTR: check from run start to logical index */ + start=runs[i].logicalStart; + limit=logicalIndex; + } else { + /* RTL: check from logical index to run end */ + start=logicalIndex+1; + limit=GET_INDEX(runs[i].logicalStart)+length; + } + for(j=start; jtext[j]; + if(IS_BIDI_CONTROL_CHAR(uchar)) { + controlFound++; + } + } + return visualIndex-controlFound; + } + } + + return visualIndex; +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { + Run *runs; + int32_t i, runCount, start; + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); + /* we can do the trivial cases without the runs array */ + 
if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { + if(pBiDi->direction==UBIDI_LTR) { + return visualIndex; + } + else if(pBiDi->direction==UBIDI_RTL) { + return pBiDi->length-visualIndex-1; + } + } + if(!ubidi_getRuns(pBiDi, pErrorCode)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return -1; + } + + runs=pBiDi->runs; + runCount=pBiDi->runCount; + if(pBiDi->insertPoints.size>0) { + /* handle inserted LRM/RLM */ + int32_t markFound=0, insertRemove; + int32_t visualStart=0, length; + runs=pBiDi->runs; + /* subtract number of marks until visual index */ + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { + if(visualIndex<=(visualStart+markFound)) { + return UBIDI_MAP_NOWHERE; + } + markFound++; + } + /* is adjusted visual index within this run? */ + if(visualIndex<(runs[i].visualLimit+markFound)) { + visualIndex-=markFound; + break; + } + if(insertRemove&(LRM_AFTER|RLM_AFTER)) { + if(visualIndex==(visualStart+length+markFound)) { + return UBIDI_MAP_NOWHERE; + } + markFound++; + } + } + } + else if(pBiDi->controlCount>0) { + /* handle removed BiDi control characters */ + int32_t controlFound=0, insertRemove, length; + int32_t logicalStart, logicalEnd, visualStart=0, j, k; + UChar uchar; + UBool evenRun; + /* add number of controls until visual index */ + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + /* is adjusted visual index beyond current run? 
*/ + if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) { + controlFound-=insertRemove; + continue; + } + /* adjusted visual index is within current run */ + if(insertRemove==0) { + visualIndex+=controlFound; + break; + } + /* count non-control chars until visualIndex */ + logicalStart=runs[i].logicalStart; + evenRun=IS_EVEN_RUN(logicalStart); + REMOVE_ODD_BIT(logicalStart); + logicalEnd=logicalStart+length-1; + for(j=0; jtext[k]; + if(IS_BIDI_CONTROL_CHAR(uchar)) { + controlFound++; + } + if((visualIndex+controlFound)==(visualStart+j)) { + break; + } + } + visualIndex+=controlFound; + break; + } + } + /* handle all cases */ + if(runCount<=10) { + /* linear search for the run */ + for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} + } else { + /* binary search for the run */ + int32_t begin=0, limit=runCount; + + /* the middle if() is guaranteed to find the run, we don't need a loop limit */ + for(;;) { + i=(begin+limit)/2; + if(visualIndex>=runs[i].visualLimit) { + begin=i+1; + } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { + break; + } else { + limit=i; + } + } + } + + start=runs[i].logicalStart; + if(IS_EVEN_RUN(start)) { + /* LTR */ + /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ + if(i>0) { + visualIndex-=runs[i-1].visualLimit; + } + return start+visualIndex; + } else { + /* RTL */ + return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; + } +} + +U_CAPI void U_EXPORT2 +ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ + ubidi_countRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + /* no op */ + } else if(indexMap==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else { + /* fill a logical-to-visual index map using the runs[] */ + int32_t visualStart, visualLimit, i, j, k; + int32_t logicalStart, logicalLimit; + Run *runs=pBiDi->runs; + if (pBiDi->length<=0) { + 
return; + } + if (pBiDi->length>pBiDi->resultLength) { + uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); + } + + visualStart=0; + for(j=0; jrunCount; ++j) { + logicalStart=GET_INDEX(runs[j].logicalStart); + visualLimit=runs[j].visualLimit; + if(IS_EVEN_RUN(runs[j].logicalStart)) { + do { /* LTR */ + indexMap[logicalStart++]=visualStart++; + } while(visualStartinsertPoints.size>0) { + int32_t markFound=0, runCount=pBiDi->runCount; + int32_t length, insertRemove; + visualStart=0; + /* add number of marks found until each index */ + for(i=0; i0) { + logicalStart=GET_INDEX(runs[i].logicalStart); + logicalLimit=logicalStart+length; + for(j=logicalStart; jcontrolCount>0) { + int32_t controlFound=0, runCount=pBiDi->runCount; + int32_t length, insertRemove; + UBool evenRun; + UChar uchar; + visualStart=0; + /* subtract number of controls found until each index */ + for(i=0; itext[k]; + if(IS_BIDI_CONTROL_CHAR(uchar)) { + controlFound++; + indexMap[k]=UBIDI_MAP_NOWHERE; + continue; + } + indexMap[k]-=controlFound; + } + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(indexMap==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ + ubidi_countRuns(pBiDi, pErrorCode); + if(U_SUCCESS(*pErrorCode)) { + /* fill a visual-to-logical index map using the runs[] */ + Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; + int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; + + if (pBiDi->resultLength<=0) { + return; + } + visualStart=0; + for(; runslogicalStart; + visualLimit=runs->visualLimit; + if(IS_EVEN_RUN(logicalStart)) { + do { /* LTR */ + *pi++ = logicalStart++; + } while(++visualStartinsertPoints.size>0) { + int32_t markFound=0, runCount=pBiDi->runCount; + int32_t insertRemove, i, j, k; + runs=pBiDi->runs; + /* count all inserted marks */ + for(i=0; 
iresultLength; + for(i=runCount-1; i>=0 && markFound>0; i--) { + insertRemove=runs[i].insertRemove; + if(insertRemove&(LRM_AFTER|RLM_AFTER)) { + indexMap[--k]= UBIDI_MAP_NOWHERE; + markFound--; + } + visualStart= i>0 ? runs[i-1].visualLimit : 0; + for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { + indexMap[--k]=indexMap[j]; + } + if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { + indexMap[--k]= UBIDI_MAP_NOWHERE; + markFound--; + } + } + } + else if(pBiDi->controlCount>0) { + int32_t runCount=pBiDi->runCount, logicalEnd; + int32_t insertRemove, length, i, j, k, m; + UChar uchar; + UBool evenRun; + runs=pBiDi->runs; + visualStart=0; + /* move forward indexes by number of preceding controls */ + k=0; + for(i=0; itext[m]; + if(!IS_BIDI_CONTROL_CHAR(uchar)) { + indexMap[k++]=m; + } + } + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { + if(srcMap!=NULL && destMap!=NULL && length>0) { + const int32_t *pi; + int32_t destLength=-1, count=0; + /* find highest value and count positive indexes in srcMap */ + pi=srcMap+length; + while(pi>srcMap) { + if(*--pi>destLength) { + destLength=*pi; + } + if(*pi>=0) { + count++; + } + } + destLength++; /* add 1 for origin 0 */ + if(count0) { + if(*--pi>=0) { + destMap[*pi]=--length; + } else { + --length; + } + } + } +} diff --git a/deps/icu-small/source/common/ubiditransform.c b/deps/icu-small/source/common/ubiditransform.c deleted file mode 100644 index 15e3c7cc94..0000000000 --- a/deps/icu-small/source/common/ubiditransform.c +++ /dev/null @@ -1,528 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 2016 and later: Unicode, Inc. and others. 
-* License & terms of use: http://www.unicode.org/copyright.html -* -****************************************************************************** -* file name: ubiditransform.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2016jul24 -* created by: Lina Kemmel -* -*/ - -#include "cmemory.h" -#include "unicode/ubidi.h" -#include "unicode/ustring.h" -#include "unicode/ushape.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "unicode/ubiditransform.h" - -/* Some convenience defines */ -#define LTR UBIDI_LTR -#define RTL UBIDI_RTL -#define LOGICAL UBIDI_LOGICAL -#define VISUAL UBIDI_VISUAL -#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL -#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR - -#define CHECK_LEN(STR, LEN, ERROR) { \ - if (LEN == 0) return 0; \ - if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ - if (LEN == -1) LEN = u_strlen(STR); \ - } - -#define MAX_ACTIONS 7 - -/** - * Typedef for a pointer to a function, which performs some operation (such as - * reordering, setting "inverse" mode, character mirroring, etc.). Return value - * indicates whether the text was changed in the course of this operation or - * not. - */ -typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); - -/** - * Structure that holds a predefined reordering scheme, including the following - * information: - *
<ul> - *
<li>an input base direction,</li> - *
<li>an input order,</li> - *
<li>an output base direction,</li> - *
<li>an output order,</li> - *
<li>a digit shaping direction,</li> - *
<li>a letter shaping direction,</li> - *
<li>a base direction that should be applied when the reordering engine is - * invoked (which can not always be derived from the caller-defined - * options),</li> - *
<li>an array of pointers to functions that accomplish the bidi layout - * transformation.</li> - *
</ul>
- */ -typedef struct { - UBiDiLevel inLevel; /* input level */ - UBiDiOrder inOrder; /* input order */ - UBiDiLevel outLevel; /* output level */ - UBiDiOrder outOrder; /* output order */ - uint32_t digitsDir; /* digit shaping direction */ - uint32_t lettersDir; /* letter shaping direction */ - UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ - const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ -} ReorderingScheme; - -struct UBiDiTransform { - UBiDi *pBidi; /* pointer to a UBiDi object */ - const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ - UChar *src; /* input text */ - UChar *dest; /* output text */ - uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ - uint32_t srcSize; /* input text capacity excluding the trailing zero */ - uint32_t destSize; /* output text capacity */ - uint32_t *pDestLength; /* number of UChars written to dest */ - uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ - uint32_t digits; /* digit option for ArabicShaping */ - uint32_t letters; /* letter option for ArabicShaping */ -}; - -U_DRAFT UBiDiTransform* U_EXPORT2 -ubiditransform_open(UErrorCode *pErrorCode) -{ - UBiDiTransform *pBiDiTransform = NULL; - if (U_SUCCESS(*pErrorCode)) { - pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); - if (pBiDiTransform == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - } - } - return pBiDiTransform; -} - -U_DRAFT void U_EXPORT2 -ubiditransform_close(UBiDiTransform *pBiDiTransform) -{ - if (pBiDiTransform != NULL) { - if (pBiDiTransform->pBidi != NULL) { - ubidi_close(pBiDiTransform->pBidi); - } - if (pBiDiTransform->src != NULL) { - uprv_free(pBiDiTransform->src); - } - uprv_free(pBiDiTransform); - } -} - -/** - * Performs Bidi resolution of text. - * - * @param pTransform Pointer to the UBiDiTransform structure. 
- * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, - pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); - return FALSE; -} - -/** - * Performs basic reordering of text (Logical -> Visual LTR). - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, - pTransform->reorderingOptions, pErrorCode); - - *pTransform->pDestLength = pTransform->srcLength; - pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; - return TRUE; -} - -/** - * Sets "inverse" mode on the UBiDi object. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setInverse(pTransform->pBidi, TRUE); - ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); - return FALSE; -} - -/** - * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL - * transformation. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. 
Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); - return FALSE; -} - -/** - * Performs string reverse. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_writeReverse(pTransform->src, pTransform->srcLength, - pTransform->dest, pTransform->destSize, - UBIDI_REORDER_DEFAULT, pErrorCode); - *pTransform->pDestLength = pTransform->srcLength; - return TRUE; -} - -/** - * Applies a new value to the text that serves as input at the current - * processing step. This value is identical to the original one when we begin - * the processing, but usually changes as the transformation progresses. - * - * @param pTransform A pointer to the UBiDiTransform structure. - * @param newSrc A pointer whose value is to be used as input text. - * @param newLength A length of the new text in UChars. - * @param newSize A new source capacity in UChars. - * @param pErrorCode Pointer to the error code value. 
- */ -static void -updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, - uint32_t newSize, UErrorCode *pErrorCode) -{ - if (newSize < newLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return; - } - if (newSize > pTransform->srcSize) { - newSize += 50; // allocate slightly more than needed right now - if (pTransform->src != NULL) { - uprv_free(pTransform->src); - pTransform->src = NULL; - } - pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); - if (pTransform->src == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - //pTransform->srcLength = pTransform->srcSize = 0; - return; - } - pTransform->srcSize = newSize; - } - u_strncpy(pTransform->src, newSrc, newLength); - pTransform->srcLength = u_terminateUChars(pTransform->src, - pTransform->srcSize, newLength, pErrorCode); -} - -/** - * Calls a lower level shaping function. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param options Shaping options. - * @param pErrorCode Pointer to the error code value. - */ -static void -doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) -{ - *pTransform->pDestLength = u_shapeArabic(pTransform->src, - pTransform->srcLength, pTransform->dest, pTransform->destSize, - options, pErrorCode); -} - -/** - * Performs digit and letter shaping. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). 
- */ -static UBool -action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - if ((pTransform->letters | pTransform->digits) == 0) { - return FALSE; - } - if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { - doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, - pErrorCode); - } else { - doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); - if (U_SUCCESS(*pErrorCode)) { - updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, - *pTransform->pDestLength, pErrorCode); - doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, - pErrorCode); - } - } - return TRUE; -} - -/** - * Performs character mirroring. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - UChar32 c; - uint32_t i = 0, j = 0; - if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { - return FALSE; - } - if (pTransform->destSize < pTransform->srcLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return FALSE; - } - do { - UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; - U16_NEXT(pTransform->src, i, pTransform->srcLength, c); - U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); - } while (i < pTransform->srcLength); - - *pTransform->pDestLength = pTransform->srcLength; - pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; - return TRUE; -} - -/** - * All possible reordering schemes. 
- * - */ -static const ReorderingScheme Schemes[] = -{ - /* 0: Logical LTR => Visual LTR */ - {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_reorder, NULL}}, - /* 1: Logical RTL => Visual LTR */ - {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 2: Logical LTR => Visual RTL */ - {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, - /* 3: Logical RTL => Visual RTL */ - {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, - /* 4: Visual LTR => Logical RTL */ - {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, - /* 5: Visual RTL => Logical RTL */ - {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, - /* 6: Visual LTR => Logical LTR */ - {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 7: Visual RTL => Logical LTR */ - {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 8: Logical LTR => Logical RTL */ - {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, - /* 9: Logical RTL => Logical LTR */ - {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, - {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 10: Visual LTR => Visual RTL */ - {LTR, VISUAL, 
RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, - /* 11: Visual RTL => Visual LTR */ - {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, - /* 12: Logical LTR => Logical LTR */ - {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 13: Logical RTL => Logical RTL */ - {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 14: Visual LTR => Visual LTR */ - {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 15: Visual RTL => Visual RTL */ - {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} -}; - -static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); - -/** - * When the direction option is UBIDI_DEFAULT_LTR or - * UBIDI_DEFAULT_RTL, resolve the base direction according to that - * of the first strong bidi character. - */ -static void -resolveBaseDirection(const UChar *text, uint32_t length, - UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) -{ - switch (*pInLevel) { - case UBIDI_DEFAULT_LTR: - case UBIDI_DEFAULT_RTL: { - UBiDiLevel level = ubidi_getBaseDirection(text, length); - *pInLevel = level != UBIDI_NEUTRAL ? level - : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; - break; - } - default: - *pInLevel &= 1; - break; - } - switch (*pOutLevel) { - case UBIDI_DEFAULT_LTR: - case UBIDI_DEFAULT_RTL: - *pOutLevel = *pInLevel; - break; - default: - *pOutLevel &= 1; - break; - } -} - -/** - * Finds a valid ReorderingScheme matching the - * caller-defined scheme. 
- * - * @return A valid ReorderingScheme object or NULL - */ -static const ReorderingScheme* -findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, - UBiDiOrder inOrder, UBiDiOrder outOrder) -{ - uint32_t i; - for (i = 0; i < nSchemes; i++) { - const ReorderingScheme *pScheme = Schemes + i; - if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel - && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { - return pScheme; - } - } - return NULL; -} - -U_DRAFT uint32_t U_EXPORT2 -ubiditransform_transform(UBiDiTransform *pBiDiTransform, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - UBiDiLevel inParaLevel, UBiDiOrder inOrder, - UBiDiLevel outParaLevel, UBiDiOrder outOrder, - UBiDiMirroring doMirroring, uint32_t shapingOptions, - UErrorCode *pErrorCode) -{ - uint32_t destLength = 0; - UBool textChanged = FALSE; - const UBiDiTransform *pOrigTransform = pBiDiTransform; - const UBiDiAction *action = NULL; - - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (src == NULL || dest == NULL) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - CHECK_LEN(src, srcLength, pErrorCode); - CHECK_LEN(dest, destSize, pErrorCode); - - if (pBiDiTransform == NULL) { - pBiDiTransform = ubiditransform_open(pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return 0; - } - } - /* Current limitation: in multiple paragraphs will be resolved according - to the 1st paragraph */ - resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); - - pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, - inOrder, outOrder); - if (pBiDiTransform->pActiveScheme == NULL) { - goto cleanup; - } - pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING - : UBIDI_REORDER_DEFAULT; - - /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text - scheme at the time shaping is invoked. 
*/ - shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; - pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; - pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; - - updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - goto cleanup; - } - if (pBiDiTransform->pBidi == NULL) { - pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - goto cleanup; - } - } - pBiDiTransform->dest = dest; - pBiDiTransform->destSize = destSize; - pBiDiTransform->pDestLength = &destLength; - - /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ - for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { - if ((*action)(pBiDiTransform, pErrorCode)) { - if (action + 1) { - updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, - *pBiDiTransform->pDestLength, pErrorCode); - } - textChanged = TRUE; - } - } - ubidi_setInverse(pBiDiTransform->pBidi, FALSE); - - if (!textChanged && U_SUCCESS(*pErrorCode)) { - /* Text was not changed - just copy src to dest */ - if (destSize < srcLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - } else { - u_strncpy(dest, src, srcLength); - destLength = srcLength; - } - } -cleanup: - if (pOrigTransform != pBiDiTransform) { - ubiditransform_close(pBiDiTransform); - } else { - pBiDiTransform->dest = NULL; - pBiDiTransform->pDestLength = NULL; - pBiDiTransform->srcLength = 0; - pBiDiTransform->destSize = 0; - } - return U_FAILURE(*pErrorCode) ? 0 : destLength; -} diff --git a/deps/icu-small/source/common/ubiditransform.cpp b/deps/icu-small/source/common/ubiditransform.cpp new file mode 100644 index 0000000000..80261d391e --- /dev/null +++ b/deps/icu-small/source/common/ubiditransform.cpp @@ -0,0 +1,530 @@ +/* +****************************************************************************** +* +* © 2016 and later: Unicode, Inc. 
and others. +* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#include "cmemory.h" +#include "unicode/ubidi.h" +#include "unicode/ustring.h" +#include "unicode/ushape.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "unicode/ubiditransform.h" + +/* Some convenience defines */ +#define LTR UBIDI_LTR +#define RTL UBIDI_RTL +#define LOGICAL UBIDI_LOGICAL +#define VISUAL UBIDI_VISUAL +#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL +#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR + +#define CHECK_LEN(STR, LEN, ERROR) { \ + if (LEN == 0) return 0; \ + if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ + if (LEN == -1) LEN = u_strlen(STR); \ + } + +#define MAX_ACTIONS 7 + +/** + * Typedef for a pointer to a function, which performs some operation (such as + * reordering, setting "inverse" mode, character mirroring, etc.). Return value + * indicates whether the text was changed in the course of this operation or + * not. + */ +typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); + +/** + * Structure that holds a predefined reordering scheme, including the following + * information: + *
<ul> + * <li>an input base direction,</li> + * <li>an input order,</li> + * <li>an output base direction,</li> + * <li>an output order,</li> + * <li>a digit shaping direction,</li> + * <li>a letter shaping direction,</li> + * <li>a base direction that should be applied when the reordering engine is + * invoked (which can not always be derived from the caller-defined + * options),</li> + * <li>an array of pointers to functions that accomplish the bidi layout + * transformation.</li> + * </ul>
+ */ +typedef struct { + UBiDiLevel inLevel; /* input level */ + UBiDiOrder inOrder; /* input order */ + UBiDiLevel outLevel; /* output level */ + UBiDiOrder outOrder; /* output order */ + uint32_t digitsDir; /* digit shaping direction */ + uint32_t lettersDir; /* letter shaping direction */ + UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ + const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ +} ReorderingScheme; + +struct UBiDiTransform { + UBiDi *pBidi; /* pointer to a UBiDi object */ + const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ + UChar *src; /* input text */ + UChar *dest; /* output text */ + uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ + uint32_t srcSize; /* input text capacity excluding the trailing zero */ + uint32_t destSize; /* output text capacity */ + uint32_t *pDestLength; /* number of UChars written to dest */ + uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ + uint32_t digits; /* digit option for ArabicShaping */ + uint32_t letters; /* letter option for ArabicShaping */ +}; + +U_DRAFT UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode) +{ + UBiDiTransform *pBiDiTransform = NULL; + if (U_SUCCESS(*pErrorCode)) { + pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); + if (pBiDiTransform == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + return pBiDiTransform; +} + +U_DRAFT void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBiDiTransform) +{ + if (pBiDiTransform != NULL) { + if (pBiDiTransform->pBidi != NULL) { + ubidi_close(pBiDiTransform->pBidi); + } + if (pBiDiTransform->src != NULL) { + uprv_free(pBiDiTransform->src); + } + uprv_free(pBiDiTransform); + } +} + +/** + * Performs Bidi resolution of text. + * + * @param pTransform Pointer to the UBiDiTransform structure. 
+ * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, + pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); + return FALSE; +} + +/** + * Performs basic reordering of text (Logical -> Visual LTR). + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, + pTransform->reorderingOptions, pErrorCode); + + *pTransform->pDestLength = pTransform->srcLength; + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * Sets "inverse" mode on the UBiDi object. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + (void)pErrorCode; + ubidi_setInverse(pTransform->pBidi, TRUE); + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); + return FALSE; +} + +/** + * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL + * transformation. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. 
+ * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + (void)pErrorCode; + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); + return FALSE; +} + +/** + * Performs string reverse. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReverse(pTransform->src, pTransform->srcLength, + pTransform->dest, pTransform->destSize, + UBIDI_REORDER_DEFAULT, pErrorCode); + *pTransform->pDestLength = pTransform->srcLength; + return TRUE; +} + +/** + * Applies a new value to the text that serves as input at the current + * processing step. This value is identical to the original one when we begin + * the processing, but usually changes as the transformation progresses. + * + * @param pTransform A pointer to the UBiDiTransform structure. + * @param newSrc A pointer whose value is to be used as input text. + * @param newLength A length of the new text in UChars. + * @param newSize A new source capacity in UChars. + * @param pErrorCode Pointer to the error code value. 
+ */ +static void +updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, + uint32_t newSize, UErrorCode *pErrorCode) +{ + if (newSize < newLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return; + } + if (newSize > pTransform->srcSize) { + newSize += 50; // allocate slightly more than needed right now + if (pTransform->src != NULL) { + uprv_free(pTransform->src); + pTransform->src = NULL; + } + pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); + if (pTransform->src == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + //pTransform->srcLength = pTransform->srcSize = 0; + return; + } + pTransform->srcSize = newSize; + } + u_strncpy(pTransform->src, newSrc, newLength); + pTransform->srcLength = u_terminateUChars(pTransform->src, + pTransform->srcSize, newLength, pErrorCode); +} + +/** + * Calls a lower level shaping function. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param options Shaping options. + * @param pErrorCode Pointer to the error code value. + */ +static void +doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) +{ + *pTransform->pDestLength = u_shapeArabic(pTransform->src, + pTransform->srcLength, pTransform->dest, pTransform->destSize, + options, pErrorCode); +} + +/** + * Performs digit and letter shaping. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). 
+ */ +static UBool +action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + if ((pTransform->letters | pTransform->digits) == 0) { + return FALSE; + } + if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { + doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } else { + doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, + *pTransform->pDestLength, pErrorCode); + doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } + } + return TRUE; +} + +/** + * Performs character mirroring. + * + * @param pTransform Pointer to the UBiDiTransform structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check U_SUCCESS(*pErrorCode). + */ +static UBool +action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + UChar32 c; + uint32_t i = 0, j = 0; + if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { + return FALSE; + } + if (pTransform->destSize < pTransform->srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + do { + UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; + U16_NEXT(pTransform->src, i, pTransform->srcLength, c); + U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); + } while (i < pTransform->srcLength); + + *pTransform->pDestLength = pTransform->srcLength; + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * All possible reordering schemes. 
+ * + */ +static const ReorderingScheme Schemes[] = +{ + /* 0: Logical LTR => Visual LTR */ + {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, NULL}}, + /* 1: Logical RTL => Visual LTR */ + {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 2: Logical LTR => Visual RTL */ + {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, + /* 3: Logical RTL => Visual RTL */ + {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, + /* 4: Visual LTR => Logical RTL */ + {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 5: Visual RTL => Logical RTL */ + {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 6: Visual LTR => Logical LTR */ + {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 7: Visual RTL => Logical LTR */ + {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 8: Logical LTR => Logical RTL */ + {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, + /* 9: Logical RTL => Logical LTR */ + {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 10: Visual LTR => Visual RTL */ + {LTR, VISUAL, 
RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, + /* 11: Visual RTL => Visual LTR */ + {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, + /* 12: Logical LTR => Logical LTR */ + {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 13: Logical RTL => Logical RTL */ + {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 14: Visual LTR => Visual LTR */ + {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 15: Visual RTL => Visual RTL */ + {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} +}; + +static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); + +/** + * When the direction option is UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL, resolve the base direction according to that + * of the first strong bidi character. + */ +static void +resolveBaseDirection(const UChar *text, uint32_t length, + UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) +{ + switch (*pInLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: { + UBiDiLevel level = ubidi_getBaseDirection(text, length); + *pInLevel = level != UBIDI_NEUTRAL ? level + : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; + break; + } + default: + *pInLevel &= 1; + break; + } + switch (*pOutLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: + *pOutLevel = *pInLevel; + break; + default: + *pOutLevel &= 1; + break; + } +} + +/** + * Finds a valid ReorderingScheme matching the + * caller-defined scheme. 
+ * + * @return A valid ReorderingScheme object or NULL + */ +static const ReorderingScheme* +findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, + UBiDiOrder inOrder, UBiDiOrder outOrder) +{ + uint32_t i; + for (i = 0; i < nSchemes; i++) { + const ReorderingScheme *pScheme = Schemes + i; + if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel + && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { + return pScheme; + } + } + return NULL; +} + +U_DRAFT uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode) +{ + uint32_t destLength = 0; + UBool textChanged = FALSE; + const UBiDiTransform *pOrigTransform = pBiDiTransform; + const UBiDiAction *action = NULL; + + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (src == NULL || dest == NULL) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + CHECK_LEN(src, srcLength, pErrorCode); + CHECK_LEN(dest, destSize, pErrorCode); + + if (pBiDiTransform == NULL) { + pBiDiTransform = ubiditransform_open(pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + } + /* Current limitation: in multiple paragraphs will be resolved according + to the 1st paragraph */ + resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); + + pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, + inOrder, outOrder); + if (pBiDiTransform->pActiveScheme == NULL) { + goto cleanup; + } + pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING + : UBIDI_REORDER_DEFAULT; + + /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text + scheme at the time shaping is invoked. 
*/ + shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; + pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; + pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; + + updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + if (pBiDiTransform->pBidi == NULL) { + pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + } + pBiDiTransform->dest = dest; + pBiDiTransform->destSize = destSize; + pBiDiTransform->pDestLength = &destLength; + + /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ + for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { + if ((*action)(pBiDiTransform, pErrorCode)) { + if (action + 1) { + updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, + *pBiDiTransform->pDestLength, pErrorCode); + } + textChanged = TRUE; + } + } + ubidi_setInverse(pBiDiTransform->pBidi, FALSE); + + if (!textChanged && U_SUCCESS(*pErrorCode)) { + /* Text was not changed - just copy src to dest */ + if (destSize < srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + } else { + u_strncpy(dest, src, srcLength); + destLength = srcLength; + } + } +cleanup: + if (pOrigTransform != pBiDiTransform) { + ubiditransform_close(pBiDiTransform); + } else { + pBiDiTransform->dest = NULL; + pBiDiTransform->pDestLength = NULL; + pBiDiTransform->srcLength = 0; + pBiDiTransform->destSize = 0; + } + return U_FAILURE(*pErrorCode) ? 0 : destLength; +} diff --git a/deps/icu-small/source/common/ubidiwrt.c b/deps/icu-small/source/common/ubidiwrt.c deleted file mode 100644 index 1d0c36a5d2..0000000000 --- a/deps/icu-small/source/common/ubidiwrt.c +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidiwrt.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999aug06 -* created by: Markus W. Scherer, updated by Matitiahu Allouche -* -* This file contains implementations for BiDi functions that use -* the core algorithm and core API to write reordered text. -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/uchar.h" -#include "unicode/ubidi.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "ustr_imp.h" -#include "ubidiimp.h" - -/* - * The function implementations in this file are designed - * for UTF-16 and UTF-32, not for UTF-8. - * - * Assumptions that are not true for UTF-8: - * - Any code point always needs the same number of code units - * ("minimum-length-problem" of UTF-8) - * - The BiDi control characters need only one code unit each - * - * Further assumptions for all UTFs: - * - u_charMirror(c) needs the same number of code units as c - */ -#if UTF_SIZE==8 -# error reimplement ubidi_writeReordered() for UTF-8, see comment above -#endif - -#define IS_COMBINING(type) ((1UL<<(type))&(1UL<0); - return srcLength; - } - case UBIDI_DO_MIRRORING: { - /* do mirroring */ - int32_t i=0, j=0; - UChar32 c; - - if(destSize0) { - c=*src++; - if(!IS_BIDI_CONTROL_CHAR(c)) { - --remaining; - } - } - return destSize-remaining; - } - *dest++=c; - } - } while(--srcLength>0); - return destSize-remaining; - } - default: { - /* remove BiDi control characters and do mirroring */ - int32_t remaining=destSize; - int32_t i, j=0; - UChar32 c; - do { - i=0; - U16_NEXT(src, i, srcLength, c); - src+=i; - srcLength-=i; - if(!IS_BIDI_CONTROL_CHAR(c)) { - 
remaining-=i; - if(remaining<0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - - /* preflight the length */ - while(srcLength>0) { - c=*src++; - if(!IS_BIDI_CONTROL_CHAR(c)) { - --remaining; - } - --srcLength; - } - return destSize-remaining; - } - c=u_charMirror(c); - U16_APPEND_UNSAFE(dest, j, c); - } - } while(srcLength>0); - return j; - } - } /* end of switch */ -} - -static int32_t -doWriteReverse(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode) { - /* - * RTL run - - * - * RTL runs need to be copied to the destination in reverse order - * of code points, not code units, to keep Unicode characters intact. - * - * The general strategy for this is to read the source text - * in backward order, collect all code units for a code point - * (and optionally following combining characters, see below), - * and copy all these code units in ascending order - * to the destination for this run. - * - * Several options request whether combining characters - * should be kept after their base characters, - * whether BiDi control characters should be removed, and - * whether characters should be replaced by their mirror-image - * equivalent Unicode characters. - */ - int32_t i, j; - UChar32 c; - - /* optimize for several combinations of options */ - switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) { - case 0: - /* - * With none of the "complicated" options set, the destination - * run will have the same length as the source run, - * and there is no mirroring and no keeping combining characters - * with their base characters. - */ - if(destSize0); - break; - case UBIDI_KEEP_BASE_COMBINING: - /* - * Here, too, the destination - * run will have the same length as the source run, - * and there is no mirroring. - * We do need to keep combining characters with their base characters. 
- */ - if(destSize0 && IS_COMBINING(u_charType(c))); - - /* copy this "user character" */ - j=srcLength; - do { - *dest++=src[j++]; - } while(j0); - break; - default: - /* - * With several "complicated" options set, this is the most - * general and the slowest copying of an RTL run. - * We will do mirroring, remove BiDi controls, and - * keep combining characters with their base characters - * as requested. - */ - if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) { - i=srcLength; - } else { - /* we need to find out the destination length of the run, - which will not include the BiDi control characters */ - int32_t length=srcLength; - UChar ch; - - i=0; - do { - ch=*src++; - if(!IS_BIDI_CONTROL_CHAR(ch)) { - ++i; - } - } while(--length>0); - src-=srcLength; - } - - if(destSize0 && IS_COMBINING(u_charType(c))) { - U16_PREV(src, 0, srcLength, c); - } - } - - if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) { - /* do not copy this BiDi control character */ - continue; - } - - /* copy this "user character" */ - j=srcLength; - if(options&UBIDI_DO_MIRRORING) { - /* mirror only the base character */ - int32_t k=0; - c=u_charMirror(c); - U16_APPEND_UNSAFE(dest, k, c); - dest+=k; - j+=k; - } - while(j0); - break; - } /* end of switch */ - - return destSize; -} - -U_CAPI int32_t U_EXPORT2 -ubidi_writeReverse(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode) { - int32_t destLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* more error checking */ - if( src==NULL || srcLength<-1 || - destSize<0 || (destSize>0 && dest==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* do input and output overlap? 
*/ - if( dest!=NULL && - ((src>=dest && src=src && dest0) { - destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode); - } else { - /* nothing to do */ - destLength=0; - } - - return u_terminateUChars(dest, destSize, destLength, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ubidi_writeReordered(UBiDi *pBiDi, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode) { - const UChar *text; - UChar *saveDest; - int32_t length, destCapacity; - int32_t run, runCount, logicalStart, runLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* more error checking */ - if( pBiDi==NULL || - (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 || - destSize<0 || (destSize>0 && dest==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* do input and output overlap? */ - if( dest!=NULL && - ((text>=dest && text=text && destoriginalLength))) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length==0) { - /* nothing to do */ - return u_terminateUChars(dest, destSize, 0, pErrorCode); - } - - runCount=ubidi_countRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* destSize shrinks, later destination length=destCapacity-destSize */ - saveDest=dest; - destCapacity=destSize; - - /* - * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the - * reordering mode (checked below) is appropriate. - */ - if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { - options|=UBIDI_INSERT_LRM_FOR_NUMERIC; - options&=~UBIDI_REMOVE_BIDI_CONTROLS; - } - /* - * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS - * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC. - */ - if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { - options|=UBIDI_REMOVE_BIDI_CONTROLS; - options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; - } - /* - * If we do not perform the "inverse BiDi" algorithm, then we - * don't need to insert any LRMs, and don't need to test for it. 
- */ - if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) && - (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) && - (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && - (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) { - options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; - } - /* - * Iterate through all visual runs and copy the run text segments to - * the destination, according to the options. - * - * The tests for where to insert LRMs ignore the fact that there may be - * BN codes or non-BMP code points at the beginning and end of a run; - * they may insert LRMs unnecessarily but the tests are faster this way - * (this would have to be improved for UTF-8). - * - * Note that the only errors that are set by doWriteXY() are buffer overflow - * errors. Ignore them until the end, and continue for preflighting. - */ - if(!(options&UBIDI_OUTPUT_REVERSE)) { - /* forward output */ - if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { - /* do not insert BiDi controls */ - for(run=0; rundirProps; - const UChar *src; - UChar uc; - UBiDiDirection dir; - int32_t markFlag; - - for(run=0; runruns[run].insertRemove; - if(markFlag<0) { /* BiDi controls count */ - markFlag=0; - } - - if(UBIDI_LTR==dir) { - if((pBiDi->isInverse) && - (/*run>0 &&*/ dirProps[logicalStart]!=L)) { - markFlag |= LRM_BEFORE; - } - if (markFlag & LRM_BEFORE) { - uc=LRM_CHAR; - } - else if (markFlag & RLM_BEFORE) { - uc=RLM_CHAR; - } - else uc=0; - if(uc) { - if(destSize>0) { - *dest++=uc; - } - --destSize; - } - - runLength=doWriteForward(src, runLength, - dest, destSize, - (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if((pBiDi->isInverse) && - (/*run0) { - *dest++=uc; - } - --destSize; - } - } else { /* RTL run */ - if((pBiDi->isInverse) && - (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) { - markFlag |= RLM_BEFORE; - } - if (markFlag & LRM_BEFORE) { - 
uc=LRM_CHAR; - } - else if (markFlag & RLM_BEFORE) { - uc=RLM_CHAR; - } - else uc=0; - if(uc) { - if(destSize>0) { - *dest++=uc; - } - --destSize; - } - - runLength=doWriteReverse(src, runLength, - dest, destSize, - options, pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if((pBiDi->isInverse) && - (/*run0) { - *dest++=uc; - } - --destSize; - } - } - } - } - } else { - /* reverse output */ - if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { - /* do not insert BiDi controls */ - for(run=runCount; --run>=0;) { - if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) { - runLength=doWriteReverse(text+logicalStart, runLength, - dest, destSize, - (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - } else { - runLength=doWriteForward(text+logicalStart, runLength, - dest, destSize, - options, pErrorCode); - } - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - } - } else { - /* insert BiDi controls for "inverse BiDi" */ - const DirProp *dirProps=pBiDi->dirProps; - const UChar *src; - UBiDiDirection dir; - - for(run=runCount; --run>=0;) { - /* reverse output */ - dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength); - src=text+logicalStart; - - if(UBIDI_LTR==dir) { - if(/*run0) { - *dest++=LRM_CHAR; - } - --destSize; - } - - runLength=doWriteReverse(src, runLength, - dest, destSize, - (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if(/*run>0 &&*/ dirProps[logicalStart]!=L) { - if(destSize>0) { - *dest++=LRM_CHAR; - } - --destSize; - } - } else { - if(/*run0) { - *dest++=RLM_CHAR; - } - --destSize; - } - - runLength=doWriteForward(src, runLength, - dest, destSize, - options, pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) { - if(destSize>0) { - *dest++=RLM_CHAR; - } - --destSize; - } - } - } - } - } - - return 
u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode); -} diff --git a/deps/icu-small/source/common/ubidiwrt.cpp b/deps/icu-small/source/common/ubidiwrt.cpp new file mode 100644 index 0000000000..a89099dad0 --- /dev/null +++ b/deps/icu-small/source/common/ubidiwrt.cpp @@ -0,0 +1,640 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidiwrt.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +* +* This file contains implementations for BiDi functions that use +* the core algorithm and core API to write reordered text. +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/ubidi.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "ustr_imp.h" +#include "ubidiimp.h" + +/* + * The function implementations in this file are designed + * for UTF-16 and UTF-32, not for UTF-8. 
+ * + * Assumptions that are not true for UTF-8: + * - Any code point always needs the same number of code units + * ("minimum-length-problem" of UTF-8) + * - The BiDi control characters need only one code unit each + * + * Further assumptions for all UTFs: + * - u_charMirror(c) needs the same number of code units as c + */ +#if UTF_SIZE==8 +# error reimplement ubidi_writeReordered() for UTF-8, see comment above +#endif + +#define IS_COMBINING(type) ((1UL<<(type))&(1UL<0); + return srcLength; + } + case UBIDI_DO_MIRRORING: { + /* do mirroring */ + int32_t i=0, j=0; + UChar32 c; + + if(destSize0) { + c=*src++; + if(!IS_BIDI_CONTROL_CHAR(c)) { + --remaining; + } + } + return destSize-remaining; + } + *dest++=c; + } + } while(--srcLength>0); + return destSize-remaining; + } + default: { + /* remove BiDi control characters and do mirroring */ + int32_t remaining=destSize; + int32_t i, j=0; + UChar32 c; + do { + i=0; + U16_NEXT(src, i, srcLength, c); + src+=i; + srcLength-=i; + if(!IS_BIDI_CONTROL_CHAR(c)) { + remaining-=i; + if(remaining<0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + + /* preflight the length */ + while(srcLength>0) { + c=*src++; + if(!IS_BIDI_CONTROL_CHAR(c)) { + --remaining; + } + --srcLength; + } + return destSize-remaining; + } + c=u_charMirror(c); + U16_APPEND_UNSAFE(dest, j, c); + } + } while(srcLength>0); + return j; + } + } /* end of switch */ +} + +static int32_t +doWriteReverse(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + /* + * RTL run - + * + * RTL runs need to be copied to the destination in reverse order + * of code points, not code units, to keep Unicode characters intact. + * + * The general strategy for this is to read the source text + * in backward order, collect all code units for a code point + * (and optionally following combining characters, see below), + * and copy all these code units in ascending order + * to the destination for this run. 
+ * + * Several options request whether combining characters + * should be kept after their base characters, + * whether BiDi control characters should be removed, and + * whether characters should be replaced by their mirror-image + * equivalent Unicode characters. + */ + int32_t i, j; + UChar32 c; + + /* optimize for several combinations of options */ + switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) { + case 0: + /* + * With none of the "complicated" options set, the destination + * run will have the same length as the source run, + * and there is no mirroring and no keeping combining characters + * with their base characters. + */ + if(destSize0); + break; + case UBIDI_KEEP_BASE_COMBINING: + /* + * Here, too, the destination + * run will have the same length as the source run, + * and there is no mirroring. + * We do need to keep combining characters with their base characters. + */ + if(destSize0 && IS_COMBINING(u_charType(c))); + + /* copy this "user character" */ + j=srcLength; + do { + *dest++=src[j++]; + } while(j0); + break; + default: + /* + * With several "complicated" options set, this is the most + * general and the slowest copying of an RTL run. + * We will do mirroring, remove BiDi controls, and + * keep combining characters with their base characters + * as requested. 
+ */ + if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) { + i=srcLength; + } else { + /* we need to find out the destination length of the run, + which will not include the BiDi control characters */ + int32_t length=srcLength; + UChar ch; + + i=0; + do { + ch=*src++; + if(!IS_BIDI_CONTROL_CHAR(ch)) { + ++i; + } + } while(--length>0); + src-=srcLength; + } + + if(destSize0 && IS_COMBINING(u_charType(c))) { + U16_PREV(src, 0, srcLength, c); + } + } + + if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) { + /* do not copy this BiDi control character */ + continue; + } + + /* copy this "user character" */ + j=srcLength; + if(options&UBIDI_DO_MIRRORING) { + /* mirror only the base character */ + int32_t k=0; + c=u_charMirror(c); + U16_APPEND_UNSAFE(dest, k, c); + dest+=k; + j+=k; + } + while(j0); + break; + } /* end of switch */ + + return destSize; +} + +U_CAPI int32_t U_EXPORT2 +ubidi_writeReverse(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + int32_t destLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* more error checking */ + if( src==NULL || srcLength<-1 || + destSize<0 || (destSize>0 && dest==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* do input and output overlap? 
*/ + if( dest!=NULL && + ((src>=dest && src=src && dest0) { + destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode); + } else { + /* nothing to do */ + destLength=0; + } + + return u_terminateUChars(dest, destSize, destLength, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ubidi_writeReordered(UBiDi *pBiDi, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + const UChar *text; + UChar *saveDest; + int32_t length, destCapacity; + int32_t run, runCount, logicalStart, runLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* more error checking */ + if( pBiDi==NULL || + (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 || + destSize<0 || (destSize>0 && dest==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* do input and output overlap? */ + if( dest!=NULL && + ((text>=dest && text=text && destoriginalLength))) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length==0) { + /* nothing to do */ + return u_terminateUChars(dest, destSize, 0, pErrorCode); + } + + runCount=ubidi_countRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* destSize shrinks, later destination length=destCapacity-destSize */ + saveDest=dest; + destCapacity=destSize; + + /* + * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the + * reordering mode (checked below) is appropriate. + */ + if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { + options|=UBIDI_INSERT_LRM_FOR_NUMERIC; + options&=~UBIDI_REMOVE_BIDI_CONTROLS; + } + /* + * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS + * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC. + */ + if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { + options|=UBIDI_REMOVE_BIDI_CONTROLS; + options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; + } + /* + * If we do not perform the "inverse BiDi" algorithm, then we + * don't need to insert any LRMs, and don't need to test for it. 
+ */ + if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) && + (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) && + (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && + (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) { + options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; + } + /* + * Iterate through all visual runs and copy the run text segments to + * the destination, according to the options. + * + * The tests for where to insert LRMs ignore the fact that there may be + * BN codes or non-BMP code points at the beginning and end of a run; + * they may insert LRMs unnecessarily but the tests are faster this way + * (this would have to be improved for UTF-8). + * + * Note that the only errors that are set by doWriteXY() are buffer overflow + * errors. Ignore them until the end, and continue for preflighting. + */ + if(!(options&UBIDI_OUTPUT_REVERSE)) { + /* forward output */ + if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { + /* do not insert BiDi controls */ + for(run=0; rundirProps; + const UChar *src; + UChar uc; + UBiDiDirection dir; + int32_t markFlag; + + for(run=0; runruns[run].insertRemove; + if(markFlag<0) { /* BiDi controls count */ + markFlag=0; + } + + if(UBIDI_LTR==dir) { + if((pBiDi->isInverse) && + (/*run>0 &&*/ dirProps[logicalStart]!=L)) { + markFlag |= LRM_BEFORE; + } + if (markFlag & LRM_BEFORE) { + uc=LRM_CHAR; + } + else if (markFlag & RLM_BEFORE) { + uc=RLM_CHAR; + } + else uc=0; + if(uc) { + if(destSize>0) { + *dest++=uc; + } + --destSize; + } + + runLength=doWriteForward(src, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if((pBiDi->isInverse) && + (/*run0) { + *dest++=uc; + } + --destSize; + } + } else { /* RTL run */ + if((pBiDi->isInverse) && + (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) { + markFlag |= RLM_BEFORE; + } + if (markFlag & LRM_BEFORE) { + 
uc=LRM_CHAR; + } + else if (markFlag & RLM_BEFORE) { + uc=RLM_CHAR; + } + else uc=0; + if(uc) { + if(destSize>0) { + *dest++=uc; + } + --destSize; + } + + runLength=doWriteReverse(src, runLength, + dest, destSize, + options, pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if((pBiDi->isInverse) && + (/*run0) { + *dest++=uc; + } + --destSize; + } + } + } + } + } else { + /* reverse output */ + if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { + /* do not insert BiDi controls */ + for(run=runCount; --run>=0;) { + if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) { + runLength=doWriteReverse(text+logicalStart, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + } else { + runLength=doWriteForward(text+logicalStart, runLength, + dest, destSize, + options, pErrorCode); + } + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + } + } else { + /* insert BiDi controls for "inverse BiDi" */ + const DirProp *dirProps=pBiDi->dirProps; + const UChar *src; + UBiDiDirection dir; + + for(run=runCount; --run>=0;) { + /* reverse output */ + dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength); + src=text+logicalStart; + + if(UBIDI_LTR==dir) { + if(/*run0) { + *dest++=LRM_CHAR; + } + --destSize; + } + + runLength=doWriteReverse(src, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if(/*run>0 &&*/ dirProps[logicalStart]!=L) { + if(destSize>0) { + *dest++=LRM_CHAR; + } + --destSize; + } + } else { + if(/*run0) { + *dest++=RLM_CHAR; + } + --destSize; + } + + runLength=doWriteForward(src, runLength, + dest, destSize, + options, pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) { + if(destSize>0) { + *dest++=RLM_CHAR; + } + --destSize; + } + } + } + } + } + + return 
u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode); +} diff --git a/deps/icu-small/source/common/ubrk.cpp b/deps/icu-small/source/common/ubrk.cpp index b02c966b10..f8bdf5a6b6 100644 --- a/deps/icu-small/source/common/ubrk.cpp +++ b/deps/icu-small/source/common/ubrk.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -20,6 +20,7 @@ #include "unicode/rbbi.h" #include "rbbirb.h" #include "uassert.h" +#include "cmemory.h" U_NAMESPACE_USE @@ -119,7 +120,28 @@ ubrk_openRules( const UChar *rules, } - +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, + const UChar * text, int32_t textLength, + UErrorCode * status) +{ + if (U_FAILURE(*status)) { + return NULL; + } + if (rulesLength < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + LocalPointer lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status); + if (U_FAILURE(*status)) { + return NULL; + } + UBreakIterator *uBI = reinterpret_cast(lpRBBI.orphan()); + if (text != NULL) { + ubrk_setText(uBI, text, textLength, status); + } + return uBI; +} U_CAPI UBreakIterator * U_EXPORT2 @@ -288,7 +310,8 @@ ubrk_getLocaleByType(const UBreakIterator *bi, } -void ubrk_refreshUText(UBreakIterator *bi, +U_CAPI void U_EXPORT2 +ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status) { @@ -296,6 +319,39 @@ void ubrk_refreshUText(UBreakIterator *bi, bii->refreshInputText(text, *status); } +U_CAPI int32_t U_EXPORT2 +ubrk_getBinaryRules(UBreakIterator *bi, + uint8_t * binaryRules, int32_t rulesCapacity, + UErrorCode * status) +{ + if (U_FAILURE(*status)) { + return 0; + } + if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 
0; + } + RuleBasedBreakIterator* rbbi; + if ((rbbi = dynamic_cast(reinterpret_cast(bi))) == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + uint32_t rulesLength; + const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength); + if (rulesLength > INT32_MAX) { + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if (binaryRules != NULL) { // if not preflighting + // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely + if ((int32_t)rulesLength > rulesCapacity) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + uprv_memcpy(binaryRules, returnedRules, rulesLength); + } + } + return (int32_t)rulesLength; +} #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/icu-small/source/common/ubrkimpl.h b/deps/icu-small/source/common/ubrkimpl.h index 36ca668859..8197f66339 100644 --- a/deps/icu-small/source/common/ubrkimpl.h +++ b/deps/icu-small/source/common/ubrkimpl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp index 5c9354e73b..566014245f 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ucase.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -46,13 +46,6 @@ struct UCaseProps { #define INCLUDED_FROM_UCASE_CPP #include "ucase_props_data.h" -/* UCaseProps singleton ----------------------------------------------------- */ - -U_CAPI const UCaseProps * U_EXPORT2 -ucase_getSingleton() { - return &ucase_props_singleton; -} - /* set of property starts for UnicodeSet ------------------------------------ */ static UBool U_CALLCONV @@ -64,13 +57,13 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui } U_CFUNC void U_EXPORT2 -ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) { +ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return; } /* add the start code point of each same-value range of the trie */ - utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa); + utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); /* add code points with hardcoded properties, plus the ones following them */ @@ -133,14 +126,14 @@ static const uint8_t flagsOffset[256]={ /* simple case mappings ----------------------------------------------------- */ U_CAPI UChar32 U_EXPORT2 -ucase_tolower(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_tolower(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; if(HAS_SLOT(excWord, 
UCASE_EXC_LOWER)) { GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c); @@ -150,14 +143,14 @@ ucase_tolower(const UCaseProps *csp, UChar32 c) { } U_CAPI UChar32 U_EXPORT2 -ucase_toupper(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_toupper(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c); @@ -167,14 +160,14 @@ ucase_toupper(const UCaseProps *csp, UChar32 c) { } U_CAPI UChar32 U_EXPORT2 -ucase_totitle(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_totitle(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; int32_t idx; if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) { @@ -198,7 +191,7 @@ static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 }; U_CFUNC void U_EXPORT2 -ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { +ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { uint16_t props; /* @@ -229,7 +222,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { break; } - props=UTRIE2_GET16(&csp->trie, c); + props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) { /* add the one simple case mapping, no matter what type it is */ @@ -243,7 +236,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { * c has 
exceptions, so there may be multiple simple and/or * full case mappings. Add them all. */ - const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); const UChar *closure; uint16_t excWord=*pe++; int32_t idx, closureLength, fullLength, length; @@ -338,10 +331,10 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { } U_CFUNC UBool U_EXPORT2 -ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) { +ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) { int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; - if(csp->unfold==NULL || s==NULL) { + if(ucase_props_singleton.unfold==NULL || s==NULL) { return FALSE; /* no reverse case folding data, or no string */ } if(length<=1) { @@ -355,7 +348,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length return FALSE; } - const uint16_t *unfold=csp->unfold; + const uint16_t *unfold=ucase_props_singleton.unfold; unfoldRows=unfold[UCASE_UNFOLD_ROWS]; unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; @@ -381,7 +374,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length for(i=unfoldStringWidth; iadd(sa->set, c); - ucase_addCaseClosure(csp, c, sa); + ucase_addCaseClosure(c, sa); } return TRUE; } else if(result<0) { @@ -430,38 +423,38 @@ U_NAMESPACE_END /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ U_CAPI int32_t U_EXPORT2 -ucase_getType(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_getType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); return UCASE_GET_TYPE(props); } /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ U_CAPI int32_t U_EXPORT2 -ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) { - uint16_t 
props=UTRIE2_GET16(&csp->trie, c); +ucase_getTypeOrIgnorable(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); return UCASE_GET_TYPE_AND_IGNORABLE(props); } /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ static inline int32_t -getDotType(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +getDotType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { return props&UCASE_DOT_MASK; } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK; } } U_CAPI UBool U_EXPORT2 -ucase_isSoftDotted(const UCaseProps *csp, UChar32 c) { - return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED); +ucase_isSoftDotted(UChar32 c) { + return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED); } U_CAPI UBool U_EXPORT2 -ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_isCaseSensitive(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); return (UBool)((props&UCASE_SENSITIVE)!=0); } @@ -545,12 +538,10 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { * zero or more case-ignorable characters. */ -#define is_a(c) ((c)=='a' || (c)=='A') #define is_d(c) ((c)=='d' || (c)=='D') #define is_e(c) ((c)=='e' || (c)=='E') #define is_i(c) ((c)=='i' || (c)=='I') #define is_l(c) ((c)=='l' || (c)=='L') -#define is_n(c) ((c)=='n' || (c)=='N') #define is_r(c) ((c)=='r' || (c)=='R') #define is_t(c) ((c)=='t' || (c)=='T') #define is_u(c) ((c)=='u' || (c)=='U') @@ -565,16 +556,7 @@ ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { * Accepts both 2- and 3-letter codes and accepts case variants. 
*/ U_CFUNC int32_t -ucase_getCaseLocale(const char *locale, int32_t *locCache) { - int32_t result; - char c; - - if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) { - return result; - } - - result=UCASE_LOC_ROOT; - +ucase_getCaseLocale(const char *locale) { /* * This function used to use uloc_getLanguage(), but the current code * removes the dependency of this low-level code on uloc implementation code @@ -584,73 +566,149 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { * Because this code does not want to depend on uloc, the caller must * pass in a non-NULL locale, i.e., may need to call uloc_getDefault(). */ - c=*locale++; - if(is_t(c)) { - /* tr or tur? */ + char c=*locale++; + // Fastpath for English "en" which is often used for default (=root locale) case mappings, + // and for Chinese "zh": Very common but no special case mapping behavior. + // Then check lowercase vs. uppercase to reduce the number of comparisons + // for other locales without special behavior. + if(c=='e') { + /* el or ell? */ c=*locale++; - if(is_u(c)) { + if(is_l(c)) { c=*locale++; - } - if(is_r(c)) { - c=*locale; + if(is_l(c)) { + c=*locale; + } if(is_sep(c)) { - result=UCASE_LOC_TURKISH; + return UCASE_LOC_GREEK; } } - } else if(is_a(c)) { - /* az or aze? */ - c=*locale++; - if(is_z(c)) { + // en, es, ... -> root + } else if(c=='z') { + return UCASE_LOC_ROOT; +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z +#else +# error Unknown charset family! +#endif + // lowercase c + if(c=='t') { + /* tr or tur? */ c=*locale++; - if(is_e(c)) { + if(is_u(c)) { + c=*locale++; + } + if(is_r(c)) { c=*locale; + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } } - if(is_sep(c)) { - result=UCASE_LOC_TURKISH; + } else if(c=='a') { + /* az or aze? 
*/ + c=*locale++; + if(is_z(c)) { + c=*locale++; + if(is_e(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } } - } - } else if(is_l(c)) { - /* lt or lit? */ - c=*locale++; - if(is_i(c)) { + } else if(c=='l') { + /* lt or lit? */ c=*locale++; - } - if(is_t(c)) { - c=*locale; - if(is_sep(c)) { - result=UCASE_LOC_LITHUANIAN; + if(is_i(c)) { + c=*locale++; } - } - } else if(is_e(c)) { - /* el or ell? */ - c=*locale++; - if(is_l(c)) { + if(is_t(c)) { + c=*locale; + if(is_sep(c)) { + return UCASE_LOC_LITHUANIAN; + } + } + } else if(c=='n') { + /* nl or nld? */ c=*locale++; if(is_l(c)) { + c=*locale++; + if(is_d(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_DUTCH; + } + } + } + } else { + // uppercase c + // Same code as for lowercase c but also check for 'E'. + if(c=='T') { + /* tr or tur? */ + c=*locale++; + if(is_u(c)) { + c=*locale++; + } + if(is_r(c)) { c=*locale; + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } } - if(is_sep(c)) { - result=UCASE_LOC_GREEK; + } else if(c=='A') { + /* az or aze? */ + c=*locale++; + if(is_z(c)) { + c=*locale++; + if(is_e(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } } - } - } else if(is_n(c)) { - /* nl or nld? */ - c=*locale++; - if(is_l(c)) { + } else if(c=='L') { + /* lt or lit? */ c=*locale++; - if(is_d(c)) { + if(is_i(c)) { + c=*locale++; + } + if(is_t(c)) { c=*locale; + if(is_sep(c)) { + return UCASE_LOC_LITHUANIAN; + } } - if(is_sep(c)) { - result=UCASE_LOC_DUTCH; + } else if(c=='E') { + /* el or ell? */ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_l(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_GREEK; + } + } + } else if(c=='N') { + /* nl or nld? 
*/ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_d(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_DUTCH; + } } } } - - if(locCache!=NULL) { - *locCache=result; - } - return result; + return UCASE_LOC_ROOT; } /* @@ -662,7 +720,7 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { * it is also cased or not. */ static UBool -isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) { +isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) { UChar32 c; if(iter==NULL) { @@ -670,7 +728,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void } for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { - int32_t type=ucase_getTypeOrIgnorable(csp, c); + int32_t type=ucase_getTypeOrIgnorable(c); if(type&4) { /* case-ignorable, continue with the loop */ } else if(type!=UCASE_NONE) { @@ -685,7 +743,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */ static UBool -isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -695,7 +753,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void * } for(dir=-1; (c=iter(context, dir))>=0; dir=0) { - dotType=getDotType(csp, c); + dotType=getDotType(c); if(dotType==UCASE_SOFT_DOTTED) { return TRUE; /* preceded by TYPE_i */ } else if(dotType!=UCASE_OTHER_ACCENT) { @@ -742,7 +800,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void * /* Is preceded by base character 'I' with no intervening cc=230 ? 
*/ static UBool -isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isPrecededBy_I(UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -755,7 +813,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) if(c==0x49) { return TRUE; /* preceded by I */ } - dotType=getDotType(csp, c); + dotType=getDotType(c); if(dotType!=UCASE_OTHER_ACCENT) { return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ } @@ -766,7 +824,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) /* Is followed by one or more cc==230 ? */ static UBool -isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -776,7 +834,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c } for(dir=1; (c=iter(context, dir))>=0; dir=0) { - dotType=getDotType(csp, c); + dotType=getDotType(c); if(dotType==UCASE_ABOVE) { return TRUE; /* at least one cc==230 following */ } else if(dotType!=UCASE_OTHER_ACCENT) { @@ -789,7 +847,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c /* Is followed by a dot above (without cc==230 in between) ? 
*/ static UBool -isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isFollowedByDotAbove(UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -802,7 +860,7 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co if(c==0x307) { return TRUE; } - dotType=getDotType(csp, c); + dotType=getDotType(c); if(dotType!=UCASE_OTHER_ACCENT) { return FALSE; /* next base character or cc==230 in between */ } @@ -812,20 +870,20 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co } U_CAPI int32_t U_EXPORT2 -ucase_toFullLower(const UCaseProps *csp, UChar32 c, +ucase_toFullLower(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache) { + int32_t loc) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; - uint16_t props=UTRIE2_GET16(&csp->trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { result=c+UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; uint16_t excWord=*pe++; int32_t full; @@ -833,7 +891,6 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { /* use hardcoded conditions and mappings */ - int32_t loc=ucase_getCaseLocale(locale, locCache); /* * Test for conditional mappings first @@ -844,7 +901,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, if( loc==UCASE_LOC_LITHUANIAN && /* base characters, find accents above */ (((c==0x49 || c==0x4a || c==0x12e) && - isFollowedByMoreAbove(csp, iter, context)) || + isFollowedByMoreAbove(iter, context)) || /* precomposed with accent above, no need to find one */ (c==0xcc || c==0xcd || c==0x128)) ) { @@ 
-896,7 +953,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE */ return 0x69; - } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) { + } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) { /* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. # This matches the behavior of the canonically equivalent I-dot_above @@ -905,7 +962,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE */ return 0; /* remove the dot (continue without output) */ - } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) { + } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { /* # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. @@ -922,8 +979,8 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, *pString=iDot; return 2; } else if( c==0x3a3 && - !isFollowedByCasedLetter(csp, iter, context, 1) && - isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */ + !isFollowedByCasedLetter(iter, context, 1) && + isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */ ) { /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ /* @@ -957,21 +1014,21 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, /* internal */ static int32_t -toUpperOrTitle(const UCaseProps *csp, UChar32 c, +toUpperOrTitle(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache, + int32_t loc, UBool upperNotTitle) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
U_ASSERT(c >= 0); UChar32 result=c; - uint16_t props=UTRIE2_GET16(&csp->trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { result=c+UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; uint16_t excWord=*pe++; int32_t full, idx; @@ -979,8 +1036,6 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { /* use hardcoded conditions and mappings */ - int32_t loc=ucase_getCaseLocale(locale, locCache); - if(loc==UCASE_LOC_TURKISH && c==0x69) { /* # Turkish and Azeri @@ -994,7 +1049,7 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I */ return 0x130; - } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) { + } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) { /* # Lithuanian @@ -1052,19 +1107,19 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, } U_CAPI int32_t U_EXPORT2 -ucase_toFullUpper(const UCaseProps *csp, UChar32 c, +ucase_toFullUpper(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache) { - return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE); + int32_t caseLocale) { + return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE); } U_CAPI int32_t U_EXPORT2 -ucase_toFullTitle(const UCaseProps *csp, UChar32 c, +ucase_toFullTitle(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache) { - return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE); + int32_t caseLocale) { + return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE); } /* case folding 
------------------------------------------------------------- */ @@ -1110,14 +1165,14 @@ ucase_toFullTitle(const UCaseProps *csp, UChar32 c, /* return the simple case folding mapping for c */ U_CAPI UChar32 U_EXPORT2 -ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_fold(UChar32 c, uint32_t options) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; int32_t idx; if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { @@ -1170,19 +1225,19 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { */ U_CAPI int32_t U_EXPORT2 -ucase_toFullFolding(const UCaseProps *csp, UChar32 c, +ucase_toFullFolding(UChar32 c, const UChar **pString, uint32_t options) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
U_ASSERT(c >= 0); UChar32 result=c; - uint16_t props=UTRIE2_GET16(&csp->trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { result=c+UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; uint16_t excWord=*pe++; int32_t full, idx; @@ -1244,66 +1299,59 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c, /* case mapping properties API ---------------------------------------------- */ -#define GET_CASE_PROPS() &ucase_props_singleton - /* public API (see uchar.h) */ U_CAPI UBool U_EXPORT2 u_isULowercase(UChar32 c) { - return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c)); + return (UBool)(UCASE_LOWER==ucase_getType(c)); } U_CAPI UBool U_EXPORT2 u_isUUppercase(UChar32 c) { - return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c)); + return (UBool)(UCASE_UPPER==ucase_getType(c)); } /* Transforms the Unicode character to its lower case equivalent.*/ U_CAPI UChar32 U_EXPORT2 u_tolower(UChar32 c) { - return ucase_tolower(GET_CASE_PROPS(), c); + return ucase_tolower(c); } /* Transforms the Unicode character to its upper case equivalent.*/ U_CAPI UChar32 U_EXPORT2 u_toupper(UChar32 c) { - return ucase_toupper(GET_CASE_PROPS(), c); + return ucase_toupper(c); } /* Transforms the Unicode character to its title case equivalent.*/ U_CAPI UChar32 U_EXPORT2 u_totitle(UChar32 c) { - return ucase_totitle(GET_CASE_PROPS(), c); + return ucase_totitle(c); } /* return the simple case folding mapping for c */ U_CAPI UChar32 U_EXPORT2 u_foldCase(UChar32 c, uint32_t options) { - return ucase_fold(GET_CASE_PROPS(), c, options); + return ucase_fold(c, options); } U_CFUNC int32_t U_EXPORT2 ucase_hasBinaryProperty(UChar32 c, UProperty which) { /* case mapping properties */ const UChar *resultString; - int32_t locCache; - const UCaseProps *csp=GET_CASE_PROPS(); - 
if(csp==NULL) { - return FALSE; - } switch(which) { case UCHAR_LOWERCASE: - return (UBool)(UCASE_LOWER==ucase_getType(csp, c)); + return (UBool)(UCASE_LOWER==ucase_getType(c)); case UCHAR_UPPERCASE: - return (UBool)(UCASE_UPPER==ucase_getType(csp, c)); + return (UBool)(UCASE_UPPER==ucase_getType(c)); case UCHAR_SOFT_DOTTED: - return ucase_isSoftDotted(csp, c); + return ucase_isSoftDotted(c); case UCHAR_CASE_SENSITIVE: - return ucase_isCaseSensitive(csp, c); + return ucase_isCaseSensitive(c); case UCHAR_CASED: - return (UBool)(UCASE_NONE!=ucase_getType(csp, c)); + return (UBool)(UCASE_NONE!=ucase_getType(c)); case UCHAR_CASE_IGNORABLE: - return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2); + return (UBool)(ucase_getTypeOrIgnorable(c)>>2); /* * Note: The following Changes_When_Xyz are defined as testing whether * the NFD form of the input changes when Xyz-case-mapped. @@ -1317,21 +1365,17 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) { * start sets for normalization and case mappings. 
*/ case UCHAR_CHANGES_WHEN_LOWERCASED: - locCache=UCASE_LOC_ROOT; - return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); case UCHAR_CHANGES_WHEN_UPPERCASED: - locCache=UCASE_LOC_ROOT; - return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); case UCHAR_CHANGES_WHEN_TITLECASED: - locCache=UCASE_LOC_ROOT; - return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ case UCHAR_CHANGES_WHEN_CASEMAPPED: - locCache=UCASE_LOC_ROOT; return (UBool)( - ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || - ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || - ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || + ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || + ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); default: return FALSE; } diff --git a/deps/icu-small/source/common/ucase.h b/deps/icu-small/source/common/ucase.h index 29ea71a533..e15bae6604 100644 --- a/deps/icu-small/source/common/ucase.h +++ b/deps/icu-small/source/common/ucase.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ucase.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -37,18 +37,8 @@ U_NAMESPACE_END /* library API -------------------------------------------------------------- */ -U_CDECL_BEGIN - -struct UCaseProps; -typedef struct UCaseProps UCaseProps; - -U_CDECL_END - -U_CAPI const UCaseProps * U_EXPORT2 -ucase_getSingleton(void); - U_CFUNC void U_EXPORT2 -ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode); +ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); /** * Requires non-NULL locale ID but otherwise does the equivalent of @@ -56,7 +46,7 @@ ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode * * Accepts both 2- and 3-letter codes and accepts case variants. 
*/ U_CFUNC int32_t -ucase_getCaseLocale(const char *locale, int32_t *locCache); +ucase_getCaseLocale(const char *locale); /* Casing locale types for ucase_getCaseLocale */ enum { @@ -87,16 +77,16 @@ enum { /* single-code point functions */ U_CAPI UChar32 U_EXPORT2 -ucase_tolower(const UCaseProps *csp, UChar32 c); +ucase_tolower(UChar32 c); U_CAPI UChar32 U_EXPORT2 -ucase_toupper(const UCaseProps *csp, UChar32 c); +ucase_toupper(UChar32 c); U_CAPI UChar32 U_EXPORT2 -ucase_totitle(const UCaseProps *csp, UChar32 c); +ucase_totitle(UChar32 c); U_CAPI UChar32 U_EXPORT2 -ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options); +ucase_fold(UChar32 c, uint32_t options); /** * Adds all simple case mappings and the full case folding for c to sa, @@ -108,7 +98,7 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options); * - for k include the Kelvin sign */ U_CFUNC void U_EXPORT2 -ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa); +ucase_addCaseClosure(UChar32 c, const USetAdder *sa); /** * Maps the string to single code points and adds the associated case closure @@ -123,7 +113,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa); * @return TRUE if the string was found */ U_CFUNC UBool U_EXPORT2 -ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa); +ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa); #ifdef __cplusplus U_NAMESPACE_BEGIN @@ -157,17 +147,17 @@ U_NAMESPACE_END /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ U_CAPI int32_t U_EXPORT2 -ucase_getType(const UCaseProps *csp, UChar32 c); +ucase_getType(UChar32 c); /** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */ U_CAPI int32_t U_EXPORT2 -ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c); +ucase_getTypeOrIgnorable(UChar32 c); U_CAPI UBool U_EXPORT2 -ucase_isSoftDotted(const UCaseProps *csp, UChar32 c); 
+ucase_isSoftDotted(UChar32 c); U_CAPI UBool U_EXPORT2 -ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c); +ucase_isCaseSensitive(UChar32 c); /* string case mapping functions */ @@ -240,10 +230,7 @@ enum { * @param context Pointer to be passed into iter. * @param pString If the mapping result is a string, then the pointer is * written to *pString. - * @param locale Locale ID for locale-dependent mappings. - * @param locCache Initialize to 0; may be used to cache the result of parsing - * the locale ID for subsequent calls. - * Can be NULL. + * @param caseLocale Case locale value from ucase_getCaseLocale(). * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH. * * @see UCaseContextIterator @@ -251,25 +238,25 @@ enum { * @internal */ U_CAPI int32_t U_EXPORT2 -ucase_toFullLower(const UCaseProps *csp, UChar32 c, +ucase_toFullLower(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache); + int32_t caseLocale); U_CAPI int32_t U_EXPORT2 -ucase_toFullUpper(const UCaseProps *csp, UChar32 c, +ucase_toFullUpper(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache); + int32_t caseLocale); U_CAPI int32_t U_EXPORT2 -ucase_toFullTitle(const UCaseProps *csp, UChar32 c, +ucase_toFullTitle(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache); + int32_t caseLocale); U_CAPI int32_t U_EXPORT2 -ucase_toFullFolding(const UCaseProps *csp, UChar32 c, +ucase_toFullFolding(UChar32 c, const UChar **pString, uint32_t options); @@ -283,10 +270,10 @@ U_CDECL_BEGIN * @internal */ typedef int32_t U_CALLCONV -UCaseMapFull(const UCaseProps *csp, UChar32 c, +UCaseMapFull(UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, - const char *locale, int32_t *locCache); + int32_t caseLocale); U_CDECL_END diff --git a/deps/icu-small/source/common/ucase_props_data.h 
b/deps/icu-small/source/common/ucase_props_data.h index aa51bac691..3663592173 100644 --- a/deps/icu-small/source/common/ucase_props_data.h +++ b/deps/icu-small/source/common/ucase_props_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 1999-2016, International Business Machines diff --git a/deps/icu-small/source/common/ucasemap.cpp b/deps/icu-small/source/common/ucasemap.cpp index 0576a26ddd..391140d6c5 100644 --- a/deps/icu-small/source/common/ucasemap.cpp +++ b/deps/icu-small/source/common/ucasemap.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ucasemap.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -20,6 +20,8 @@ #include "unicode/utypes.h" #include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" #include "unicode/ubrk.h" #include "unicode/uloc.h" #include "unicode/ustring.h" @@ -32,47 +34,69 @@ #include "unicode/utf16.h" #include "cmemory.h" #include "cstring.h" +#include "uassert.h" #include "ucase.h" +#include "ucasemap_imp.h" #include "ustr_imp.h" +U_NAMESPACE_BEGIN + +namespace { + +// TODO: share with UTF-16? inline in ucasemap_imp.h? 
+int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, + Edits *edits, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + if (destIndex > destCapacity) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + } else if (edits != NULL) { + edits->copyErrorTo(errorCode); + } + } + return destIndex; +} + +} // namespace + +U_NAMESPACE_END + U_NAMESPACE_USE /* UCaseMap service object -------------------------------------------------- */ +UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) : +#if !UCONFIG_NO_BREAK_ITERATION + iter(NULL), +#endif + caseLocale(UCASE_LOC_UNKNOWN), options(opts) { + ucasemap_setLocale(this, localeID, pErrorCode); +} + +UCaseMap::~UCaseMap() { +#if !UCONFIG_NO_BREAK_ITERATION + delete iter; +#endif +} + U_CAPI UCaseMap * U_EXPORT2 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { - UCaseMap *csm; - if(U_FAILURE(*pErrorCode)) { return NULL; } - - csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap)); + UCaseMap *csm = new UCaseMap(locale, options, pErrorCode); if(csm==NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; - } - uprv_memset(csm, 0, sizeof(UCaseMap)); - - csm->csp=ucase_getSingleton(); - ucasemap_setLocale(csm, locale, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - uprv_free(csm); + } else if (U_FAILURE(*pErrorCode)) { + delete csm; return NULL; } - - csm->options=options; return csm; } U_CAPI void U_EXPORT2 ucasemap_close(UCaseMap *csm) { - if(csm!=NULL) { -#if !UCONFIG_NO_BREAK_ITERATION - // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. 
- delete reinterpret_cast(csm->iter); -#endif - uprv_free(csm); - } + delete csm; } U_CAPI const char * U_EXPORT2 @@ -87,13 +111,16 @@ ucasemap_getOptions(const UCaseMap *csm) { U_CAPI void U_EXPORT2 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { - int32_t length; - if(U_FAILURE(*pErrorCode)) { return; } + if (locale != NULL && *locale == 0) { + csm->locale[0] = 0; + csm->caseLocale = UCASE_LOC_ROOT; + return; + } - length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); + int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { *pErrorCode=U_ZERO_ERROR; /* we only really need the language code for case mappings */ @@ -102,27 +129,32 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { if(length==sizeof(csm->locale)) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } - csm->locCache=0; if(U_SUCCESS(*pErrorCode)) { - ucase_getCaseLocale(csm->locale, &csm->locCache); + csm->caseLocale=UCASE_LOC_UNKNOWN; + csm->caseLocale = ucase_getCaseLocale(csm->locale); } else { csm->locale[0]=0; + csm->caseLocale = UCASE_LOC_ROOT; } } U_CAPI void U_EXPORT2 -ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) { +ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } csm->options=options; } /* UTF-8 string case mappings ----------------------------------------------- */ -/* TODO(markus): Move to a new, separate utf8case.c file. */ +/* TODO(markus): Move to a new, separate utf8case.cpp file. 
*/ /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ static inline int32_t appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, - int32_t result, const UChar *s) { + int32_t result, const UChar *s, + int32_t cpLength, uint32_t options, icu::Edits *edits) { UChar32 c; int32_t length; UErrorCode errorCode; @@ -130,86 +162,126 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, /* decode the result */ if(result<0) { /* (not) original code point */ + if(edits!=NULL) { + edits->addUnchanged(cpLength); + if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + } c=~result; - length=U8_LENGTH(c); - } else if(result<=UCASE_MAX_STRING_LENGTH) { - c=U_SENTINEL; - length=result; + if(destIndex(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if(edits!=NULL) { + edits->addReplace(cpLength, length); + } + // We might have an overflow, but we know the actual length. + return destIndex+length; + } else if(destIndexaddReplace(cpLength, 1); + } + return destIndex; + } else { + c=result; + length=U8_LENGTH(c); + if(edits!=NULL) { + edits->addReplace(cpLength, length); + } + } } + // c>=0 single code point if(length>(INT32_MAX-destIndex)) { return -1; // integer overflow } if(destIndex=0) { - /* code point */ - UBool isError=FALSE; - U8_APPEND(dest, destIndex, destCapacity, c, isError); - if(isError) { - /* overflow, nothing written */ - destIndex+=length; - } - } else { - /* string */ - int32_t destLength; - errorCode=U_ZERO_ERROR; - u_strToUTF8( - (char *)(dest+destIndex), destCapacity-destIndex, &destLength, - s, length, - &errorCode); - if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { - return -1; - } - if(destLength>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - destIndex+=destLength; - /* we might have an overflow, but we know the actual length */ + UBool isError=FALSE; + U8_APPEND(dest, destIndex, destCapacity, c, isError); + if(isError) { + /* overflow, 
nothing written */ + destIndex+=length; } } else { /* preflight */ - if(c>=0) { - destIndex+=length; - } else { - int32_t destLength; - errorCode=U_ZERO_ERROR; - u_strToUTF8( - NULL, 0, &destLength, - s, length, - &errorCode); - if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) { - return -1; - } - if(destLength>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - destIndex+=destLength; - } + destIndex+=length; } return destIndex; } static inline int32_t -appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) { - int32_t length=U8_LENGTH(c); - if(length>(INT32_MAX-destIndex)) { +appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) { + if(destIndex> 6) | 0xc0); } +static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } + +static inline int32_t +appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) { + U_ASSERT(0x370 <= c && c <= 0x3ff); // 2-byte UTF-8, main Greek block + if(2>(INT32_MAX-destIndex)) { return -1; // integer overflow } - int32_t limit=destIndex+length; + int32_t limit=destIndex+2; if(limit<=destCapacity) { - U8_APPEND_UNSAFE(dest, destIndex, c); + dest+=destIndex; + dest[0]=getTwoByteLead(c); + dest[1]=getTwoByteTrail(c); } return limit; } static inline int32_t -appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity, - const uint8_t *s, int32_t length) { +appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) { + if(2>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + int32_t limit=destIndex+2; + if(limit<=destCapacity) { + dest+=destIndex; + dest[0]=(uint8_t)s[0]; + dest[1]=(uint8_t)s[1]; + } + return limit; +} + +static inline int32_t +appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity, + const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) { if(length>0) { + if(edits!=NULL) { + edits->addUnchanged(length); + if(options & 
UCASEMAP_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + } if(length>(INT32_MAX-destIndex)) { return -1; // integer overflow } @@ -258,93 +330,77 @@ utf8_caseContextIterator(void *context, int8_t dir) { * context [0..srcLength[ into account. */ static int32_t -_caseMap(const UCaseMap *csm, UCaseMapFull *map, +_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, uint8_t *dest, int32_t destCapacity, const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, - UErrorCode *pErrorCode) { - const UChar *s = NULL; - UChar32 c, c2 = 0; - int32_t srcIndex, destIndex; - int32_t locCache; - - locCache=csm->locCache; - + icu::Edits *edits, + UErrorCode &errorCode) { /* case mapping loop */ - srcIndex=srcStart; - destIndex=0; + int32_t srcIndex=srcStart; + int32_t destIndex=0; while(srcIndexcpStart=srcIndex; + int32_t cpStart; + csc->cpStart=cpStart=srcIndex; + UChar32 c; U8_NEXT(src, srcIndex, srcLimit, c); csc->cpLimit=srcIndex; if(c<0) { // Malformed UTF-8. - destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart); + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+cpStart, srcIndex-cpStart, options, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } continue; } - c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache); - if((destIndexdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } return destIndex; } #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV -ucasemap_internalUTF8ToTitle(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const UChar *s; - UChar32 c; - int32_t prev, titleStart, titleLimit, idx, destIndex; - UBool isFirstIndex; - - if(U_FAILURE(*pErrorCode)) { +ucasemap_internalUTF8ToTitle( + int32_t caseLocale, uint32_t options, BreakIterator *iter, + uint8_t *dest, int32_t destCapacity, + const 
uint8_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return 0; } - // Use the C++ abstract base class to minimize dependencies. - // TODO: Change UCaseMap.iter to store a BreakIterator directly. - BreakIterator *bi=reinterpret_cast(csm->iter); - /* set up local variables */ - int32_t locCache=csm->locCache; UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - destIndex=0; - prev=0; - isFirstIndex=TRUE; + int32_t destIndex=0; + int32_t prev=0; + UBool isFirstIndex=TRUE; /* titlecasing loop */ while(prevfirst(); + index=iter->first(); } else { - idx=bi->next(); + index=iter->next(); } - if(idx==UBRK_DONE || idx>srcLength) { - idx=srcLength; + if(index==UBRK_DONE || index>srcLength) { + index=srcLength; } /* @@ -360,29 +416,32 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, * b) first case letter (titlecase) [titleStart..titleLimit[ * c) subsequent characters (lowercase) [titleLimit..index[ */ - if(prevoptions&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { + int32_t titleStart=prev; + int32_t titleLimit=prev; + UChar32 c; + U8_NEXT(src, titleLimit, index, c); + if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) { /* Adjust the titlecasing index (titleStart) to the next cased character. 
*/ for(;;) { titleStart=titleLimit; - if(titleLimit==idx) { + if(titleLimit==index) { /* * only uncased characters in [prev..index[ * stop with titleStart==titleLimit==index */ break; } - U8_NEXT(src, titleLimit, idx, c); - if(UCASE_NONE!=ucase_getType(csm->csp, c)) { + U8_NEXT(src, titleLimit, index, c); + if(UCASE_NONE!=ucase_getType(c)) { break; /* cased letter at [titleStart..titleLimit[ */ } } - destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev); + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+prev, titleStart-prev, options, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -392,47 +451,69 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, if(c>=0) { csc.cpStart=titleStart; csc.cpLimit=titleLimit; - c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); - destIndex=appendResult(dest, destIndex, destCapacity, c, s); + const UChar *s; + c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale); + destIndex=appendResult(dest, destIndex, destCapacity, c, s, + titleLimit-titleStart, options, edits); } else { // Malformed UTF-8. 
- destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart); + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+titleStart, titleLimit-titleStart, options, edits); } if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* Special case Dutch IJ titlecasing */ - if (titleStart+1 < idx && - ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && - (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && - (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); - titleLimit++; + if (titleStart+1 < index && + caseLocale == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { + if (src[titleStart+1] == 0x006A) { + destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if(edits!=NULL) { + edits->addReplace(1, 1); + } + titleLimit++; + } else if (src[titleStart+1] == 0x004A) { + // Keep the capital J from getting lowercased. 
+ destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+titleStart+1, 1, options, edits); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + titleLimit++; + } } + /* lowercase [titleLimit..index[ */ - if(titleLimitoptions&U_TITLECASE_NO_LOWERCASE)==0) { + if(titleLimitdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } #endif @@ -454,11 +532,11 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, U_NAMESPACE_BEGIN namespace GreekUpper { -UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) { +UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) { while (i < length) { UChar32 c; U8_NEXT(s, i, length, c); - int32_t type = ucase_getTypeOrIgnorable(csp, c); + int32_t type = ucase_getTypeOrIgnorable(c); if ((type & UCASE_IGNORABLE) != 0) { // Case-ignorable, continue with the loop. } else if (type != UCASE_NONE) { @@ -471,11 +549,11 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i } // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. 
-int32_t toUpper(const UCaseMap *csm, +int32_t toUpper(uint32_t options, uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode) { - int32_t locCache = UCASE_LOC_GREEK; + Edits *edits, + UErrorCode &errorCode) { int32_t destIndex=0; uint32_t state = 0; for (int32_t i = 0; i < srcLength;) { @@ -483,7 +561,7 @@ int32_t toUpper(const UCaseMap *csm, UChar32 c; U8_NEXT(src, nextIndex, srcLength, c); uint32_t nextState = 0; - int32_t type = ucase_getTypeOrIgnorable(csm->csp, c); + int32_t type = ucase_getTypeOrIgnorable(c); if ((type & UCASE_IGNORABLE) != 0) { // c is case-ignorable nextState |= (state & AFTER_CASED); @@ -533,7 +611,7 @@ int32_t toUpper(const UCaseMap *csm, (data & HAS_ACCENT) != 0 && numYpogegrammeni == 0 && (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) { + !isFollowedByCasedLetter(src, nextIndex, srcLength)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. if (i == nextIndex) { @@ -551,40 +629,75 @@ int32_t toUpper(const UCaseMap *csm, data &= ~HAS_EITHER_DIALYTIKA; } } - destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper); - if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika - } - if (destIndex >= 0 && addTonos) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); - } - while (destIndex >= 0 && numYpogegrammeni > 0) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); - --numYpogegrammeni; - } - if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; + + UBool change = TRUE; + if (edits != NULL) { + // Find out first whether we are changing the text. 
+ U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block + change = (i + 2) > nextIndex || + src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) || + numYpogegrammeni > 0; + int32_t i2 = i + 2; + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + change |= (i2 + 2) > nextIndex || + src[i2] != (uint8_t)u8"\u0308"[0] || + src[i2 + 1] != (uint8_t)u8"\u0308"[1]; + i2 += 2; + } + if (addTonos) { + change |= (i2 + 2) > nextIndex || + src[i2] != (uint8_t)u8"\u0301"[0] || + src[i2 + 1] != (uint8_t)u8"\u0301"[1]; + i2 += 2; + } + int32_t oldLength = nextIndex - i; + int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399 + change |= oldLength != newLength; + if (change) { + if (edits != NULL) { + edits->addReplace(oldLength, newLength); + } + } else { + if (edits != NULL) { + edits->addUnchanged(oldLength); + } + // Write unchanged text? + change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0; + } } - } else if(c>=0) { - const UChar *s; - UChar32 c2 = 0; - c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); - if((destIndex= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { + destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308"); // restore or add a dialytika + } + if (destIndex >= 0 && addTonos) { + destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301"); + } + while (destIndex >= 0 && numYpogegrammeni > 0) { + destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399"); + --numYpogegrammeni; + } if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } + } else if(c>=0) { + const UChar *s; + c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + nextIndex - i, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } else { // Malformed UTF-8. 
- destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i); + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+i, nextIndex-i, options, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -592,9 +705,6 @@ int32_t toUpper(const UCaseMap *csm, state = nextState; } - if(destIndex>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } return destIndex; } @@ -602,102 +712,92 @@ int32_t toUpper(const UCaseMap *csm, U_NAMESPACE_END static int32_t U_CALLCONV -ucasemap_internalUTF8ToLower(const UCaseMap *csm, +ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode) { + icu::Edits *edits, + UErrorCode &errorCode) { UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - return _caseMap( - csm, ucase_toFullLower, + int32_t destIndex = _caseMap( + caseLocale, options, ucase_toFullLower, dest, destCapacity, src, &csc, 0, srcLength, - pErrorCode); + edits, errorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } static int32_t U_CALLCONV -ucasemap_internalUTF8ToUpper(const UCaseMap *csm, +ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode) { - int32_t locCache = csm->locCache; - if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { - return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode); + icu::Edits *edits, + UErrorCode &errorCode) { + int32_t destIndex; + if (caseLocale == UCASE_LOC_GREEK) { + destIndex = GreekUpper::toUpper(options, dest, destCapacity, + src, srcLength, edits, errorCode); + } else { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + 
csc.limit=srcLength; + destIndex = _caseMap( + caseLocale, options, ucase_toFullUpper, + dest, destCapacity, + src, &csc, 0, srcLength, + edits, errorCode); } - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - return _caseMap( - csm, ucase_toFullUpper, - dest, destCapacity, - src, &csc, 0, srcLength, - pErrorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } -static int32_t -utf8_foldCase(const UCaseProps *csp, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode) { - int32_t srcIndex, destIndex; - - const UChar *s; - UChar32 c, c2; - int32_t start; - +static int32_t U_CALLCONV +ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { /* case mapping loop */ - srcIndex=destIndex=0; - while(srcIndexdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; -} - -static int32_t U_CALLCONV -ucasemap_internalUTF8Fold(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } U_CFUNC int32_t -ucasemap_mapUTF8(const UCaseMap *csm, +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, UTF8CaseMapper *stringCaseMapper, - UErrorCode *pErrorCode) { + icu::Edits *edits, + UErrorCode &errorCode) { int32_t destLength; /* check argument values */ - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(errorCode)) { return 0; } if( destCapacity<0 || @@ -705,7 +805,7 @@ ucasemap_mapUTF8(const UCaseMap 
*csm, src==NULL || srcLength<-1 ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + errorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -719,12 +819,16 @@ ucasemap_mapUTF8(const UCaseMap *csm, ((src>=dest && src<(dest+destCapacity)) || (dest>=src && dest<(src+srcLength))) ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + errorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } - destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode); - return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode); + if(edits!=NULL) { + edits->reset(); + } + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + dest, destCapacity, src, srcLength, edits, errorCode); + return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode); } /* public API functions */ @@ -734,10 +838,11 @@ ucasemap_utf8ToLower(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8ToLower, pErrorCode); + return ucasemap_mapUTF8( + csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToLower, NULL, *pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -745,10 +850,11 @@ ucasemap_utf8ToUpper(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8ToUpper, pErrorCode); + return ucasemap_mapUTF8( + csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToUpper, NULL, *pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -756,8 +862,49 @@ ucasemap_utf8FoldCase(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, 
int32_t srcLength, UErrorCode *pErrorCode) { - return ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8Fold, pErrorCode); + return ucasemap_mapUTF8( + UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8Fold, NULL, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +int32_t CaseMap::utf8ToLower( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToLower, edits, errorCode); } + +int32_t CaseMap::utf8ToUpper( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToUpper, edits, errorCode); +} + +int32_t CaseMap::utf8Fold( + uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ucasemap_mapUTF8( + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8Fold, edits, errorCode); +} + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/ucasemap_imp.h b/deps/icu-small/source/common/ucasemap_imp.h new file mode 100644 index 0000000000..79204226b0 --- /dev/null +++ b/deps/icu-small/source/common/ucasemap_imp.h @@ -0,0 +1,239 @@ +// © 2017 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +// ucasemap_imp.h +// created: 2017feb08 Markus W. Scherer + +#ifndef __UCASEMAP_IMP_H__ +#define __UCASEMAP_IMP_H__ + +#include "unicode/utypes.h" +#include "unicode/ucasemap.h" +#include "ucase.h" + +#ifndef U_COMPARE_IGNORE_CASE +/* see also unorm.h */ +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + */ +#define U_COMPARE_IGNORE_CASE 0x10000 +#endif + +/** + * Internal API, used by u_strcasecmp() etc. + * Compare strings case-insensitively, + * in code point order or code unit order. + */ +U_CFUNC int32_t +u_strcmpFold(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Internal API, used for detecting length of + * shared prefix case-insensitively. + * @param s1 input string 1 + * @param length1 length of string 1, or -1 (NULL terminated) + * @param s2 input string 2 + * @param length2 length of string 2, or -1 (NULL terminated) + * @param options compare options + * @param matchLen1 (output) length of partial prefix match in s1 + * @param matchLen2 (output) length of partial prefix match in s2 + * @param pErrorCode receives error status + */ +U_CAPI void +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode); + +/** + * Are the Unicode properties loaded? + * This must be used before internal functions are called that do + * not perform this check. + * Generate a debug assertion failure if data is not loaded. + */ +U_CFUNC UBool +uprv_haveProperties(UErrorCode *pErrorCode); + +#ifdef __cplusplus + +#include "unicode/unistr.h" // for UStringCaseMapper + +/* + * Internal string casing functions implementing + * ustring.h/ustrcase.cpp and UnicodeString case mapping functions. 
+ */ + +struct UCaseMap : public icu::UMemory { + /** Implements most of ucasemap_open(). */ + UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode); + ~UCaseMap(); + +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ +#endif + char locale[32]; + int32_t caseLocale; + uint32_t options; +}; + +#if UCONFIG_NO_BREAK_ITERATION +# define UCASEMAP_BREAK_ITERATOR_PARAM +# define UCASEMAP_BREAK_ITERATOR_UNUSED +# define UCASEMAP_BREAK_ITERATOR +# define UCASEMAP_BREAK_ITERATOR_NULL +#else +# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter, +# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *, +# define UCASEMAP_BREAK_ITERATOR iter, +# define UCASEMAP_BREAK_ITERATOR_NULL NULL, +#endif + +U_CFUNC int32_t +ustrcase_getCaseLocale(const char *locale); + +// TODO: swap src / dest if approved for new public api +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, + icu::BreakIterator *iter, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#endif + +/** Implements UStringCaseMapper. 
*/ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +/** + * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz(). + * Implements argument checking. + */ +U_CFUNC int32_t +ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + icu::Edits *edits, + UErrorCode &errorCode); + +/** + * Common string case mapping implementation for old-fashioned u_strToXyz() functions + * that allow the source string to overlap the destination buffer. + * Implements argument checking and internally works with an intermediate buffer if necessary. + */ +U_CFUNC int32_t +ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + UErrorCode &errorCode); + +/** + * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). + * UTF-8 version of UStringCaseMapper. + * All error checking must be done. + * The UCaseMap must be fully initialized, with locale and/or iter set as needed. + * src and dest must not overlap. + */ +typedef int32_t U_CALLCONV +UTF8CaseMapper(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** Implements UTF8CaseMapper. 
*/ +U_CFUNC int32_t U_CALLCONV +ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, + icu::BreakIterator *iter, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#endif + +/** + * Implements argument checking and buffer handling + * for UTF-8 string case mapping as a common function. + */ +U_CFUNC int32_t +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + icu::Edits *edits, + UErrorCode &errorCode); + +U_NAMESPACE_BEGIN +namespace GreekUpper { + +// Data bits. +static const uint32_t UPPER_MASK = 0x3ff; +static const uint32_t HAS_VOWEL = 0x1000; +static const uint32_t HAS_YPOGEGRAMMENI = 0x2000; +static const uint32_t HAS_ACCENT = 0x4000; +static const uint32_t HAS_DIALYTIKA = 0x8000; +// Further bits during data building and processing, not stored in the data map. +static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000; +static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000; + +static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; +static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = + HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; +static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; + +// State bits. +static const uint32_t AFTER_CASED = 1; +static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; + +uint32_t getLetterData(UChar32 c); + +/** + * Returns a non-zero value for each of the Greek combining diacritics + * listed in The Unicode Standard, version 8, chapter 7.2 Greek, + * plus some perispomeni look-alikes. 
+ */ +uint32_t getDiacriticData(UChar32 c); + +} // namespace GreekUpper +U_NAMESPACE_END + +#endif // __cplusplus + +#endif // __UCASEMAP_IMP_H__ diff --git a/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp b/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp index ab61e21765..a253850fa2 100644 --- a/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp +++ b/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucasemap_titlecase_brkiter.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -23,23 +23,59 @@ #include "unicode/brkiter.h" #include "unicode/ubrk.h" +#include "unicode/casemap.h" #include "unicode/ucasemap.h" #include "cmemory.h" #include "ucase.h" -#include "ustr_imp.h" +#include "ucasemap_imp.h" + +U_NAMESPACE_BEGIN + +int32_t CaseMap::utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + UText utext=UTEXT_INITIALIZER; + utext_openUTF8(&utext, src, srcLength, &errorCode); + LocalPointer ownedIter; + if(iter==NULL) { + iter=BreakIterator::createWordInstance(Locale(locale), errorCode); + ownedIter.adoptInstead(iter); + } + if(U_FAILURE(errorCode)) { + utext_close(&utext); + return 0; + } + iter->setText(&utext, errorCode); + int32_t length=ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, iter, + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + 
ucasemap_internalUTF8ToTitle, edits, errorCode); + utext_close(&utext); + return length; +} + +U_NAMESPACE_END U_NAMESPACE_USE U_CAPI const UBreakIterator * U_EXPORT2 ucasemap_getBreakIterator(const UCaseMap *csm) { - return csm->iter; + return reinterpret_cast(csm->iter); } U_CAPI void U_EXPORT2 -ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) { - // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. - delete reinterpret_cast(csm->iter); - csm->iter=iterToAdopt; +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + delete csm->iter; + csm->iter=reinterpret_cast(iterToAdopt); } U_CAPI int32_t U_EXPORT2 @@ -47,21 +83,23 @@ ucasemap_utf8ToTitle(UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - UText utext=UTEXT_INITIALIZER; - utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); - if(U_FAILURE(*pErrorCode)) { + if (U_FAILURE(*pErrorCode)) { return 0; } + UText utext=UTEXT_INITIALIZER; + utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); if(csm->iter==NULL) { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, - NULL, 0, - pErrorCode); + csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { + return 0; } - ubrk_setUText(csm->iter, &utext, pErrorCode); - int32_t length=ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8ToTitle, pErrorCode); + csm->iter->setText(&utext, *pErrorCode); + int32_t length=ucasemap_mapUTF8( + csm->caseLocale, csm->options, csm->iter, + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToTitle, NULL, *pErrorCode); utext_close(&utext); return length; } diff --git a/deps/icu-small/source/common/ucat.c 
b/deps/icu-small/source/common/ucat.c deleted file mode 100644 index cfd8b53295..0000000000 --- a/deps/icu-small/source/common/ucat.c +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2003, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: March 19 2003 -* Since: ICU 2.6 -********************************************************************** -*/ -#include "unicode/ucat.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "uassert.h" - -/* Separator between set_num and msg_num */ -static const char SEPARATOR = '%'; - -/* Maximum length of a set_num/msg_num key, incl. terminating zero. - * Longest possible key is "-2147483648%-2147483648" */ -#define MAX_KEY_LEN (24) - -/** - * Fill in buffer with a set_num/msg_num key string, given the numeric - * values. Numeric values must be >= 0. Buffer must be of length - * MAX_KEY_LEN or more. 
- */ -static char* -_catkey(char* buffer, int32_t set_num, int32_t msg_num) { - int32_t i = 0; - i = T_CString_integerToString(buffer, set_num, 10); - buffer[i++] = SEPARATOR; - T_CString_integerToString(buffer+i, msg_num, 10); - return buffer; -} - -U_CAPI u_nl_catd U_EXPORT2 -u_catopen(const char* name, const char* locale, UErrorCode* ec) { - return (u_nl_catd) ures_open(name, locale, ec); -} - -U_CAPI void U_EXPORT2 -u_catclose(u_nl_catd catd) { - ures_close((UResourceBundle*) catd); /* may be NULL */ -} - -U_CAPI const UChar* U_EXPORT2 -u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num, - const UChar* s, - int32_t* len, UErrorCode* ec) { - - char key[MAX_KEY_LEN]; - const UChar* result; - - if (ec == NULL || U_FAILURE(*ec)) { - goto ERROR; - } - - result = ures_getStringByKey((const UResourceBundle*) catd, - _catkey(key, set_num, msg_num), - len, ec); - if (U_FAILURE(*ec)) { - goto ERROR; - } - - return result; - - ERROR: - /* In case of any failure, return s */ - if (len != NULL) { - *len = u_strlen(s); - } - return s; -} - -/*eof*/ diff --git a/deps/icu-small/source/common/ucat.cpp b/deps/icu-small/source/common/ucat.cpp new file mode 100644 index 0000000000..dac56eeb5c --- /dev/null +++ b/deps/icu-small/source/common/ucat.cpp @@ -0,0 +1,78 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2003, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* Author: Alan Liu +* Created: March 19 2003 +* Since: ICU 2.6 +********************************************************************** +*/ +#include "unicode/ucat.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "uassert.h" + +/* Separator between set_num and msg_num */ +static const char SEPARATOR = '%'; + +/* Maximum length of a set_num/msg_num key, incl. terminating zero. + * Longest possible key is "-2147483648%-2147483648" */ +#define MAX_KEY_LEN (24) + +/** + * Fill in buffer with a set_num/msg_num key string, given the numeric + * values. Numeric values must be >= 0. Buffer must be of length + * MAX_KEY_LEN or more. + */ +static char* +_catkey(char* buffer, int32_t set_num, int32_t msg_num) { + int32_t i = 0; + i = T_CString_integerToString(buffer, set_num, 10); + buffer[i++] = SEPARATOR; + T_CString_integerToString(buffer+i, msg_num, 10); + return buffer; +} + +U_CAPI u_nl_catd U_EXPORT2 +u_catopen(const char* name, const char* locale, UErrorCode* ec) { + return (u_nl_catd) ures_open(name, locale, ec); +} + +U_CAPI void U_EXPORT2 +u_catclose(u_nl_catd catd) { + ures_close((UResourceBundle*) catd); /* may be NULL */ +} + +U_CAPI const UChar* U_EXPORT2 +u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num, + const UChar* s, + int32_t* len, UErrorCode* ec) { + + char key[MAX_KEY_LEN]; + const UChar* result; + + if (ec == NULL || U_FAILURE(*ec)) { + goto ERROR; + } + + result = ures_getStringByKey((const UResourceBundle*) catd, + _catkey(key, set_num, msg_num), + len, ec); + if (U_FAILURE(*ec)) { + goto ERROR; + } + + return result; + + ERROR: + /* In case of any failure, return s */ + if (len != NULL) { + *len = u_strlen(s); + } + return s; +} + +/*eof*/ diff --git a/deps/icu-small/source/common/uchar.c b/deps/icu-small/source/common/uchar.c deleted file mode 100644 index cf28f3f03c..0000000000 --- a/deps/icu-small/source/common/uchar.c +++ /dev/null @@ -1,733 +0,0 
@@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 1996-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************** -* -* File UCHAR.C -* -* Modification History: -* -* Date Name Description -* 04/02/97 aliu Creation. -* 4/15/99 Madhu Updated all the function definitions for C Implementation -* 5/20/99 Madhu Added the function u_getVersion() -* 8/19/1999 srl Upgraded scripts to Unicode3.0 -* 11/11/1999 weiv added u_isalnum(), cleaned comments -* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. -* 06/20/2000 helena OS/400 port changes; mostly typecast. -****************************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/uscript.h" -#include "unicode/udata.h" -#include "uassert.h" -#include "cmemory.h" -#include "ucln_cmn.h" -#include "utrie2.h" -#include "udataswp.h" -#include "uprops.h" -#include "ustr_imp.h" - -/* uchar_props_data.h is machine-generated by genprops --csource */ -#define INCLUDED_FROM_UCHAR_C -#include "uchar_props_data.h" - -/* constants and macros for access to the data ------------------------------ */ - -/* getting a uint32_t properties word from the data */ -#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); - -U_CFUNC UBool -uprv_haveProperties(UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return FALSE; - } - return TRUE; -} - -/* API functions ------------------------------------------------------------ */ - -/* Gets the Unicode character's general category.*/ -U_CAPI int8_t U_EXPORT2 -u_charType(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (int8_t)GET_CATEGORY(props); -} - -/* Enumerate all code points 
with their general categories. */ -struct _EnumTypeCallback { - UCharEnumTypeRange *enumRange; - const void *context; -}; - -static uint32_t U_CALLCONV -_enumTypeValue(const void *context, uint32_t value) { - return GET_CATEGORY(value); -} - -static UBool U_CALLCONV -_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* just cast the value to UCharCategory */ - return ((struct _EnumTypeCallback *)context)-> - enumRange(((struct _EnumTypeCallback *)context)->context, - start, end+1, (UCharCategory)value); -} - -U_CAPI void U_EXPORT2 -u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { - struct _EnumTypeCallback callback; - - if(enumRange==NULL) { - return; - } - - callback.enumRange=enumRange; - callback.context=context; - utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); -} - -/* Checks if ch is a lower case letter.*/ -U_CAPI UBool U_EXPORT2 -u_islower(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); -} - -/* Checks if ch is an upper case letter.*/ -U_CAPI UBool U_EXPORT2 -u_isupper(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); -} - -/* Checks if ch is a title case letter; usually upper case letters.*/ -U_CAPI UBool U_EXPORT2 -u_istitle(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); -} - -/* Checks if ch is a decimal digit. 
*/ -U_CAPI UBool U_EXPORT2 -u_isdigit(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); -} - -U_CAPI UBool U_EXPORT2 -u_isxdigit(UChar32 c) { - uint32_t props; - - /* check ASCII and Fullwidth ASCII a-fA-F */ - if( - (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || - (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) - ) { - return TRUE; - } - - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); -} - -/* Checks if the Unicode character is a letter.*/ -U_CAPI UBool U_EXPORT2 -u_isalpha(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); -} - -U_CAPI UBool U_EXPORT2 -u_isUAlphabetic(UChar32 c) { - return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; -} - -/* Checks if c is a letter or a decimal digit */ -U_CAPI UBool U_EXPORT2 -u_isalnum(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); -} - -/** - * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 
- * @internal - */ -U_CFUNC UBool -u_isalnumPOSIX(UChar32 c) { - return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); -} - -/* Checks if ch is a unicode character with assigned character type.*/ -U_CAPI UBool U_EXPORT2 -u_isdefined(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)!=0); -} - -/* Checks if the Unicode character is a base form character that can take a diacritic.*/ -U_CAPI UBool U_EXPORT2 -u_isbase(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); -} - -/* Checks if the Unicode character is a control character.*/ -U_CAPI UBool U_EXPORT2 -u_iscntrl(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); -} - -U_CAPI UBool U_EXPORT2 -u_isISOControl(UChar32 c) { - return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); -} - -/* Some control characters that are used as space. */ -#define IS_THAT_CONTROL_SPACE(c) \ - (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) - -/* Java has decided that U+0085 New Line is not whitespace any more. 
*/ -#define IS_THAT_ASCII_CONTROL_SPACE(c) \ - (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) - -/* Checks if the Unicode character is a space character.*/ -U_CAPI UBool U_EXPORT2 -u_isspace(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); -} - -U_CAPI UBool U_EXPORT2 -u_isJavaSpaceChar(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); -} - -/* Checks if the Unicode character is a whitespace character.*/ -U_CAPI UBool U_EXPORT2 -u_isWhitespace(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && - c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ - IS_THAT_ASCII_CONTROL_SPACE(c) - ); -} - -U_CAPI UBool U_EXPORT2 -u_isblank(UChar32 c) { - if((uint32_t)c<=0x9f) { - return c==9 || c==0x20; /* TAB or SPACE */ - } else { - /* Zs */ - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); - } -} - -U_CAPI UBool U_EXPORT2 -u_isUWhiteSpace(UChar32 c) { - return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; -} - -/* Checks if the Unicode character is printable.*/ -U_CAPI UBool U_EXPORT2 -u_isprint(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* comparing ==0 returns FALSE for the categories mentioned */ - return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); -} - -/** - * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. - * Implements UCHAR_POSIX_PRINT. - * @internal - */ -U_CFUNC UBool -u_isprintPOSIX(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* - * The only cntrl character in graph+blank is TAB (in blank). - * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 
- */ - return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); -} - -U_CAPI UBool U_EXPORT2 -u_isgraph(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* comparing ==0 returns FALSE for the categories mentioned */ - return (UBool)((CAT_MASK(props)& - (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) - ==0); -} - -/** - * Checks if c is in - * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] - * with space=\p{Whitespace} and Control=Cc. - * Implements UCHAR_POSIX_GRAPH. - * @internal - */ -U_CFUNC UBool -u_isgraphPOSIX(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ - /* comparing ==0 returns FALSE for the categories mentioned */ - return (UBool)((CAT_MASK(props)& - (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) - ==0); -} - -U_CAPI UBool U_EXPORT2 -u_ispunct(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); -} - -/* Checks if the Unicode character can start a Unicode identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isIDStart(UChar32 c) { - /* same as u_isalpha() */ - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); -} - -/* Checks if the Unicode character can be a Unicode identifier part other than starting the - identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isIDPart(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - (CAT_MASK(props)& - (U_GC_ND_MASK|U_GC_NL_MASK| - U_GC_L_MASK| - U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) - )!=0 || - u_isIDIgnorable(c)); -} - -/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isIDIgnorable(UChar32 c) { - if(c<=0x9f) { - return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); - } else { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); - } -} - -/*Checks if the Unicode character 
can start a Java identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isJavaIDStart(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); -} - -/*Checks if the Unicode character can be a Java identifier part other than starting the - * identifier. - */ -U_CAPI UBool U_EXPORT2 -u_isJavaIDPart(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - (CAT_MASK(props)& - (U_GC_ND_MASK|U_GC_NL_MASK| - U_GC_L_MASK| - U_GC_SC_MASK|U_GC_PC_MASK| - U_GC_MC_MASK|U_GC_MN_MASK) - )!=0 || - u_isIDIgnorable(c)); -} - -U_CAPI int32_t U_EXPORT2 -u_charDigitValue(UChar32 c) { - uint32_t props; - int32_t value; - GET_PROPS(c, props); - value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; - if(value<=9) { - return value; - } else { - return -1; - } -} - -U_CAPI double U_EXPORT2 -u_getNumericValue(UChar32 c) { - uint32_t props; - int32_t ntv; - GET_PROPS(c, props); - ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); - - if(ntv==UPROPS_NTV_NONE) { - return U_NO_NUMERIC_VALUE; - } else if(ntv>4)-12; - int32_t denominator=(ntv&0xf)+1; - return (double)numerator/denominator; - } else if(ntv>5)-14; - int32_t exp=(ntv&0x1f)+2; - numValue=mant; - - /* multiply by 10^exp without math.h */ - while(exp>=4) { - numValue*=10000.; - exp-=4; - } - switch(exp) { - case 3: - numValue*=1000.; - break; - case 2: - numValue*=100.; - break; - case 1: - numValue*=10.; - break; - case 0: - default: - break; - } - - return numValue; - } else if(ntv>2)-0xbf; - int32_t exp=(ntv&3)+1; - - switch(exp) { - case 4: - numValue*=60*60*60*60; - break; - case 3: - numValue*=60*60*60; - break; - case 2: - numValue*=60*60; - break; - case 1: - numValue*=60; - break; - case 0: - default: - break; - } - - return numValue; - } else if(ntv>2); - return (double)numerator/denominator; - } else { - /* reserved */ - return U_NO_NUMERIC_VALUE; - } -} - -U_CAPI int32_t U_EXPORT2 -u_digit(UChar32 ch, int8_t radix) { - int8_t value; - 
if((uint8_t)(radix-2)<=(36-2)) { - value=(int8_t)u_charDigitValue(ch); - if(value<0) { - /* ch is not a decimal digit, try latin letters */ - if(ch>=0x61 && ch<=0x7A) { - value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ - } else if(ch>=0x41 && ch<=0x5A) { - value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ - } else if(ch>=0xFF41 && ch<=0xFF5A) { - value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ - } else if(ch>=0xFF21 && ch<=0xFF3A) { - value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ - } - } - } else { - value=-1; /* invalid radix */ - } - return (int8_t)((value(36-2) || (uint32_t)digit>=(uint32_t)radix) { - return 0; - } else if(digit<10) { - return (UChar32)(0x30+digit); - } else { - return (UChar32)((0x61-10)+digit); - } -} - -/* miscellaneous, and support for uprops.cpp -------------------------------- */ - -U_CAPI void U_EXPORT2 -u_getUnicodeVersion(UVersionInfo versionArray) { - if(versionArray!=NULL) { - uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); - } -} - -U_CFUNC uint32_t -u_getMainProperties(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return props; -} - -U_CFUNC uint32_t -u_getUnicodeProperties(UChar32 c, int32_t column) { - U_ASSERT(column>=0); - if(column>=propsVectorsColumns) { - return 0; - } else { - uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); - return propsVectors[vecIndex+column]; - } -} - -U_CFUNC int32_t -uprv_getMaxValues(int32_t column) { - switch(column) { - case 0: - return indexes[UPROPS_MAX_VALUES_INDEX]; - case 2: - return indexes[UPROPS_MAX_VALUES_2_INDEX]; - default: - return 0; - } -} - -U_CAPI void U_EXPORT2 -u_charAge(UChar32 c, UVersionInfo versionArray) { - if(versionArray!=NULL) { - uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; - versionArray[0]=(uint8_t)(version>>4); - versionArray[1]=(uint8_t)(version&0xf); - versionArray[2]=versionArray[3]=0; - } -} - -U_CAPI UScriptCode U_EXPORT2 -uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { - uint32_t scriptX; - 
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return USCRIPT_INVALID_CODE; - } - if((uint32_t)c>0x10ffff) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return USCRIPT_INVALID_CODE; - } - scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; - if(scriptX=UPROPS_SCRIPT_X_WITH_OTHER) { - scx=scriptExtensions+scx[1]; - } - if(sc>=USCRIPT_CODE_LIMIT) { - /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ - return FALSE; - } - while(sc>*scx) { - ++scx; - } - return sc==(*scx&0x7fff); -} - -U_CAPI int32_t U_EXPORT2 -uscript_getScriptExtensions(UChar32 c, - UScriptCode *scripts, int32_t capacity, - UErrorCode *pErrorCode) { - uint32_t scriptX; - int32_t length; - const uint16_t *scx; - uint16_t sx; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(capacity<0 || (capacity>0 && scripts==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; - if(scriptX=UPROPS_SCRIPT_X_WITH_OTHER) { - scx=scriptExtensions+scx[1]; - } - length=0; - do { - sx=*scx++; - if(lengthcapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return length; -} - -U_CAPI UBlockCode U_EXPORT2 -ublock_getCode(UChar32 c) { - return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); -} - -/* property starts for UnicodeSet ------------------------------------------- */ - -static UBool U_CALLCONV -_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - return TRUE; -} - -#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) - -U_CFUNC void U_EXPORT2 -uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of the main trie */ - 
utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); - - /* add code points with hardcoded properties, plus the ones following them */ - - /* add for u_isblank() */ - USET_ADD_CP_AND_NEXT(sa, TAB); - - /* add for IS_THAT_CONTROL_SPACE() */ - sa->add(sa->set, CR+1); /* range TAB..CR */ - sa->add(sa->set, 0x1c); - sa->add(sa->set, 0x1f+1); - USET_ADD_CP_AND_NEXT(sa, NL); - - /* add for u_isIDIgnorable() what was not added above */ - sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ - sa->add(sa->set, HAIRSP); - sa->add(sa->set, RLM+1); - sa->add(sa->set, INHSWAP); - sa->add(sa->set, NOMDIG+1); - USET_ADD_CP_AND_NEXT(sa, ZWNBSP); - - /* add no-break spaces for u_isWhitespace() what was not added above */ - USET_ADD_CP_AND_NEXT(sa, NBSP); - USET_ADD_CP_AND_NEXT(sa, FIGURESP); - USET_ADD_CP_AND_NEXT(sa, NNBSP); - - /* add for u_digit() */ - sa->add(sa->set, U_a); - sa->add(sa->set, U_z+1); - sa->add(sa->set, U_A); - sa->add(sa->set, U_Z+1); - sa->add(sa->set, U_FW_a); - sa->add(sa->set, U_FW_z+1); - sa->add(sa->set, U_FW_A); - sa->add(sa->set, U_FW_Z+1); - - /* add for u_isxdigit() */ - sa->add(sa->set, U_f+1); - sa->add(sa->set, U_F+1); - sa->add(sa->set, U_FW_f+1); - sa->add(sa->set, U_FW_F+1); - - /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ - sa->add(sa->set, WJ); /* range WJ..NOMDIG */ - sa->add(sa->set, 0xfff0); - sa->add(sa->set, 0xfffb+1); - sa->add(sa->set, 0xe0000); - sa->add(sa->set, 0xe0fff+1); - - /* add for UCHAR_GRAPHEME_BASE and others */ - USET_ADD_CP_AND_NEXT(sa, CGJ); -} - -U_CFUNC void U_EXPORT2 -upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of the properties vectors trie */ - if(propsVectorsColumns>0) { - /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ - utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); - } -} 
diff --git a/deps/icu-small/source/common/uchar.cpp b/deps/icu-small/source/common/uchar.cpp new file mode 100644 index 0000000000..03592fe036 --- /dev/null +++ b/deps/icu-small/source/common/uchar.cpp @@ -0,0 +1,736 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1996-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File UCHAR.C +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 4/15/99 Madhu Updated all the function definitions for C Implementation +* 5/20/99 Madhu Added the function u_getVersion() +* 8/19/1999 srl Upgraded scripts to Unicode3.0 +* 11/11/1999 weiv added u_isalnum(), cleaned comments +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. +* 06/20/2000 helena OS/400 port changes; mostly typecast. 
+****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/uscript.h" +#include "unicode/udata.h" +#include "uassert.h" +#include "cmemory.h" +#include "ucln_cmn.h" +#include "utrie2.h" +#include "udataswp.h" +#include "uprops.h" +#include "ustr_imp.h" + +/* uchar_props_data.h is machine-generated by genprops --csource */ +#define INCLUDED_FROM_UCHAR_C +#include "uchar_props_data.h" + +/* constants and macros for access to the data ------------------------------ */ + +/* getting a uint32_t properties word from the data */ +#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); + +U_CFUNC UBool +uprv_haveProperties(UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return FALSE; + } + return TRUE; +} + +/* API functions ------------------------------------------------------------ */ + +/* Gets the Unicode character's general category.*/ +U_CAPI int8_t U_EXPORT2 +u_charType(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (int8_t)GET_CATEGORY(props); +} + +/* Enumerate all code points with their general categories. 
*/ +struct _EnumTypeCallback { + UCharEnumTypeRange *enumRange; + const void *context; +}; + +static uint32_t U_CALLCONV +_enumTypeValue(const void *context, uint32_t value) { + (void)context; + return GET_CATEGORY(value); +} + +static UBool U_CALLCONV +_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + /* just cast the value to UCharCategory */ + return ((struct _EnumTypeCallback *)context)-> + enumRange(((struct _EnumTypeCallback *)context)->context, + start, end+1, (UCharCategory)value); +} + +U_CAPI void U_EXPORT2 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { + struct _EnumTypeCallback callback; + + if(enumRange==NULL) { + return; + } + + callback.enumRange=enumRange; + callback.context=context; + utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); +} + +/* Checks if ch is a lower case letter.*/ +U_CAPI UBool U_EXPORT2 +u_islower(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); +} + +/* Checks if ch is an upper case letter.*/ +U_CAPI UBool U_EXPORT2 +u_isupper(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); +} + +/* Checks if ch is a title case letter; usually upper case letters.*/ +U_CAPI UBool U_EXPORT2 +u_istitle(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); +} + +/* Checks if ch is a decimal digit. 
*/ +U_CAPI UBool U_EXPORT2 +u_isdigit(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); +} + +U_CAPI UBool U_EXPORT2 +u_isxdigit(UChar32 c) { + uint32_t props; + + /* check ASCII and Fullwidth ASCII a-fA-F */ + if( + (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || + (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) + ) { + return TRUE; + } + + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); +} + +/* Checks if the Unicode character is a letter.*/ +U_CAPI UBool U_EXPORT2 +u_isalpha(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); +} + +U_CAPI UBool U_EXPORT2 +u_isUAlphabetic(UChar32 c) { + return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; +} + +/* Checks if c is a letter or a decimal digit */ +U_CAPI UBool U_EXPORT2 +u_isalnum(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); +} + +/** + * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 
+ * @internal + */ +U_CFUNC UBool +u_isalnumPOSIX(UChar32 c) { + return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); +} + +/* Checks if ch is a unicode character with assigned character type.*/ +U_CAPI UBool U_EXPORT2 +u_isdefined(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)!=0); +} + +/* Checks if the Unicode character is a base form character that can take a diacritic.*/ +U_CAPI UBool U_EXPORT2 +u_isbase(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); +} + +/* Checks if the Unicode character is a control character.*/ +U_CAPI UBool U_EXPORT2 +u_iscntrl(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); +} + +U_CAPI UBool U_EXPORT2 +u_isISOControl(UChar32 c) { + return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); +} + +/* Some control characters that are used as space. */ +#define IS_THAT_CONTROL_SPACE(c) \ + (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) + +/* Java has decided that U+0085 New Line is not whitespace any more. 
*/ +#define IS_THAT_ASCII_CONTROL_SPACE(c) \ + (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) + +/* Checks if the Unicode character is a space character.*/ +U_CAPI UBool U_EXPORT2 +u_isspace(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); +} + +U_CAPI UBool U_EXPORT2 +u_isJavaSpaceChar(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); +} + +/* Checks if the Unicode character is a whitespace character.*/ +U_CAPI UBool U_EXPORT2 +u_isWhitespace(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)( + ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && + c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ + IS_THAT_ASCII_CONTROL_SPACE(c) + ); +} + +U_CAPI UBool U_EXPORT2 +u_isblank(UChar32 c) { + if((uint32_t)c<=0x9f) { + return c==9 || c==0x20; /* TAB or SPACE */ + } else { + /* Zs */ + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); + } +} + +U_CAPI UBool U_EXPORT2 +u_isUWhiteSpace(UChar32 c) { + return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; +} + +/* Checks if the Unicode character is printable.*/ +U_CAPI UBool U_EXPORT2 +u_isprint(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* comparing ==0 returns FALSE for the categories mentioned */ + return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); +} + +/** + * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. + * Implements UCHAR_POSIX_PRINT. + * @internal + */ +U_CFUNC UBool +u_isprintPOSIX(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* + * The only cntrl character in graph+blank is TAB (in blank). + * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 
+ */ + return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); +} + +U_CAPI UBool U_EXPORT2 +u_isgraph(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* comparing ==0 returns FALSE for the categories mentioned */ + return (UBool)((CAT_MASK(props)& + (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) + ==0); +} + +/** + * Checks if c is in + * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] + * with space=\p{Whitespace} and Control=Cc. + * Implements UCHAR_POSIX_GRAPH. + * @internal + */ +U_CFUNC UBool +u_isgraphPOSIX(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ + /* comparing ==0 returns FALSE for the categories mentioned */ + return (UBool)((CAT_MASK(props)& + (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) + ==0); +} + +U_CAPI UBool U_EXPORT2 +u_ispunct(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); +} + +/* Checks if the Unicode character can start a Unicode identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isIDStart(UChar32 c) { + /* same as u_isalpha() */ + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); +} + +/* Checks if the Unicode character can be a Unicode identifier part other than starting the + identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isIDPart(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)( + (CAT_MASK(props)& + (U_GC_ND_MASK|U_GC_NL_MASK| + U_GC_L_MASK| + U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) + )!=0 || + u_isIDIgnorable(c)); +} + +/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isIDIgnorable(UChar32 c) { + if(c<=0x9f) { + return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); + } else { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); + } +} + +/*Checks if the Unicode character 
can start a Java identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isJavaIDStart(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); +} + +/*Checks if the Unicode character can be a Java identifier part other than starting the + * identifier. + */ +U_CAPI UBool U_EXPORT2 +u_isJavaIDPart(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)( + (CAT_MASK(props)& + (U_GC_ND_MASK|U_GC_NL_MASK| + U_GC_L_MASK| + U_GC_SC_MASK|U_GC_PC_MASK| + U_GC_MC_MASK|U_GC_MN_MASK) + )!=0 || + u_isIDIgnorable(c)); +} + +U_CAPI int32_t U_EXPORT2 +u_charDigitValue(UChar32 c) { + uint32_t props; + int32_t value; + GET_PROPS(c, props); + value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; + if(value<=9) { + return value; + } else { + return -1; + } +} + +U_CAPI double U_EXPORT2 +u_getNumericValue(UChar32 c) { + uint32_t props; + int32_t ntv; + GET_PROPS(c, props); + ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); + + if(ntv==UPROPS_NTV_NONE) { + return U_NO_NUMERIC_VALUE; + } else if(ntv>4)-12; + int32_t denominator=(ntv&0xf)+1; + return (double)numerator/denominator; + } else if(ntv>5)-14; + int32_t exp=(ntv&0x1f)+2; + numValue=mant; + + /* multiply by 10^exp without math.h */ + while(exp>=4) { + numValue*=10000.; + exp-=4; + } + switch(exp) { + case 3: + numValue*=1000.; + break; + case 2: + numValue*=100.; + break; + case 1: + numValue*=10.; + break; + case 0: + default: + break; + } + + return numValue; + } else if(ntv>2)-0xbf; + int32_t exp=(ntv&3)+1; + + switch(exp) { + case 4: + numValue*=60*60*60*60; + break; + case 3: + numValue*=60*60*60; + break; + case 2: + numValue*=60*60; + break; + case 1: + numValue*=60; + break; + case 0: + default: + break; + } + + return numValue; + } else if(ntv>2); + return (double)numerator/denominator; + } else { + /* reserved */ + return U_NO_NUMERIC_VALUE; + } +} + +U_CAPI int32_t U_EXPORT2 +u_digit(UChar32 ch, int8_t radix) { + int8_t value; + 
if((uint8_t)(radix-2)<=(36-2)) { + value=(int8_t)u_charDigitValue(ch); + if(value<0) { + /* ch is not a decimal digit, try latin letters */ + if(ch>=0x61 && ch<=0x7A) { + value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ + } else if(ch>=0x41 && ch<=0x5A) { + value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ + } else if(ch>=0xFF41 && ch<=0xFF5A) { + value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ + } else if(ch>=0xFF21 && ch<=0xFF3A) { + value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ + } + } + } else { + value=-1; /* invalid radix */ + } + return (int8_t)((value(36-2) || (uint32_t)digit>=(uint32_t)radix) { + return 0; + } else if(digit<10) { + return (UChar32)(0x30+digit); + } else { + return (UChar32)((0x61-10)+digit); + } +} + +/* miscellaneous, and support for uprops.cpp -------------------------------- */ + +U_CAPI void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray) { + if(versionArray!=NULL) { + uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); + } +} + +U_CFUNC uint32_t +u_getMainProperties(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return props; +} + +U_CFUNC uint32_t +u_getUnicodeProperties(UChar32 c, int32_t column) { + U_ASSERT(column>=0); + if(column>=propsVectorsColumns) { + return 0; + } else { + uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); + return propsVectors[vecIndex+column]; + } +} + +U_CFUNC int32_t +uprv_getMaxValues(int32_t column) { + switch(column) { + case 0: + return indexes[UPROPS_MAX_VALUES_INDEX]; + case 2: + return indexes[UPROPS_MAX_VALUES_2_INDEX]; + default: + return 0; + } +} + +U_CAPI void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray) { + if(versionArray!=NULL) { + uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; + versionArray[0]=(uint8_t)(version>>4); + versionArray[1]=(uint8_t)(version&0xf); + versionArray[2]=versionArray[3]=0; + } +} + +U_CAPI UScriptCode U_EXPORT2 +uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { + uint32_t scriptX; + 
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return USCRIPT_INVALID_CODE; + } + if((uint32_t)c>0x10ffff) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return USCRIPT_INVALID_CODE; + } + scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; + if(scriptX=UPROPS_SCRIPT_X_WITH_OTHER) { + scx=scriptExtensions+scx[1]; + } + if(sc>=USCRIPT_CODE_LIMIT) { + /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ + return FALSE; + } + while(sc>*scx) { + ++scx; + } + return sc==(*scx&0x7fff); +} + +U_CAPI int32_t U_EXPORT2 +uscript_getScriptExtensions(UChar32 c, + UScriptCode *scripts, int32_t capacity, + UErrorCode *pErrorCode) { + uint32_t scriptX; + int32_t length; + const uint16_t *scx; + uint16_t sx; + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(capacity<0 || (capacity>0 && scripts==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; + if(scriptX=UPROPS_SCRIPT_X_WITH_OTHER) { + scx=scriptExtensions+scx[1]; + } + length=0; + do { + sx=*scx++; + if(lengthcapacity) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + return length; +} + +U_CAPI UBlockCode U_EXPORT2 +ublock_getCode(UChar32 c) { + return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); +} + +/* property starts for UnicodeSet ------------------------------------------- */ + +static UBool U_CALLCONV +_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + /* add the start code point to the USet */ + const USetAdder *sa=(const USetAdder *)context; + sa->add(sa->set, start); + (void)end; + (void)value; + return TRUE; +} + +#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) + +U_CFUNC void U_EXPORT2 +uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of 
the main trie */ + utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); + + /* add code points with hardcoded properties, plus the ones following them */ + + /* add for u_isblank() */ + USET_ADD_CP_AND_NEXT(sa, TAB); + + /* add for IS_THAT_CONTROL_SPACE() */ + sa->add(sa->set, CR+1); /* range TAB..CR */ + sa->add(sa->set, 0x1c); + sa->add(sa->set, 0x1f+1); + USET_ADD_CP_AND_NEXT(sa, NL); + + /* add for u_isIDIgnorable() what was not added above */ + sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ + sa->add(sa->set, HAIRSP); + sa->add(sa->set, RLM+1); + sa->add(sa->set, INHSWAP); + sa->add(sa->set, NOMDIG+1); + USET_ADD_CP_AND_NEXT(sa, ZWNBSP); + + /* add no-break spaces for u_isWhitespace() what was not added above */ + USET_ADD_CP_AND_NEXT(sa, NBSP); + USET_ADD_CP_AND_NEXT(sa, FIGURESP); + USET_ADD_CP_AND_NEXT(sa, NNBSP); + + /* add for u_digit() */ + sa->add(sa->set, U_a); + sa->add(sa->set, U_z+1); + sa->add(sa->set, U_A); + sa->add(sa->set, U_Z+1); + sa->add(sa->set, U_FW_a); + sa->add(sa->set, U_FW_z+1); + sa->add(sa->set, U_FW_A); + sa->add(sa->set, U_FW_Z+1); + + /* add for u_isxdigit() */ + sa->add(sa->set, U_f+1); + sa->add(sa->set, U_F+1); + sa->add(sa->set, U_FW_f+1); + sa->add(sa->set, U_FW_F+1); + + /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ + sa->add(sa->set, WJ); /* range WJ..NOMDIG */ + sa->add(sa->set, 0xfff0); + sa->add(sa->set, 0xfffb+1); + sa->add(sa->set, 0xe0000); + sa->add(sa->set, 0xe0fff+1); + + /* add for UCHAR_GRAPHEME_BASE and others */ + USET_ADD_CP_AND_NEXT(sa, CGJ); +} + +U_CFUNC void U_EXPORT2 +upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of the properties vectors trie */ + if(propsVectorsColumns>0) { + /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ + utrie2_enum(&propsVectorsTrie, NULL, 
_enumPropertyStartsRange, sa); + } +} diff --git a/deps/icu-small/source/common/uchar_props_data.h b/deps/icu-small/source/common/uchar_props_data.h index 79ba55eb75..fd74402e2d 100644 --- a/deps/icu-small/source/common/uchar_props_data.h +++ b/deps/icu-small/source/common/uchar_props_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 1999-2016, International Business Machines @@ -1330,2199 +1330,2241 @@ static const UTrie2 propsTrie={ NULL, 0, FALSE, FALSE, 0, NULL }; -static const uint16_t propsVectorsTrie_index[28540]={ -0x4bb,0x4c3,0x4cb,0x4d3,0x4eb,0x4f3,0x4fb,0x503,0x50b,0x513,0x51b,0x523,0x52b,0x533,0x53b,0x543, -0x54a,0x552,0x55a,0x562,0x565,0x56d,0x575,0x57d,0x585,0x58d,0x595,0x59d,0x5a5,0x5ad,0x5b5,0x5bd, -0x5c5,0x5cd,0x5d4,0x5dc,0x5e4,0x5ec,0x5f4,0x5fc,0x604,0x60c,0x611,0x619,0x620,0x628,0x630,0x638, -0x640,0x648,0x650,0x658,0x65f,0x667,0x66f,0x677,0x67f,0x687,0x68f,0x697,0x69f,0x6a7,0x6af,0x6b7, -0x18ce,0xd31,0xe19,0x4db,0x4db,0xe89,0xe91,0x1a56,0x11bd,0x11d5,0x11c5,0x11cd,0x75c,0x762,0x76a,0x772, -0x77a,0x780,0x788,0x790,0x798,0x79e,0x7a6,0x7ae,0x7b6,0x7bc,0x7c4,0x7cc,0x7d4,0x7dc,0x7e4,0x7eb, -0x7f3,0x7f9,0x801,0x809,0x811,0x817,0x81f,0x827,0x82f,0x835,0x83d,0x845,0x84d,0x854,0x85c,0x864, -0x86c,0x870,0x878,0x87f,0x887,0x88f,0x897,0x89f,0x14dd,0x14e5,0x8a7,0x8af,0x8b7,0x8bf,0x8c7,0x8ce, -0x1543,0x1533,0x153b,0x1811,0x1819,0x11e5,0x8d6,0x11dd,0x1427,0x1427,0x1429,0x11f9,0x11fa,0x11ed,0x11ef,0x11f1, -0x154b,0x154d,0x8de,0x154d,0x8e6,0x8eb,0x8f3,0x1552,0x8f9,0x154d,0x8ff,0x907,0xc09,0x155a,0x155a,0x90f, -0x156a,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b, -0x156b,0x156b,0x156b,0x1562,0x917,0x1573,0x1573,0x91f,0xb16,0xb1e,0xb26,0xb2e,0x1583,0x157b,0x927,0x92f, 
-0x937,0x158d,0x1595,0x93f,0x158b,0x947,0x18d6,0xd39,0xb36,0xb3e,0xb46,0xb4b,0x1787,0xc3c,0xc43,0x16ef, -0xbd9,0x18de,0xd41,0xd49,0xd51,0xd59,0xf41,0xf41,0x17d7,0x17dc,0xc75,0xc7d,0x184d,0x1855,0x197f,0xe21, -0x185d,0xcc5,0xccd,0x1865,0x6bf,0x4db,0xf21,0xd61,0x170f,0x16f7,0x1707,0x16ff,0x179f,0x1797,0x175f,0xbe9, -0x1202,0x1202,0x1202,0x1202,0x1205,0x1202,0x1202,0x120d,0x94f,0x1215,0x953,0x95b,0x1215,0x963,0x96b,0x973, -0x1225,0x121d,0x122d,0x97b,0x983,0x98b,0x993,0x99b,0x1235,0x123d,0x1245,0x124d,0x9a3,0x1255,0x125c,0x1264, -0x126c,0x1274,0x127c,0x1284,0x128c,0x1293,0x129b,0x12a3,0x12ab,0x12b3,0x12b6,0x12b8,0x159d,0x1682,0x1688,0x9ab, -0x12c0,0x9b3,0x9bb,0x13da,0x13df,0x13e2,0x13ea,0x12c8,0x13f2,0x13f2,0x12d8,0x12d0,0x12e0,0x12e8,0x12f0,0x12f8, -0x1300,0x1308,0x1310,0x1318,0x1690,0x16e7,0x1821,0x195f,0x1328,0x132f,0x1337,0x133f,0x1320,0x1347,0x1698,0x169f, -0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x16a7,0x16aa,0x16a7,0x16a7,0x16b2,0x16b9,0x16bb,0x16c2, -0x16ca,0x16ce,0x16ce,0x16d1,0x16ce,0x16ce,0x16d7,0x16ce,0x1717,0x17cf,0x1829,0xb53,0xb59,0xb5f,0xb67,0xb6c, -0x1777,0xc19,0xc1d,0x17e4,0x1767,0x1767,0x1767,0xbf1,0x176f,0xc11,0x17b7,0xc65,0xbf9,0xc01,0xc01,0x186d, -0x17a7,0x1831,0xc53,0xc55,0x9c3,0x15ad,0x15ad,0x9cb,0x15b5,0x15b5,0x15b5,0x15b5,0x15b5,0x15b5,0x9d3,0x6c3, -0x140f,0x1431,0x9db,0x1439,0x9e3,0x1441,0x1449,0x1451,0x9eb,0x9f0,0x1459,0x1460,0x9f5,0x9fd,0x17c7,0xbe1, -0xa05,0x14b7,0x14be,0x1468,0x14c6,0x14cd,0x1470,0xa0d,0x1489,0x1489,0x148b,0x1478,0x1480,0x1480,0x1481,0x14d5, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, 
-0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, -0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x1174,0x171f,0x171f, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, 
-0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, 
-0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493, -0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x1493,0x149a,0x117c,0x1182, -0x15c5,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb, 
-0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb,0x15cb, -0x15cb,0x15cb,0x15cb,0x15cb,0xa15,0x15d3,0xa1d,0x18e6,0x1879,0x1879,0x1879,0x1879,0x1879,0x1879,0x1879,0x1879, -0x1875,0xcd5,0x1889,0x1881,0x188b,0x18ee,0x18ee,0xd69,0x177f,0x17ec,0x1841,0x1845,0x1839,0xc85,0xc8b,0xc8e, -0x17af,0xc5d,0x17f4,0xc96,0x1893,0x1896,0xcdd,0xd71,0x18a6,0x189e,0xce5,0xd79,0x18f6,0x18fa,0xd81,0xfe7, -0x18ae,0xced,0xcf5,0x1902,0x1912,0x190a,0xd89,0xee4,0xe29,0xe31,0x1ac9,0xf9f,0x1b6e,0x1b6e,0x191a,0xd91, -0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526, -0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528, -0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a, -0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525, -0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527, -0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529, -0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b, -0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526, -0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528, -0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a, -0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525, -0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527, 
-0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529, -0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b, -0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526, -0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528, -0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a, -0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525, -0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527, -0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529, -0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b, -0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0x152b,0x1525,0x1526,0x1527,0x1528,0x1529,0x152a,0xa25,0xd99,0xd9c, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd, -0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, 
-0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa, -0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x13fa,0x14a2,0x14a2,0x14a2,0x14a2,0x14a2,0x14a2,0x14a2,0x14a2, -0x14a7,0x14af,0x16df,0x118a,0x17bf,0x17bf,0x118e,0x1195,0xa2d,0xa35,0xa3d,0x1367,0x136e,0x1376,0xa45,0x137e, -0x13af,0x13af,0x1357,0x135f,0x1386,0x13a6,0x13a7,0x13b7,0x138e,0x134f,0xa4d,0x1396,0xa55,0x139e,0xa5d,0xa61, -0xc6d,0x13bf,0xa69,0xa71,0x13c7,0x13cd,0x13d2,0xa79,0xa89,0x1417,0x141f,0x1402,0x1407,0xa91,0xa99,0xa81, -0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed, -0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14ed,0x14f5,0x14f5,0x14f5,0x14f5, -0x136c,0x136c,0x13ac,0x13ec,0x142c,0x146c,0x14ac,0x14ec,0x1528,0x1568,0x1594,0x15d4,0x1614,0x1654,0x1694,0x16d4, -0x1714,0x1750,0x1790,0x17d0,0x1810,0x1844,0x1880,0x18c0,0x1900,0x1940,0x197c,0x19bc,0x19fc,0x1a3c,0x1a7c,0x1abc, 
-0xa80,0xac0,0xb00,0xe4d,0xb40,0xa40,0xb80,0xa40,0xe73,0xa40,0xa40,0xa40,0xa40,0xbc0,0x12a9,0x12a9, -0xeb3,0xef3,0xa40,0xa40,0xa40,0xa40,0xdd2,0xc00,0xa40,0xa40,0xc40,0xc80,0xcc0,0xe12,0xd92,0xd02, -0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9,0x11e9, -0x11e9,0x11e9,0x11e9,0x11e9,0xf33,0x1229,0x1069,0x10a9,0x1269,0xf73,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfe9, -0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9, -0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0xfa9,0x1029, +static const uint16_t propsVectorsTrie_index[29136]={ +0x4cf,0x4d7,0x4df,0x4e7,0x4ff,0x507,0x50f,0x517,0x51f,0x527,0x52f,0x537,0x53f,0x547,0x54f,0x557, +0x55e,0x566,0x56e,0x576,0x579,0x581,0x589,0x591,0x599,0x5a1,0x5a9,0x5b1,0x5b9,0x5c1,0x5c9,0x5d1, +0x5d9,0x5e1,0x5e8,0x5f0,0x5f8,0x600,0x608,0x610,0x618,0x620,0x625,0x62d,0x634,0x63c,0x644,0x64c, +0x654,0x65c,0x664,0x66c,0x673,0x67b,0x683,0x68b,0x693,0x69b,0x6a3,0x6ab,0x6b3,0x6bb,0x6c3,0x6cb, +0x195d,0xda7,0xe8f,0x6d3,0x4ef,0xeff,0xf07,0x1aeb,0x124c,0x1264,0x1254,0x125c,0x7cf,0x7d5,0x7dd,0x7e5, +0x7ed,0x7f3,0x7fb,0x803,0x80b,0x811,0x819,0x821,0x829,0x82f,0x837,0x83f,0x847,0x84f,0x857,0x85e, +0x866,0x86c,0x874,0x87c,0x884,0x88a,0x892,0x89a,0x8a2,0x8ba,0x8aa,0x8b2,0x8c2,0x8c9,0x8d1,0x8d9, +0x8e1,0x8e5,0x8ed,0x8f4,0x8fc,0x904,0x90c,0x914,0x156c,0x1574,0x91c,0x924,0x92c,0x934,0x93c,0x943, +0x15d2,0x15c2,0x15ca,0x18a0,0x18a8,0x1274,0x94b,0x126c,0x14b6,0x14b6,0x14b8,0x1288,0x1289,0x127c,0x127e,0x1280, +0x15da,0x15dc,0x953,0x15dc,0x95b,0x960,0x968,0x15e1,0x96e,0x15dc,0x974,0x97c,0xc7e,0x15e9,0x15e9,0x984, +0x15f9,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa,0x15fa, +0x15fa,0x15fa,0x15fa,0x15f1,0x98c,0x1602,0x1602,0x994,0xb8b,0xb93,0xb9b,0xba3,0x1612,0x160a,0x99c,0x9a4, 
+0x9ac,0x161c,0x1624,0x9b4,0x161a,0x9bc,0x1965,0xdaf,0xbab,0xbb3,0xbbb,0xbc0,0x1816,0xcb1,0xcb8,0x177e, +0xc4e,0x196d,0xdb7,0xdbf,0xdc7,0xdcf,0xfb7,0xfb7,0x1866,0x186b,0xceb,0xcf3,0x18dc,0x18e4,0x1a0e,0xe97, +0x18ec,0xd3b,0xd43,0x18f4,0x6db,0x4ef,0xf97,0xdd7,0x179e,0x1786,0x1796,0x178e,0x182e,0x1826,0x17ee,0xc5e, +0x1291,0x1291,0x1291,0x1291,0x1294,0x1291,0x1291,0x129c,0x9c4,0x12a4,0x9c8,0x9d0,0x12a4,0x9d8,0x9e0,0x9e8, +0x12b4,0x12ac,0x12bc,0x9f0,0x9f8,0xa00,0xa08,0xa10,0x12c4,0x12cc,0x12d4,0x12dc,0xa18,0x12e4,0x12eb,0x12f3, +0x12fb,0x1303,0x130b,0x1313,0x131b,0x1322,0x132a,0x1332,0x133a,0x1342,0x1345,0x1347,0x162c,0x1711,0x1717,0xa20, +0x134f,0xa28,0xa30,0x1469,0x146e,0x1471,0x1479,0x1357,0x1481,0x1481,0x1367,0x135f,0x136f,0x1377,0x137f,0x1387, +0x138f,0x1397,0x139f,0x13a7,0x171f,0x1776,0x18b0,0x19ee,0x13b7,0x13be,0x13c6,0x13ce,0x13af,0x13d6,0x1727,0x172e, +0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1736,0x1739,0x1736,0x1736,0x1741,0x1748,0x174a,0x1751, +0x1759,0x175d,0x175d,0x1760,0x175d,0x175d,0x1766,0x175d,0x17a6,0x185e,0x18b8,0xbc8,0xbce,0xbd4,0xbdc,0xbe1, +0x1806,0xc8e,0xc92,0x1873,0x17f6,0x17f6,0x17f6,0xc66,0x17fe,0xc86,0x1846,0xcdb,0xc6e,0xc76,0xc76,0x18fc, +0x1836,0x18c0,0xcc8,0xccb,0xa38,0x163c,0x163c,0xa40,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0xa48,0x6df, +0x149e,0x14c0,0xa50,0x14c8,0xa58,0x14d0,0x14d8,0x14e0,0xa60,0xa65,0x14e8,0x14ef,0xa6a,0xa72,0x1856,0xc56, +0xa7a,0x1546,0x154d,0x14f7,0x1555,0x155c,0x14ff,0xa82,0x1518,0x1518,0x151a,0x1507,0x150f,0x150f,0x1510,0x1564, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, 
+0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x1201,0x17ae,0x17ae, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, 
+0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, 
+0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522, +0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1522,0x1529,0x1211,0x1209, +0x1654,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a, 
+0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a,0x165a, +0x165a,0x165a,0x165a,0x165a,0xa8a,0x1662,0xa92,0x1975,0x1908,0x1908,0x1908,0x1908,0x1908,0x1908,0x1908,0x1908, +0x1904,0xd4b,0x1918,0x1910,0x191a,0x197d,0x197d,0xddf,0x180e,0x187b,0x18d0,0x18d4,0x18c8,0xcfb,0xd01,0xd04, +0x183e,0xcd3,0x1883,0xd0c,0x1922,0x1925,0xd53,0xde7,0x1935,0x192d,0xd5b,0xdef,0x1985,0x1989,0xdf7,0x105d, +0x193d,0xd63,0xd6b,0x1991,0x19a1,0x1999,0xdff,0xf5a,0xe9f,0xea7,0x1b5e,0x1015,0x1c03,0x1c03,0x19a9,0xe07, +0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5, +0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7, +0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9, +0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4, +0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6, +0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8, +0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba, +0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5, +0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7, +0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9, +0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4, +0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6, 
+0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8, +0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba, +0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5, +0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7, +0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9, +0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4, +0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6, +0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8, +0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba, +0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0x15ba,0x15b4,0x15b5,0x15b6,0x15b7,0x15b8,0x15b9,0xa9a,0xe0f,0xe12, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c, +0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c,0x158c, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, 
+0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489, +0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1489,0x1531,0x1531,0x1531,0x1531,0x1531,0x1531,0x1531,0x1531, +0x1536,0x153e,0x176e,0x1219,0x184e,0x184e,0x121d,0x1224,0xaa2,0xaaa,0xab2,0x13f6,0x13fd,0x1405,0xaba,0x140d, +0x143e,0x143e,0x13e6,0x13ee,0x1415,0x1435,0x1436,0x1446,0x141d,0x13de,0xac2,0x1425,0xaca,0x142d,0xad2,0xad6, +0xce3,0x144e,0xade,0xae6,0x1456,0x145c,0x1461,0xaee,0xafe,0x14a6,0x14ae,0x1491,0x1496,0xb06,0xb0e,0xaf6, +0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c, +0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x157c,0x1584,0x1584,0x1584,0x1584, +0x13bc,0x13bc,0x13fc,0x143c,0x147c,0x14bc,0x14fc,0x153c,0x1578,0x15b8,0x15e4,0x1624,0x1664,0x16a4,0x16e4,0x1724, +0x1764,0x17a0,0x17e0,0x1820,0x1860,0x1894,0x18d0,0x1910,0x1950,0x1990,0x19cc,0x1a0c,0x1a4c,0x1a8c,0x1acc,0x1b0c, 
+0xa80,0xac0,0xb00,0xb3b,0xb7b,0xa40,0xbbb,0xa40,0xe65,0xa40,0xa40,0xa40,0xa40,0xbfb,0x12fb,0x12fb, +0xea5,0xee5,0xa40,0xa40,0xa40,0xa40,0xc3b,0xc5b,0xa40,0xa40,0xc9b,0xcdb,0xd1b,0xe2d,0xded,0xd5d, +0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b,0x123b, +0x123b,0x123b,0x123b,0x123b,0xf25,0x127b,0x10bb,0x10fb,0x12bb,0x1045,0x107b,0x107b,0x107b,0xf65,0xf85,0xfc5, +0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85, +0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0xf85,0x1005, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, 
+0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, 0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, -0xd42,0xd52,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xcc2, -0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169, -0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x1169,0x10e9, 
-0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9, -0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9,0x1129, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0xb74,0xb7b,0xb83,0xb8b,0x1727,0x1727,0x1727,0xb93,0xb9b,0xb9e,0x1757,0x174f,0xbd1,0xcfd,0xd01,0xd05, -0x4db,0x4db,0x4db,0x4db,0xd0d,0x18b6,0xd15,0xf39,0x15db,0xaa1,0xaa7,0xff7,0xba6,0x178f,0xc4b,0x4db, -0x15f0,0x15e3,0x15e8,0x172f,0xbae,0xbb6,0x1142,0x1148,0x1ab1,0xf56,0x1aa1,0x6cb,0x4db,0x4db,0x4db,0x4db, -0x1ad1,0x1ad1,0x1ad1,0x1ad1,0x1ad1,0x1ad1,0x1ad1,0x1ad1,0x1ad1,0xfa7,0xfaf,0xfb7,0x4db,0x4db,0x4db,0x4db, -0xbbe,0xbc1,0xda4,0x1b19,0xfef,0x6d3,0x4db,0x1088,0xc9e,0xd1d,0x4db,0x4db,0x1a66,0xeec,0xef4,0x1b59, -0xc25,0xc2c,0xc34,0x1922,0x1af9,0x4db,0x1ad9,0xfc7,0x192a,0xdac,0xdb4,0xdbc,0x1017,0x6db,0x4db,0x4db, -0x1932,0x1932,0x6e3,0x4db,0x1b86,0x10a0,0x1b7e,0x10a8,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0xdc4,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x1987,0x1989,0xe39,0xe40,0x1942,0x193a,0xdcc,0xf19,0x1a5e,0xed4,0xedc,0xfbf,0x1a76,0x1a7a,0xf11,0x1037, -0xf8a,0xf8f,0x6eb,0x4db,0x1090,0x1098,0x1ac1,0xf97,0xf6c,0xf72,0xf7a,0xf82,0x4db,0x4db,0x4db,0x4db, -0x1bc6,0x1bbe,0x1132,0x113a,0x1b41,0x1b39,0x105e,0x4db,0x4db,0x4db,0x4db,0x4db,0x1b29,0x101f,0x1027,0x102f, -0x1af1,0x1ae9,0xfd7,0x112a,0x1a82,0xf29,0x6f3,0x4db,0x106e,0x1076,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc, 
-0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x17fc,0x1801,0xca6,0xcad,0xcad,0xcad, -0x1809,0x1809,0x1809,0xcb5,0x1b76,0x1b76,0x1b76,0x1b76,0x1b76,0x1b76,0x6fb,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a, -0x194a,0x194a,0x194c,0x194a,0x1954,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x1957,0x194a,0x194a,0x194a,0x194a, -0x194a,0x703,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991,0x1991, -0x1991,0xe48,0xfdf,0x70b,0x4db,0x4db,0x70f,0xf31,0x1b11,0x1b09,0xfff,0x1007,0x717,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x1a6e,0x1a6e,0xefc,0xf01,0xf09,0x4db,0x4db,0x1114, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x1aa9,0x1aa9,0x1aa9,0xf49,0xf4e,0x71f,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x15f8,0x15f8,0x15f8,0x15f8,0x15f8,0x15f8,0x15f8,0xaaf,0x1608,0xab7,0x1609,0x1600,0x1611,0x1617,0x161f,0xabf, -0x1747,0x1747,0x727,0x4db,0x4db,0x4db,0x4db,0x4db,0x1737,0x1737,0xbc9,0xcbd,0x4db,0x4db,0x4db,0x4db, -0x1650,0x1657,0xac7,0x165a,0xacf,0xad7,0xadf,0x1654,0xae7,0xaef,0xaf7,0x1659,0x1661,0x1650,0x1657,0x1653, -0x165a,0x1662,0x1651,0x1658,0x1654,0xafe,0x1627,0x162f,0x1636,0x163d,0x162a,0x1632,0x1639,0x1640,0xb06,0x1648, 
-0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e,0x1b9e, -0x1b8e,0x1b91,0x1b8e,0x1b98,0x10e0,0x72f,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x110c,0x737,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x73b,0x103f,0x1b31,0x1045,0x1b31,0x104d,0x1052,0x1056,0x1056,0x10b0,0x10b8,0x10c0,0x10c8,0x10d0,0x10c8, -0x10d8,0x10c8,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743, -0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743, -0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743,0x743, -0x743,0x744,0xb0e,0x166a,0x166a,0x166a,0x74c,0x74c,0x74c,0x74c,0x173f,0x173f,0x173f,0x173f,0x173f,0x173f, -0x173f,0x754,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c, -0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c, -0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c, -0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c,0x74c, -0x74c,0x74c,0x18be,0xd25,0x18c6,0x18c6,0xd29,0xe59,0xe61,0xe69,0xdd4,0xdda,0x196f,0xde2,0x1967,0xdea, -0xdee,0xdf5,0xdfd,0xe04,0xe0c,0xe11,0xe11,0xe11,0xe11,0xe11,0x19c0,0x19c8,0x19c0,0x19ce,0x19d6,0x19a1, 
-0x19de,0x19e6,0x19c0,0x19ee,0x19f6,0x19fd,0x1a05,0x19a9,0x19c0,0x1a08,0x19b1,0x19b8,0x1a10,0x1a16,0x1a92,0x1a99, -0x1a8a,0x1a1e,0x1a26,0x1a2e,0x1a36,0x1b01,0x1a3e,0x1a46,0xe71,0xe79,0x1999,0x1999,0x1999,0xe81,0x1ab9,0x1ab9, -0xf5e,0xf64,0xe50,0xe51,0xe51,0xe51,0xe51,0xe51,0xe51,0xe51,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x1ae1,0x1ae1,0x1ae1,0x1ae1,0x1ae1,0x1ae1,0xfcf,0x4db,0x1bb6,0x1bae,0x10e8,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0xe99,0xea1,0xea9,0xeb1,0xeb9,0xec1,0xec8,0xecc,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x1b51,0x1b49,0x1066,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x1b21,0x100f,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x10f0,0x10f5,0x10fd, -0x1104,0x111c,0x1122,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61,0x1b61, -0x1b61,0x1b66,0x1b61,0x1b61,0x1b61,0x107e,0x1080,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, 
-0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1150,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6, -0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1bd6,0x1158,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db,0x4db, -0x4db,0x4db,0x4db,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, -0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, -0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, -0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x119d,0x1160,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1168,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a, 
-0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x11a5,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x116c,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160, -0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x1160,0x116c,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x11ad,0x1a4e,0x1a4e,0x1a4e,0x1a4e,0x1a4e,0x1a4e,0x11b5,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, 
-0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1505,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x150d,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d, -0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x151d,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, -0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, -0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, 
-0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672, -0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1672,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977, -0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1977,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6, -0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1ba6,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce, -0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x1bce,0x4ba,0x4ba,0x4ba,0x273,0x273,0x273,0x273, -0x273,0x273,0x273,0x273,0x273,0x276,0x27f,0x279,0x279,0x27c,0x273,0x273,0x273,0x273,0x273,0x273, -0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x7a1,0x79b,0x780,0x777, -0x76e,0x76b,0x762,0x77d,0x768,0x774,0x777,0x792,0x789,0x77a,0x79e,0x771,0x75f,0x75f,0x75f,0x75f, -0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x786,0x783,0x78c,0x78c,0x78c,0x79b,0x762,0x7ad,0x7ad,0x7ad, 
-0x7ad,0x7ad,0x7ad,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7, -0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x768,0x76e,0x774,0x798,0x75c,0x795,0x7aa,0x7aa,0x7aa, -0x7aa,0x7aa,0x7aa,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4, -0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x768,0x78f,0x765,0x78c,0x273,0,0,0,0, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, +0xd9d,0xdad,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd1d, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x113b, +0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb, +0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x11fb,0x117b, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0xbe9,0xbf0,0xbf8,0xc00,0x17b6,0x17b6,0x17b6,0xc08,0xc10,0xc13,0x17e6,0x17de,0xc46,0xd73,0xd77,0xd7b, +0x4ef,0x4ef,0x4ef,0x4ef,0xd83,0x1945,0xd8b,0xfaf,0x166a,0xb16,0xb1c,0x106d,0xc1b,0x181e,0xcc0,0x4ef, +0x167f,0x1672,0x1677,0x17be,0xc23,0xc2b,0x11c6,0x11cc,0x1b46,0xfcc,0x1b36,0x6e7,0x4ef,0x4ef,0x4ef,0x4ef, +0x1b66,0x1b66,0x1b66,0x1b66,0x1b66,0x1b66,0x1b66,0x1b66,0x1b66,0x101d,0x1025,0x102d,0x4ef,0x4ef,0x4ef,0x4ef, 
+0xc33,0xc36,0xe1a,0x1bae,0x1065,0x6ef,0x4ef,0x10fe,0xd14,0xd93,0x4ef,0x4ef,0x1afb,0xf62,0xf6a,0x1bee, +0xc9a,0xca1,0xca9,0x19b1,0x1b8e,0x4ef,0x1b6e,0x103d,0x19b9,0xe22,0xe2a,0xe32,0x108d,0x6f7,0x4ef,0x4ef, +0x19c1,0x19c1,0x6ff,0x4ef,0x1c1b,0x1116,0x1c13,0x111e,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0xe3a,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x1a16,0x1a18,0xeaf,0xeb6,0x19d1,0x19c9,0xe42,0xf8f,0x1af3,0xf4a,0xf52,0x1035,0x1b0b,0x1b0f,0xf87,0x10ad, +0x1000,0x1005,0x707,0x4ef,0x1106,0x110e,0x1b56,0x100d,0xfe2,0xfe8,0xff0,0xff8,0x4ef,0x4ef,0x4ef,0x4ef, +0x1c5b,0x1c53,0x11b6,0x11be,0x1bd6,0x1bce,0x10d4,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1bbe,0x1095,0x109d,0x10a5, +0x1b86,0x1b7e,0x104d,0x11ae,0x1b17,0xf9f,0x70f,0x4ef,0x10e4,0x10ec,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x1be6,0x1bde,0x10dc,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x79a,0x79e,0x717,0x7a6,0x71e, +0x726,0x1bb6,0x1085,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1174,0x1179,0x1181,0x1188,0x11a0, +0x11a6,0x4ef,0x4ef,0x72e,0x732,0x73a,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x188b,0x188b,0x188b,0x188b,0x188b, +0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x188b, +0x188b,0x188b,0x188b,0x188b,0x188b,0x188b,0x1890,0xd1c,0xd23,0xd23,0xd23,0x1898,0x1898,0x1898,0xd2b,0x1c0b, +0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x1c0b,0x742,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9, +0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19db,0x19d9,0x19e3, +0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x19e6,0x19d9,0x19d9,0x19d9,0x19d9,0x19d9,0x74a,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, 
+0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20, +0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0x1a20,0xebe,0x1055,0x752,0x4ef, +0x4ef,0x756,0xfa7,0x1ba6,0x1b9e,0x1075,0x107d,0x75e,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x1b03,0x1b03,0xf72,0xf77,0xf7f,0x4ef,0x4ef,0x1198,0xec6,0xec7,0xec7,0xec7,0xec7, +0xec7,0xec7,0xec7,0x766,0x4ef,0x4ef,0x762,0x7b7,0x7b7,0x7b7,0x7b7,0x7b7,0x7b7,0x7b7,0x7b7,0x7b7, +0x7b7,0x7b7,0x76e,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1b3e,0x1b3e,0x1b3e,0xfbf,0xfc4, +0x776,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1687,0x1687,0x1687,0x1687,0x1687, +0x1687,0x1687,0xb24,0x1697,0xb2c,0x1698,0x168f,0x16a0,0x16a6,0x16ae,0xb34,0x17d6,0x17d6,0x77e,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x17c6,0x17c6,0xc3e,0xd33,0x4ef,0x4ef,0x4ef,0x4ef,0x16df,0x16e6,0xb3c,0x16e9,0xb44, +0xb4c,0xb54,0x16e3,0xb5c,0xb64,0xb6c,0x16e8,0x16f0,0x16df,0x16e6,0x16e2,0x16e9,0x16f1,0x16e0,0x16e7,0x16e3, +0xb73,0x16b6,0x16be,0x16c5,0x16cc,0x16b9,0x16c1,0x16c8,0x16cf,0xb7b,0x16d7,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33, +0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c33,0x1c23,0x1c26,0x1c23,0x1c2d,0x1164, +0x786,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1190,0x78e,0x4ef,0x4ef,0x4ef, 
+0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x792,0x10b5,0x1bc6,0x10bb, +0x1bc6,0x10c3,0x10c8,0x10cc,0x10cc,0x1126,0x112e,0x1136,0x113e,0x1146,0x114c,0x1154,0x115c,0x7ae,0x7ae,0x7ae, +0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae, +0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae, +0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7ae,0x7af,0xb83,0x16f9,0x16f9, +0x16f9,0x7bf,0x7bf,0x7bf,0x7bf,0x17ce,0x17ce,0x17ce,0x17ce,0x17ce,0x17ce,0x17ce,0x7c7,0x7bf,0x7bf,0x7bf, +0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf, +0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf, +0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf, +0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x194d,0xd9b,0x1955, +0x1955,0xd9f,0xecf,0xed7,0xedf,0xe4a,0xe50,0x19fe,0xe58,0x19f6,0xe60,0xe64,0xe6b,0xe73,0xe7a,0xe82, +0xe87,0xe87,0xe87,0xe87,0xe87,0x1a4f,0x1a57,0x1a5f,0x1a63,0x1a6b,0x1a30,0x1a73,0x1a7b,0x1a5f,0x1a83,0x1a8b, +0x1a92,0x1a9a,0x1a38,0x1a5f,0x1a9d,0x1a40,0x1a47,0x1aa5,0x1aab,0x1b27,0x1b2e,0x1b1f,0x1ab3,0x1abb,0x1ac3,0x1acb, +0x1b96,0x1ad3,0x1adb,0xee7,0xeef,0x1a28,0x1a28,0x1a28,0xef7,0x1b4e,0x1b4e,0xfd4,0xfda,0x1b76,0x1b76,0x1b76, +0x1b76,0x1b76,0x1b76,0x1045,0x4ef,0x1c4b,0x1c43,0x116c,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, 
+0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0xf0f,0xf17,0xf1f, +0xf27,0xf2f,0xf37,0xf3e,0xf42,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6,0x1bf6, +0x1bf6,0x1bf6,0x1bf6,0x1bfb,0x1bf6,0x1bf6,0x1bf6,0x10f4,0x10f6,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x11d4,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b, +0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x1c6b,0x11dc,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x4ef, +0x4ef,0x4ef,0x4ef,0x4ef,0x4ef,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x122c,0x11e4,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9, 
+0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9, +0x11f9,0x11f9,0x11f9,0x11f9,0x11ec,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x1709,0x1709,0x1709,0x1709,0x1709,0x1709,0x1709,0x1709,0x1709,0x1709,0x1709, +0x1709,0x1709,0x1709,0x1709,0x1709,0x1234,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11ed,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4,0x11e4, +0x11e4,0x11e4,0x11e4,0x11e4,0x11ed,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x11f5,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9, 
+0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9, +0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9, +0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9, +0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x11f9,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x123c,0x1ae3,0x1ae3,0x1ae3,0x1ae3,0x1ae3,0x1ae3,0x1244,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x1594,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, 
+0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x159c,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4, +0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15a4,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac, +0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x15ac,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701, +0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1701,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, 
+0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06, +0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1a06,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b, +0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c3b,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63, +0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x1c63,0x4ce,0x2c4,0x2c4,0x2c4,0x2c4, +0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c7,0x2d0,0x2ca,0x2ca,0x2cd,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4, +0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x7f5,0x7ef,0x7d4,0x7cb, +0x7c2,0x7bf,0x7b6,0x7d1,0x7bc,0x7c8,0x7cb,0x7e6,0x7dd,0x7ce,0x7f2,0x7c5,0x7b3,0x7b3,0x7b3,0x7b3, +0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7da,0x7d7,0x7e0,0x7e0,0x7e0,0x7ef,0x7b6,0x801,0x801,0x801, +0x801,0x801,0x801,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb, +0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7fb,0x7bc,0x7c2,0x7c8,0x7ec,0x7b0,0x7e9,0x7fe,0x7fe,0x7fe, +0x7fe,0x7fe,0x7fe,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8, +0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7f8,0x7bc,0x7e3,0x7b9,0x7e0,0x2c4,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x282,0x282,0x282,0x282, -0x282,0x291,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282, -0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x285,0x5fa,0x7b6,0x7b9, -0x600,0x7b9,0x7b3,0x5f7,0x5ee,0x28b,0x60c,0x28e,0x7bc,0x5e5,0x603,0x7b0,0x5fd,0x609,0x5eb,0x5eb, -0x5f1,0x288,0x5f7,0x5f4,0x5ee,0x5eb,0x60c,0x28e,0x5e8,0x5e8,0x5e8,0x5fa,0x297,0x297,0x297,0x297, -0x297,0x297,0x615,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x615,0x297,0x297,0x297, -0x297,0x297,0x297,0x606,0x615,0x297,0x297,0x297,0x297,0x297,0x615,0x60f,0x612,0x612,0x294,0x294, -0x294,0x294,0x60f,0x294,0x612,0x612,0x612,0x294,0x612,0x612,0x294,0x294,0x60f,0x294,0x612,0x612, -0x294,0x294,0x294,0x606,0x60f,0x612,0x612,0x294,0x612,0x294,0x60f,0x294,0x2a3,0x61b,0x2a3,0x29a, -0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a0,0x618,0x2a3,0x61b, -0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x61b,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a, -0x2a3,0x29a,0x621,0x618,0x2a3,0x29a,0x2a3,0x61b,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x618,0x624,0x61e, -0x2a3,0x29a,0x2a3,0x29a,0x618,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x624,0x61e,0x621,0x618,0x2a3, -0x61b,0x2a3,0x29a,0x2a3,0x61b,0x627,0x621,0x618,0x2a3,0x61b,0x2a3,0x29a,0x2a3,0x29a,0x621,0x618, -0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a, -0x2a3,0x29a,0x621,0x618,0x2a3,0x29a,0x2a3,0x61b,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a, -0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x2a3,0x29a,0x2a3,0x29a,0x2a3,0x29a,0x29d,0x2a6,0x2b2,0x2b2,0x2a6, -0x2b2,0x2a6,0x2b2,0x2b2,0x2a6,0x2b2,0x2b2,0x2b2,0x2a6,0x2a6,0x2b2,0x2b2,0x2b2,0x2b2,0x2a6,0x2b2, -0x2b2,0x2a6,0x2b2,0x2b2,0x2b2,0x2a6,0x2a6,0x2a6,0x2b2,0x2b2,0x2a6,0x2b2,0x2b5,0x2a9,0x2b2,0x2a6, -0x2b2,0x2a6,0x2b2,0x2b2,0x2a6,0x2b2,0x2a6,0x2a6,0x2b2,0x2a6,0x2b2,0x2b5,0x2a9,0x2b2,0x2b2,0x2b2, 
-0x2a6,0x2b2,0x2a6,0x2b2,0x2b2,0x2a6,0x2a6,0x2af,0x2b2,0x2a6,0x2a6,0x2a6,0x2af,0x2af,0x2af,0x2af, -0x2b8,0x2b8,0x2ac,0x2b8,0x2b8,0x2ac,0x2b8,0x2b8,0x2ac,0x2b5,0x62a,0x2b5,0x62a,0x2b5,0x62a,0x2b5, -0x62a,0x2b5,0x62a,0x2b5,0x62a,0x2b5,0x62a,0x2b5,0x62a,0x2a6,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9, -0x2b2,0x2a6,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2a9,0x2b8,0x2b8,0x2ac, -0x2b5,0x2a9,0x990,0x990,0x993,0x98d,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9, -0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9,0x2b5,0x2a9, -0x993,0x98d,0x993,0x98d,0x990,0x98a,0x993,0x98d,0xb4f,0xc51,0x990,0x98a,0x990,0x98a,0x993,0x98d, -0x993,0x98d,0x993,0x98d,0x993,0x98d,0x993,0x98d,0x993,0x98d,0x993,0x98d,0xc51,0xc51,0xc51,0xd4a, -0xd4a,0xd4a,0xd4d,0xd4d,0xd4a,0xd4d,0xd4d,0xd4a,0xd4a,0xd4d,0xe8e,0xe91,0xe91,0xe91,0xe91,0xe8e, -0xe91,0xe8e,0xe91,0xe8e,0xe91,0xe8e,0xe91,0xe8e,0x2bb,0x62d,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, -0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x62d,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, -0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, -0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2be,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, -0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x996,0x996,0x996, -0x996,0x996,0xc54,0xc54,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2cd,0x2cd,0x2cd, -0x2cd,0x2cd,0x2cd,0x2cd,0x2ca,0x2ca,0x2c1,0x2c1,0x633,0x2c1,0x2cd,0x636,0x2d0,0x636,0x636,0x636, -0x2d0,0x636,0x2cd,0x2cd,0x639,0x2d3,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c7,0x630,0x630,0x630,0x630, -0x2c4,0x630,0x2c1,0xac8,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x99f,0x99f, -0x99c,0x999,0x99c,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57,0xc57, -0xc57,0xc57,0xc57,0xc57,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c, 
-0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c, -0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c, -0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c,0x63c, -0x63c,0x63c,0x63c,0x63c,0x63f,0x63f,0x8f4,0x63f,0x63f,0x8f7,0xacb,0xacb,0xacb,0xacb,0xacb,0xacb, -0xacb,0xacb,0xacb,0xc09,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xe55,0xe55,0xe55,0xe55, -0xe58,0xd1d,0xd1d,0xd1d,0x642,0x642,0xace,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e, -0xc4e,0xc4e,0xc4e,0xc4e,0xf3c,0xf39,0xf3c,0xf39,0x2e2,0x2eb,0xf3c,0xf39,9,9,0x2f1,0xe94, -0xe94,0xe94,0x2d9,0x1491,9,9,9,9,0x2ee,0x2dc,0x300,0x2df,0x300,0x300,0x300,9, -0x300,9,0x300,0x300,0x2f7,0x648,0x648,0x648,0x648,0x648,0x648,0x648,0x648,0x648,0x648,0x648, -0x648,0x648,0x648,0x648,0x648,0x648,9,0x648,0x648,0x648,0x648,0x648,0x648,0x648,0x300,0x300, -0x2f7,0x2f7,0x2f7,0x2f7,0x2f7,0x645,0x645,0x645,0x645,0x645,0x645,0x645,0x645,0x645,0x645,0x645, -0x645,0x645,0x645,0x645,0x645,0x645,0x2f4,0x645,0x645,0x645,0x645,0x645,0x645,0x645,0x2f7,0x2f7, -0x2f7,0x2f7,0x2f7,0xf3c,0x303,0x303,0x306,0x300,0x300,0x303,0x2fa,0x9a2,0xb58,0xb55,0x2fd,0x9a2, -0x2fd,0x9a2,0x2fd,0x9a2,0x2fd,0x9a2,0x2e8,0x2e5,0x2e8,0x2e5,0x2e8,0x2e5,0x2e8,0x2e5,0x2e8,0x2e5, -0x2e8,0x2e5,0x2e8,0x2e5,0x303,0x303,0x2fa,0x2f4,0xb07,0xb04,0xb52,0xc5d,0xc5a,0xc60,0xc5d,0xc5a, -0xd50,0xd53,0xd53,0xd53,0x9b1,0x654,0x312,0x315,0x312,0x312,0x312,0x315,0x312,0x312,0x312,0x312, -0x315,0x9b1,0x315,0x312,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x654,0x651,0x651, -0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651,0x651, -0x651,0x651,0x651,0x651,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64e,0x64b,0x64b, -0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b,0x64b, 
-0x9ab,0x64e,0x30c,0x30f,0x30c,0x30c,0x30c,0x30f,0x30c,0x30c,0x30c,0x30c,0x30f,0x9ab,0x30f,0x30c, -0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c, -0x312,0x30c,0x312,0x30c,0x312,0x30c,0x315,0x30f,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c, -0x312,0x30c,0x309,0x900,0x903,0x8e5,0x8e5,0x10e6,0x9a5,0x9a5,0xb5e,0xb5b,0x9ae,0x9a8,0x9ae,0x9a8, -0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c, -0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c, -0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c,0x312,0x30c, -0x312,0x315,0x30f,0x312,0x30c,0xb5e,0xb5b,0x312,0x30c,0xb5e,0xb5b,0x312,0x30c,0xb5e,0xb5b,0xe97, -0x315,0x30f,0x315,0x30f,0x312,0x30c,0x315,0x30f,0x312,0x30c,0x315,0x30f,0x315,0x30f,0x315,0x30f, -0x312,0x30c,0x315,0x30f,0x315,0x30f,0x315,0x30f,0x312,0x30c,0x315,0x30f,0x9b1,0x9ab,0x315,0x30f, -0x315,0x30f,0x315,0x30f,0x315,0x30f,0xd59,0xd56,0x315,0x30f,0xe9a,0xe97,0xe9a,0xe97,0xe9a,0xe97, -0xbca,0xbc7,0xbca,0xbc7,0xbca,0xbc7,0xbca,0xbc7,0xbca,0xbc7,0xbca,0xbc7,0xbca,0xbc7,0xbca,0xbc7, -0xec7,0xec4,0xec7,0xec4,0xfba,0xfb7,0xfba,0xfb7,0xfba,0xfb7,0xfba,0xfb7,0xfba,0xfb7,0xfba,0xfb7, -0xfba,0xfb7,0xfba,0xfb7,0x111f,0x111c,0x12f9,0x12f6,0x14ca,0x14c7,0x14ca,0x14c7,0x14ca,0x14c7,0x14ca,0x14c7, -0xc,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324, -0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0x324,0xc,0xc,0x327,0x318,0x318, -0x318,0x31b,0x318,0x318,0xc,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e, -0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e, -0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x321,0xc,0x86a,0x9b4,0xc,0xc,0x1494,0x1494,0x13ad, -0xf,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924, 
-0x924,0x924,0xd5c,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924,0x924, -0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0x32a,0xe9d,0x32a,0x32a,0x32a,0x336,0x32a, -0x32d,0x32a,0x32a,0x339,0x927,0xd5f,0xd62,0xd5f,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c, -0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0x33c,0xf,0xf,0xf,0xf,0xf, -0x33c,0x33c,0x33c,0x333,0x330,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xc63,0xc63,0xc63,0xc63,0x13b0,0x1497,0xf45,0xf45,0xf45,0xf42,0xf42,0xd6b,0x870,0xc72,0xc6f,0xc6f, -0xc66,0xc66,0xc66,0xc66,0xc66,0xc66,0xf3f,0xf3f,0xf3f,0xf3f,0xf3f,0x86d,0x148b,0x12,0xd68,0x873, -0x12c0,0x357,0x35a,0x35a,0x35a,0x35a,0x35a,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357, -0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0xf48,0xf48,0xf48,0xf48,0xf48, -0x876,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x8eb,0x8eb,0x8eb,0x8eb,0x8eb, -0x8eb,0x8eb,0x8eb,0xafe,0xafe,0xafe,0xc66,0xc6c,0xc69,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0x12bd, -0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x351,0x34e,0x34b,0x348,0xb61,0xb61, -0x8e8,0x357,0x357,0x363,0x357,0x35d,0x35d,0x35d,0x35d,0x357,0x357,0x357,0x357,0x357,0x357,0x357, -0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357, -0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357, -0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357, -0x357,0x357,0x357,0x357,0x9ba,0x9ba,0x357,0x357,0x357,0x357,0x357,0x9ba,0x35a,0x357,0x35a,0x357, -0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x357,0x9ba,0x357,0x357,0x357,0x35a, -0x366,0x357,0x342,0x342,0x342,0x342,0x342,0x342,0x342,0x33f,0x348,0x345,0x345,0x342,0x342,0x342, 
-0x342,0x360,0x360,0x342,0x342,0x348,0x345,0x345,0x345,0x342,0xc75,0xc75,0x354,0x354,0x354,0x354, -0x354,0x354,0x354,0x354,0x354,0x354,0x9ba,0x9ba,0x9ba,0x9b7,0x9b7,0xc75,0x9d2,0x9d2,0x9d2,0x9cc, -0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9c9,0x9cc,0x9c9,0x15,0x9bd,0x9cf,0x9c0,0x9cf,0x9cf, -0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf, -0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0xc78,0xc78,0xc78,0x9c6,0x9c6,0x9c6,0x9c6, -0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c3,0x9c3,0x9c3,0x9c3, -0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x15,0x15,0xc78,0xc78,0xc78,0xdcb,0xdcb,0xdcb,0xdcb, -0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb, -0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xdcb,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc, -0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0x9d8,0x9d8,0x9d8,0x9d8, -0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8, -0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8,0x9d8, -0x9d8,0x9d8,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0xb64,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0xedf,0xedf,0xedf,0xedf, -0xedf,0xedf,0xedf,0xedf,0xedf,0xedf,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2, -0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2, -0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xee2,0xed6,0xed6,0xed6,0xed6,0xed6,0xed6,0xed6,0xed6,0xed6, -0xee5,0xee5,0xed9,0xed9,0xedc,0xeeb,0xee8,0x111,0x111,0x111,0x111,0x111,0x17d3,0x17d3,0x17d3,0x17d3, -0x17d3,0x17d3,0x17d3,0x17d3,0x17d3,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xada,0xada,0xadd,0xadd, -0xada,0xada,0xada,0xada,0xada,0xada,0xada,0xada,0x75,0x75,0x75,0x75,0x1551,0x1551,0x1551,0x1551, 
-0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x154e,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x1fe,0x1fe,0x1fe,0x1fe, -0x1fe,0x1fe,0x1fe,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0x160b,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x20a,0x20a,0x20a,0x20a, -0x20a,0x20a,0x20a,0x20a,0x20a,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x120f,0x120f,0x120f,0x120f, -0x120f,0x120f,0x120f,0x120f,0x120f,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x1e0,0x1e0,0x1e0,0x1e0, -0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x1467,0x1467,0x1467,0x1467, -0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x173d,0x173d,0x173d,0x173d, -0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x186,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x15f6,0x15f6,0x15f6,0x15f6, -0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x1f8,0x1f8,0x1f8,0x1f8,0x15fc,0x15fc,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x1548,0x1548,0x1548,0x1548, -0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x16aa,0x16aa,0x16aa,0x16aa, -0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xdc2,0xdc2,0xdbf,0xdbf, -0xdbf,0xdc2,0xde,0xde,0xde,0xde,0xde,0xde,0xde,0xde,0xde,0xde,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x234,0x1755,0x1755,0x1755, -0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x17d6,0x17d6,0x240,0x17d6, 
-0x17d6,0x240,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x240,0x240,0x240,0x240,0x240,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x2d3,0x2d3,0x2d3,0x2d3, +0x2d3,0x2e2,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3, +0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d3,0x2d6,0x64b,0x80a,0x80d, +0x651,0x80d,0x807,0x648,0x63f,0x2dc,0x65d,0x2df,0x810,0x636,0x654,0x804,0x64e,0x65a,0x63c,0x63c, +0x642,0x2d9,0x648,0x645,0x63f,0x63c,0x65d,0x2df,0x639,0x639,0x639,0x64b,0x2e8,0x2e8,0x2e8,0x2e8, +0x2e8,0x2e8,0x666,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x666,0x2e8,0x2e8,0x2e8, +0x2e8,0x2e8,0x2e8,0x657,0x666,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x666,0x660,0x663,0x663,0x2e5,0x2e5, +0x2e5,0x2e5,0x660,0x2e5,0x663,0x663,0x663,0x2e5,0x663,0x663,0x2e5,0x2e5,0x660,0x2e5,0x663,0x663, +0x2e5,0x2e5,0x2e5,0x657,0x660,0x663,0x663,0x2e5,0x663,0x2e5,0x660,0x2e5,0x2f4,0x66c,0x2f4,0x2eb, +0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f1,0x669,0x2f4,0x66c, +0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x66c,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb, +0x2f4,0x2eb,0x672,0x669,0x2f4,0x2eb,0x2f4,0x66c,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x669,0x675,0x66f, +0x2f4,0x2eb,0x2f4,0x2eb,0x669,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x675,0x66f,0x672,0x669,0x2f4, +0x66c,0x2f4,0x2eb,0x2f4,0x66c,0x678,0x672,0x669,0x2f4,0x66c,0x2f4,0x2eb,0x2f4,0x2eb,0x672,0x669, +0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb, +0x2f4,0x2eb,0x672,0x669,0x2f4,0x2eb,0x2f4,0x66c,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb, +0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2f4,0x2eb,0x2f4,0x2eb,0x2f4,0x2eb,0x2ee,0x2f7,0x303,0x303,0x2f7, +0x303,0x2f7,0x303,0x303,0x2f7,0x303,0x303,0x303,0x2f7,0x2f7,0x303,0x303,0x303,0x303,0x2f7,0x303, +0x303,0x2f7,0x303,0x303,0x303,0x2f7,0x2f7,0x2f7,0x303,0x303,0x2f7,0x303,0x306,0x2fa,0x303,0x2f7, +0x303,0x2f7,0x303,0x303,0x2f7,0x303,0x2f7,0x2f7,0x303,0x2f7,0x303,0x306,0x2fa,0x303,0x303,0x303, 
+0x2f7,0x303,0x2f7,0x303,0x303,0x2f7,0x2f7,0x300,0x303,0x2f7,0x2f7,0x2f7,0x300,0x300,0x300,0x300, +0x309,0x309,0x2fd,0x309,0x309,0x2fd,0x309,0x309,0x2fd,0x306,0x67b,0x306,0x67b,0x306,0x67b,0x306, +0x67b,0x306,0x67b,0x306,0x67b,0x306,0x67b,0x306,0x67b,0x2f7,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa, +0x303,0x2f7,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x2fa,0x309,0x309,0x2fd, +0x306,0x2fa,0x9e4,0x9e4,0x9e7,0x9e1,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa, +0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa,0x306,0x2fa, +0x9e7,0x9e1,0x9e7,0x9e1,0x9e4,0x9de,0x9e7,0x9e1,0xba3,0xca5,0x9e4,0x9de,0x9e4,0x9de,0x9e7,0x9e1, +0x9e7,0x9e1,0x9e7,0x9e1,0x9e7,0x9e1,0x9e7,0x9e1,0x9e7,0x9e1,0x9e7,0x9e1,0xca5,0xca5,0xca5,0xd9e, +0xd9e,0xd9e,0xda1,0xda1,0xd9e,0xda1,0xda1,0xd9e,0xd9e,0xda1,0xee5,0xee8,0xee8,0xee8,0xee8,0xee5, +0xee8,0xee5,0xee8,0xee5,0xee8,0xee5,0xee8,0xee5,0x30c,0x67e,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c, +0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x67e,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c, +0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c, +0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30f,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c, +0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x30c,0x9ea,0x9ea,0x9ea, +0x9ea,0x9ea,0xca8,0xca8,0x327,0x327,0x327,0x327,0x327,0x327,0x327,0x327,0x327,0x31e,0x31e,0x31e, +0x31e,0x31e,0x31e,0x31e,0x31b,0x31b,0x318,0x318,0x684,0x318,0x31e,0x687,0x321,0x687,0x687,0x687, +0x321,0x687,0x31e,0x31e,0x68a,0x324,0x318,0x318,0x318,0x318,0x318,0x318,0x681,0x681,0x681,0x681, +0x315,0x681,0x318,0xb1c,0x327,0x327,0x327,0x327,0x327,0x312,0x312,0x312,0x312,0x312,0x9f3,0x9f3, +0x9f0,0x9ed,0x9f0,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab,0xcab, +0xcab,0xcab,0xcab,0xcab,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d, 
+0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d, +0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d, +0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d,0x68d, +0x68d,0x68d,0x68d,0x68d,0x690,0x690,0x948,0x690,0x690,0x94b,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, +0xb1f,0xb1f,0xb1f,0xc5d,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xd6e,0xeac,0xeac,0xeac,0xeac, +0xeaf,0xd71,0xd71,0xd71,0x693,0x693,0xb22,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2, +0xca2,0xca2,0xca2,0xca2,0xf93,0xf90,0xf93,0xf90,0x333,0x33c,0xf93,0xf90,0x27,0x27,0x342,0xeeb, +0xeeb,0xeeb,0x32a,0x14e8,0x27,0x27,0x27,0x27,0x33f,0x32d,0x351,0x330,0x351,0x351,0x351,0x27, +0x351,0x27,0x351,0x351,0x348,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x699, +0x699,0x699,0x699,0x699,0x699,0x699,0x27,0x699,0x699,0x699,0x699,0x699,0x699,0x699,0x351,0x351, +0x348,0x348,0x348,0x348,0x348,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x696, +0x696,0x696,0x696,0x696,0x696,0x696,0x345,0x696,0x696,0x696,0x696,0x696,0x696,0x696,0x348,0x348, +0x348,0x348,0x348,0xf93,0x354,0x354,0x357,0x351,0x351,0x354,0x34b,0x9f6,0xbac,0xba9,0x34e,0x9f6, +0x34e,0x9f6,0x34e,0x9f6,0x34e,0x9f6,0x339,0x336,0x339,0x336,0x339,0x336,0x339,0x336,0x339,0x336, +0x339,0x336,0x339,0x336,0x354,0x354,0x34b,0x345,0xb5b,0xb58,0xba6,0xcb1,0xcae,0xcb4,0xcb1,0xcae, +0xda4,0xda7,0xda7,0xda7,0xa05,0x6a5,0x363,0x366,0x363,0x363,0x363,0x366,0x363,0x363,0x363,0x363, +0x366,0xa05,0x366,0x363,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a5,0x6a2,0x6a2, +0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2, +0x6a2,0x6a2,0x6a2,0x6a2,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69f,0x69c,0x69c, +0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c, 
+0x9ff,0x69f,0x35d,0x360,0x35d,0x35d,0x35d,0x360,0x35d,0x35d,0x35d,0x35d,0x360,0x9ff,0x360,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x366,0x360,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x35a,0x954,0x957,0x939,0x939,0x113d,0x9f9,0x9f9,0xbb2,0xbaf,0xa02,0x9fc,0xa02,0x9fc, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x366,0x360,0x363,0x35d,0xbb2,0xbaf,0x363,0x35d,0xbb2,0xbaf,0x363,0x35d,0xbb2,0xbaf,0xeee, +0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x363,0x35d,0x366,0x360,0x366,0x360,0x366,0x360, +0x363,0x35d,0x366,0x360,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0xa05,0x9ff,0x366,0x360, +0x366,0x360,0x366,0x360,0x366,0x360,0xdad,0xdaa,0x366,0x360,0xef1,0xeee,0xef1,0xeee,0xef1,0xeee, +0xc1e,0xc1b,0xc1e,0xc1b,0xc1e,0xc1b,0xc1e,0xc1b,0xc1e,0xc1b,0xc1e,0xc1b,0xc1e,0xc1b,0xc1e,0xc1b, +0xf1e,0xf1b,0xf1e,0xf1b,0x1011,0x100e,0x1011,0x100e,0x1011,0x100e,0x1011,0x100e,0x1011,0x100e,0x1011,0x100e, +0x1011,0x100e,0x1011,0x100e,0x1176,0x1173,0x1350,0x134d,0x1521,0x151e,0x1521,0x151e,0x1521,0x151e,0x1521,0x151e, +0x2a,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375, +0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x375,0x2a,0x2a,0x378,0x369,0x369, +0x369,0x36c,0x369,0x369,0x2a,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f, +0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f, +0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x36f,0x372,0x2a,0x8be,0xa08,0x2a,0x2a,0x14eb,0x14eb,0x1404, +0x2d,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, 
+0x978,0x978,0xdb0,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, +0x37b,0x37b,0x37b,0x37b,0x37b,0x37b,0x37b,0x37b,0x37b,0x37b,0xef4,0x37b,0x37b,0x37b,0x387,0x37b, +0x37e,0x37b,0x37b,0x38a,0x97b,0xdb3,0xdb6,0xdb3,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d, +0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d, +0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x38d,0x2d,0x2d,0x2d,0x2d,0x2d, +0x38d,0x38d,0x38d,0x384,0x381,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d, +0xcb7,0xcb7,0xcb7,0xcb7,0x1407,0x14ee,0xf9c,0xf9c,0xf9c,0xf99,0xf99,0xdbf,0x8c4,0xcc6,0xcc3,0xcc3, +0xcba,0xcba,0xcba,0xcba,0xcba,0xcba,0xf96,0xf96,0xf96,0xf96,0xf96,0x8c1,0x14e2,0x30,0xdbc,0x8c7, +0x1317,0x3a8,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8, +0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f, +0x8ca,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x93f,0x93f,0x93f,0x93f,0x93f, +0x93f,0x93f,0x93f,0xb52,0xb52,0xb52,0xcba,0xcc0,0xcbd,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0x1314, +0x95a,0x95a,0x95a,0x95a,0x95a,0x95a,0x95a,0x95a,0x95a,0x95a,0x3a2,0x39f,0x39c,0x399,0xbb5,0xbb5, +0x93c,0x3a8,0x3a8,0x3b4,0x3a8,0x3ae,0x3ae,0x3ae,0x3ae,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8, +0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8, +0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8, +0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8, +0x3a8,0x3a8,0x3a8,0x3a8,0xa0e,0xa0e,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0xa0e,0x3ab,0x3a8,0x3ab,0x3a8, +0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0xa0e,0x3a8,0x3a8,0x3a8,0x3ab, +0x3b7,0x3a8,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x390,0x399,0x396,0x396,0x393,0x393,0x393, 
+0x393,0x3b1,0x3b1,0x393,0x393,0x399,0x396,0x396,0x396,0x393,0xcc9,0xcc9,0x3a5,0x3a5,0x3a5,0x3a5, +0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0xa0e,0xa0e,0xa0e,0xa0b,0xa0b,0xcc9,0xa26,0xa26,0xa26,0xa20, +0xa20,0xa20,0xa20,0xa20,0xa20,0xa20,0xa20,0xa1d,0xa20,0xa1d,0x33,0xa11,0xa23,0xa14,0xa23,0xa23, +0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, +0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xccc,0xccc,0xccc,0xa1a,0xa1a,0xa1a,0xa1a, +0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa17,0xa17,0xa17,0xa17, +0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0x33,0x33,0xccc,0xccc,0xccc,0xe22,0xe22,0xe22,0xe22, +0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22, +0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023, +0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0x1023,0xa2c,0xa2c,0xa2c,0xa2c, +0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c, +0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c,0xa2c, +0xa2c,0xa2c,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xbb8,0x36,0x36, +0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0xf36,0xf36,0xf36,0xf36, +0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39, +0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39, +0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d, +0xf3c,0xf3c,0xf30,0xf30,0xf33,0xf42,0xf3f,0x14a,0x14a,0x14a,0x14a,0x14a,6,6,6,6, +6,6,6,6,6,6,6,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1827,0x1827,0x1827,0x1827, +0x1827,0x1827,0x1827,0x1827,0x1827,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb2e,0xb2e,0xb31,0xb31, 
+0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0x9f,0x9f,0x9f,0x9f,0x15a5,0x15a5,0x15a5,0x15a5, +0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x15a2,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x23d,0x23d,0x23d,0x23d, +0x23d,0x23d,0x23d,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x249,0x249,0x249,0x249, +0x249,0x249,0x249,0x249,0x249,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1266,0x1266,0x1266,0x1266, +0x1266,0x1266,0x1266,0x1266,0x1266,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x21f,0x21f,0x21f,0x21f, +0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x14be,0x14be,0x14be,0x14be, +0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x204,0x204,0x204,0x204,0x204,0x204,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,3,0xc,0xf,0xf, +0xc,0x12,3,0x15,0,0,0,0,0,0,0,0,6,0x15,0x15,0x15, +0x15,0x15,0x15,0x18,0x18,0x15,0x15,0x15,6,6,6,6,0,0,9,9, +9,9,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x18, +0x15,0x15,0xc,0xf,0xf,0,0x12,0x12,0x12,0xc,0xc,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x921,0x921,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,0x921,0x921, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23, +0,0,0,0,0,0,0,0,6,6,6,6,6,6,6,0, +6,6,0,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,0x15,0x15,0x15,0x15,0x15,0x15,0, +0,0,0x15,0,0x15,0x15,0,0x15,0x15,0x15,0x15,0x15,0x15,0x15,9,0x15, +0,0,0,0,0,0,0,0,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21, +0x21,0x21,0,0,0,0,0,0,0x1791,0x1791,0x1791,0x1791,0x264,0x264,0x264,0x264, +0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a, 
+0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x1c2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a, +0x164a,0x164a,0x237,0x237,0x237,0x237,0x1650,0x1650,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c, +0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0,0,0,0,0x16fe,0x16fe,0x16fe,0x16fe,0x24c,0x24c,0x24c,0x24c, +0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xe19,0xe19,0xe16,0xe16,0xe16,0xe19,0x111,0x111, +0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x27c,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, +0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x182a,0x182a,0x288,0x182a,0x182a,0x288,0x182a,0x182a, +0x182a,0x182a,0x182a,0x288,0x288,0x288,0x288,0x288,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x975,0x975,6,0x15,0x15,0x15,0x15,0x15,0x15,0x18, +0x18,0x15,0x15,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,0x15,0x15,0x15,0x15,0x15, +0x15,0x18,9,0x15,0x15,0x15,0x15,0x12,6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -0x149d,0x37e,0x38d,0x38d,0x1b,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x1b,0x1b,0x393, -0x393,0x1b,0x1b,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393, -0x393,0x1b,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x1b,0x393,0x1b,0x1b,0x1b,0x393,0x393, 
-0x393,0x393,0x1b,0x1b,0x381,0xc7e,0x37e,0x38d,0x38d,0x37e,0x37e,0x37e,0x37e,0x1b,0x1b,0x38d, -0x38d,0x1b,0x1b,0x390,0x390,0x384,0xd71,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x37e, -0x1b,0x1b,0x1b,0x1b,0x396,0x396,0x1b,0x396,0x393,0x393,0x37e,0x37e,0x1b,0x1b,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x393,0x393,0x38a,0x38a,0x387,0x387,0x387,0x387, -0x387,0x38a,0x387,0x10f5,0x1b,0x1b,0x1b,0x1b,0x1e,0xc81,0x399,0xc84,0x1e,0x3a5,0x3a5,0x3a5, -0x3a5,0x3a5,0x3a5,0x1e,0x1e,0x1e,0x1e,0x3a5,0x3a5,0x1e,0x1e,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5, -0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x1e,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5, -0x3a5,0x1e,0x3a5,0x3a8,0x1e,0x3a5,0x3a8,0x1e,0x3a5,0x3a5,0x1e,0x1e,0x39c,0x1e,0x3a2,0x3a2, -0x3a2,0x399,0x399,0x1e,0x1e,0x1e,0x1e,0x399,0x399,0x1e,0x1e,0x399,0x399,0x39f,0x1e,0x1e, -0x1e,0xf51,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x3a8,0x3a8,0x3a8,0x3a5,0x1e,0x3a8,0x1e, -0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f,0x90f, -0x399,0x399,0x3a5,0x3a5,0x3a5,0xf51,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, -0x21,0x3ab,0x3ab,0x3b4,0x21,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0xc8d,0x3b7,0x21,0x3b7, -0x3b7,0x3b7,0x21,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7, -0x3b7,0x21,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x3b7,0x21,0x3b7,0x3b7,0x21,0x3b7,0x3b7,0x3b7, -0x3b7,0x3b7,0x21,0x21,0x3ae,0x3b7,0x3b4,0x3b4,0x3b4,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x21,0x3ab, -0x3ab,0x3b4,0x21,0x3b4,0x3b4,0x3b1,0x21,0x21,0x3b7,0x21,0x21,0x21,0x21,0x21,0x21,0x21, -0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x3b7,0xc8d,0xc87,0xc87,0x21,0x21,0x912,0x912, -0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x13b3,0xc8a,0x21,0x21,0x21,0x21,0x21,0x21, -0x21,0x16b9,0x21,0x21,0x21,0x21,0x21,0x21,0x24,0x3ba,0x3c9,0x3c9,0x24,0x3cf,0x3cf,0x3cf, -0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x24,0x24,0x3cf,0x3cf,0x24,0x24,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf, 
-0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x24,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf, -0x3cf,0x24,0x3cf,0x3cf,0x24,0xc90,0x3cf,0x3cf,0x3cf,0x3cf,0x24,0x24,0x3bd,0x3cf,0x3ba,0x3ba, -0x3c9,0x3ba,0x3ba,0x3ba,0xf54,0x24,0x24,0x3c9,0x3cc,0x24,0x24,0x3cc,0x3cc,0x3c0,0x24,0x24, -0x24,0x24,0x24,0x24,0x24,0x24,0x3ba,0x3ba,0x24,0x24,0x24,0x24,0x3d2,0x3d2,0x24,0x3cf, -0x3cf,0x3cf,0xf54,0xf54,0x24,0x24,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6,0x3c6, -0x3c3,0xc90,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24, -0x27,0x27,0x3d5,0x3e1,0x27,0x3e1,0x3e1,0x3e1,0x3e1,0x3e1,0x3e1,0x27,0x27,0x27,0x3e1,0x3e1, -0x3e1,0x27,0x3e1,0x3e1,0x3e4,0x3e1,0x27,0x27,0x27,0x3e1,0x3e1,0x27,0x3e1,0x27,0x3e1,0x3e1, -0x27,0x27,0x27,0x3e1,0x3e1,0x27,0x27,0x27,0x3e1,0x3e1,0x91b,0x27,0x27,0x27,0x3e1,0x3e1, -0x3e1,0x3e1,0x3e1,0x3e1,0x3e1,0x91b,0xd74,0x3e1,0x3e1,0x3e1,0x27,0x27,0x27,0x27,0x3d5,0x3db, -0x3d5,0x3db,0x3db,0x27,0x27,0x27,0x3db,0x3db,0x3db,0x27,0x3de,0x3de,0x3de,0x3d8,0x27,0x27, -0xf57,0x27,0x27,0x27,0x27,0x27,0x27,0x3d5,0x27,0x27,0x27,0x27,0x27,0x27,0x27,0x27, -0x27,0x27,0xe8b,0x918,0x918,0x918,0x918,0x918,0x918,0x918,0x918,0x918,0x915,0x915,0x915,0xc93, -0xc93,0xc93,0xc93,0xc93,0xc93,0xc96,0xc93,0x27,0x27,0x27,0x27,0x27,0x14a0,0x3f3,0x3f3,0x3f3, -0x2a,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x2a,0x3f6,0x3f6,0x3f6,0x2a,0x3f6,0x3f6, -0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x2a,0x3f6,0x3f6, -0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x14a3,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x2a,0x2a, -0x2a,0xf60,0x3e7,0x3e7,0x3e7,0x3f3,0x3f3,0x3f3,0x3f3,0x2a,0x3e7,0x3e7,0x3ea,0x2a,0x3e7,0x3e7, -0x3e7,0x3ed,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x3e7,0x3e7,0x2a,0xf60,0xf60,0x16bc,0x2a, -0x2a,0x2a,0x2a,0x2a,0x3f6,0x3f6,0xf5a,0xf5a,0x2a,0x2a,0x3f0,0x3f0,0x3f0,0x3f0,0x3f0,0x3f0, -0x3f0,0x3f0,0x3f0,0x3f0,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0xf5d,0xf5d,0xf5d,0xf5d, 
-0xf5d,0xf5d,0xf5d,0xf5d,0x1779,0x14a6,0x402,0x402,0x2d,0x408,0x408,0x408,0x408,0x408,0x408,0x408, -0x408,0x2d,0x408,0x408,0x408,0x2d,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408, -0x408,0x408,0x408,0x408,0x408,0x2d,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408, -0x2d,0x408,0x408,0x408,0x408,0x408,0x2d,0x2d,0xc99,0xc9c,0x402,0x3f9,0x405,0x402,0x3f9,0x402, -0x402,0x2d,0x3f9,0x405,0x405,0x2d,0x405,0x405,0x3f9,0x3fc,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d, -0x2d,0x3f9,0x3f9,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x408,0x2d,0x408,0x408,0xea3,0xea3, -0x2d,0x2d,0x3ff,0x3ff,0x3ff,0x3ff,0x3ff,0x3ff,0x3ff,0x3ff,0x3ff,0x3ff,0x2d,0xea6,0xea6,0x2d, -0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x30,0x14a9,0x414,0x414, -0x30,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x30,0x41a,0x41a,0x41a,0x30,0x41a,0x41a, -0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x12cf,0x41a,0x41a, -0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x41a,0x12cf,0x30, -0x30,0xf6c,0x40b,0x414,0x414,0x40b,0x40b,0x40b,0xf63,0x30,0x414,0x414,0x414,0x30,0x417,0x417, -0x417,0x40e,0x12d2,0x177c,0x30,0x30,0x30,0x30,0x177f,0x177f,0x177f,0x40b,0x177c,0x177c,0x177c,0x177c, -0x177c,0x177c,0x177c,0x16bf,0x41a,0x41a,0xf63,0xf63,0x30,0x30,0x411,0x411,0x411,0x411,0x411,0x411, -0x411,0x411,0x411,0x411,0xf66,0xf66,0xf66,0xf66,0xf66,0xf66,0x177c,0x177c,0x177c,0xf69,0xf6c,0xf6c, -0xf6c,0xf6c,0xf6c,0xf6c,0x33,0x33,0x9e4,0x9e4,0x33,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea, -0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x33,0x33,0x33,0x9ea,0x9ea, -0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea, -0x9ea,0x9ea,0x33,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x33,0x9ea,0x33,0x33, -0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x9ea,0x33,0x33,0x33,0x9de,0x33,0x33,0x33,0x33,0x9db, 
-0x9e4,0x9e4,0x9db,0x9db,0x9db,0x33,0x9db,0x33,0x9e4,0x9e4,0x9e7,0x9e4,0x9e7,0x9e7,0x9e7,0x9db, -0x33,0x33,0x33,0x33,0x33,0x33,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac, -0x33,0x33,0x9e4,0x9e4,0x9e1,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, -0x36,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435, -0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435, -0x435,0x420,0x435,0x432,0x420,0x420,0x420,0x420,0x420,0x420,0x426,0x36,0x36,0x36,0x36,0x41d, -0x43b,0x43b,0x43b,0x43b,0x43b,0x435,0x438,0x423,0x423,0x423,0x423,0x423,0x423,0x420,0x423,0x429, -0x42f,0x42f,0x42f,0x42f,0x42f,0x42f,0x42f,0x42f,0x42f,0x42f,0x42c,0x42c,0x36,0x36,0x36,0x36, -0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36, -0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x39,0x44a,0x44a,0x39, -0x44a,0x39,0x39,0x44a,0x44a,0x39,0x44a,0x39,0x39,0x44a,0x39,0x39,0x39,0x39,0x39,0x39, -0x44a,0x44a,0x44a,0x44a,0x39,0x44a,0x44a,0x44a,0x44a,0x44a,0x44a,0x44a,0x39,0x44a,0x44a,0x44a, -0x39,0x44a,0x39,0x44a,0x39,0x39,0x44a,0x44a,0x39,0x44a,0x44a,0x44a,0x44a,0x43e,0x44a,0x447, -0x43e,0x43e,0x43e,0x43e,0x43e,0x43e,0x39,0x43e,0x43e,0x44a,0x39,0x39,0x453,0x453,0x453,0x453, -0x453,0x39,0x450,0x39,0x441,0x441,0x441,0x441,0x441,0x43e,0x39,0x39,0x444,0x444,0x444,0x444, -0x444,0x444,0x444,0x444,0x444,0x444,0x39,0x39,0x44d,0x44d,0x13b6,0x13b6,0x39,0x39,0x39,0x39, -0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39, -0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x95d,0x95d,0x95d,0x960, -0x95d,0x95d,0x95d,0x95d,0x3c,0x95d,0x95d,0x95d,0x95d,0x960,0x95d,0x95d,0x95d,0x95d,0x960,0x95d, -0x95d,0x95d,0x95d,0x960,0x95d,0x95d,0x95d,0x95d,0x960,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d, -0x95d,0x95d,0x95d,0x95d,0x95d,0x960,0x9f9,0xf78,0xf78,0x3c,0x3c,0x3c,0x3c,0x92a,0x92a,0x92d, 
-0x92a,0x92d,0x92d,0x936,0x92d,0x936,0x92a,0x92a,0x92a,0x92a,0x92a,0x957,0x92a,0x92d,0x930,0x930, -0x933,0x93c,0x930,0x930,0x95d,0x95d,0x95d,0x95d,0x12db,0x12d5,0x12d5,0x12d5,0x92a,0x92a,0x92a,0x92d, -0x92a,0x92a,0x9ed,0x92a,0x3c,0x92a,0x92a,0x92a,0x92a,0x92d,0x92a,0x92a,0x92a,0x92a,0x92d,0x92a, -0x92a,0x92a,0x92a,0x92d,0x92a,0x92a,0x92a,0x92a,0x92d,0x92a,0x9ed,0x9ed,0x9ed,0x92a,0x92a,0x92a, -0x92a,0x92a,0x92a,0x92a,0x9ed,0x92d,0x9ed,0x9ed,0x9ed,0x3c,0x9f6,0x9f6,0x9f3,0x9f3,0x9f3,0x9f3, -0x9f3,0x9f3,0x9f0,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x3c,0xf6f,0x9f3,0xd77,0xd77,0xf72,0xf75, -0xf6f,0x10f8,0x10f8,0x10f8,0x10f8,0x12d8,0x12d8,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c, -0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c, -0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x459,0x459,0x459,0x459,0x459,0x459,0x3f,0x13bc, -0x3f,0x3f,0x3f,0x3f,0x3f,0x13bc,0x3f,0x3f,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456, -0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xd86, -0xa23,0x42,0xa23,0xa23,0xa23,0xa23,0x42,0x42,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0x42, -0xa23,0x42,0xa23,0xa23,0xa23,0xa23,0x42,0x42,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xd86, -0xa23,0x42,0xa23,0xa23,0xa23,0xa23,0x42,0x42,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xd86,0xa23,0x42,0xa23,0xa23, -0xa23,0xa23,0x42,0x42,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0x42,0xa23,0x42,0xa23,0xa23, -0xa23,0xa23,0x42,0x42,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xd86,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0x42,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0xd86,0xa23,0x42,0xa23,0xa23,0xa23,0xa23,0x42,0x42,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0xd86,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, 
-0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0x42,0x42,0x12de,0x12de,0xd80,0xd83,0xa1d,0xa26,0xa1a, -0xa1a,0xa1a,0xa1a,0xa26,0xa26,0xa20,0xa20,0xa20,0xa20,0xa20,0xa20,0xa20,0xa20,0xa20,0xa17,0xa17, -0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0xa17,0x42,0x42,0x42,0xa29,0xa29,0xa29,0xa29, -0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, -0xa29,0x16c5,0x45,0x45,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x45,0x45,0xa3b,0xa3e,0xa3e,0xa3e, -0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e, -0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa38,0xa35,0x48,0x48,0x48,0xa44,0xa44,0xa44,0xa44, -0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa41,0xa41,0xa41,0xa44,0xa44,0xa44,0x14af,0x14af,0x14af, -0x14af,0x14af,0x14af,0x14af,0x14af,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xa65,0xa65,0xa65,0xa65, -0xa65,0xa65,0xa47,0xa65,0xa65,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4d,0xa4a, -0xa5c,0xa5c,0xa5f,0xa68,0xa56,0xa53,0xa5c,0xa59,0xa68,0xc9f,0x4e,0x4e,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xca2,0xca2,0xca2,0xca2, -0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xa77,0xa77,0xaf5,0xaf8, -0xa7d,0xaf2,0xa7a,0xa77,0xa80,0xa8f,0xa83,0xa92,0xa92,0xa92,0xa6e,0x51,0xa86,0xa86,0xa86,0xa86, -0xa86,0xa86,0xa86,0xa86,0xa86,0xa86,0x51,0x51,0x51,0x51,0x51,0x51,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa89,0xa89,0xa89,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa89,0xa89,0xa89,0xa89,0xa71,0xf99,0x51,0x51,0x51,0x51,0x51,0x114c,0x114c,0x114c,0x114c, -0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x477,0x477,0x477,0x477, -0x477,0x477,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x477,0x477,0x477,0x477, 
-0x477,0x477,0x54,0x54,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x54,0x54,0x477,0x477,0x477,0x477, -0x477,0x477,0x477,0x477,0x54,0x47a,0x54,0x47a,0x54,0x47a,0x54,0x47a,0x477,0x477,0x477,0x477, -0x477,0x477,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x477,0x477,0x477,0x477, -0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x54,0x54,0x477,0x477,0x477,0x477, -0x477,0x477,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x47a,0x477,0x477,0x477,0x477, -0x477,0x54,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x471,0x477,0x471,0x471,0x46e,0x477,0x477, -0x477,0x54,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x46e,0x46e,0x46e,0x477,0x477,0x477,0x477, -0x54,0x54,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x54,0x46e,0x46e,0x46e,0x477,0x477,0x477,0x477, -0x477,0x477,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x46e,0x46e,0x46e,0x54,0x54,0x477,0x477, -0x477,0x54,0x477,0x477,0x47a,0x47a,0x47a,0x47a,0x47a,0x474,0x471,0x54,0xb6a,0xb6d,0xb6d,0xb6d, -0xfa2,0x57,0x148e,0x148e,0x148e,0x148e,0x483,0x483,0x483,0x483,0x483,0x483,0x4ce,0xb7f,0x5a,0x5a, -0x68a,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4d4,0x4e6,0x4d4,0x4e0,0x4da,0x68d,0x4cb,0x687,0x687,0x687, -0x687,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4d1,0x4e3,0x4d1,0x4dd,0x4d7,0x5a,0xd8f,0xd8f,0xd8f,0xd8f, -0xd8f,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x5a,0x5a,0x5a,0x4ec,0x4ec,0x4ec,0x4ec, -0x4ec,0x4ec,0x4ec,0x4e9,0x4ef,0x702,0x4ec,0x966,0x987,0xaa1,0xaa1,0xaa1,0xb82,0xb82,0xd92,0xd92, -0xd92,0xd92,0x1110,0x1113,0x1113,0x12e4,0x1488,0x14b2,0x14b5,0x14b5,0x16c8,0x5d,0x5d,0x5d,0x5d,0x5d, -0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f2,0x4f2,0x4f2,0x4f2,0x4f5,0xaa4,0xaa4, -0xb85,0xb8b,0xb8b,0xb88,0xb88,0xb88,0xb88,0xd95,0xea9,0xea9,0xea9,0xea9,0x10e3,0x60,0x60,0x60, -0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x525,0x525,0x525,0xaad, 
-0xeb2,0xfa8,0xfa8,0xfa8,0xfa8,0x123f,0x16cb,0x16cb,0x63,0x63,0x63,0x63,0x6b4,0x6b4,0x6b4,0x6b4, -0x6b7,0x6b7,0x6b7,0x6b7,0x6b7,0x6b7,0x531,0x531,0x52e,0x52e,0x52e,0x52e,0xeb8,0xeb8,0xeb8,0xeb5, -0xeb5,0xeb5,0xeb5,0xeb5,0x1119,0x1365,0x1365,0x1365,0x1365,0x12e7,0x12e7,0x12e7,0x1368,0x12ea,0x12ea,0x1368, -0x14b8,0x14b8,0x14b8,0x14b8,0x14bb,0x14bb,0x14bb,0x1782,0x1782,0x1782,0x1782,0x66,0x558,0x558,0x558,0x558, -0x558,0xab6,0xab6,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69, -0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x55b,0x55b,0x55b,0x55b, -0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c, -0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0xad1,0xad1,0xad1,0xad1, -0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1, -0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0x6f,0xad1,0xad1,0xad1,0xad1,0xad4,0xad1,0xad1,0xad1,0xad1, -0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad4, -0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0xad7,0xad7,0xad7,0xad7, -0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7, -0xad7,0xad7,0x72,0x72,0x72,0x72,0x72,0x72,0x72,0x72,0x72,0x72,0x78,0x7e9,0x7e3,0x7e9, -0x7e3,0x7e9,0x7e3,0x7e9,0x7e3,0x7e9,0x7e3,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3, -0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e3,0x7e3,0x7e3,0x7e9, -0x7e3,0x7e9,0x7e3,0x7e9,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3,0x7e9,0x7e3,0x7e3,0x7e3,0x7e3,0x7e3, -0x7e6,0xc2d,0xc2d,0x78,0x78,0x8fd,0x8fd,0x8c7,0x8c7,0x7ec,0x7ef,0xc2a,0x7b,0x7b,0x7b,0x7b, -0x7b,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801, -0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x10d1,0x7b,0x7b, -0x7e,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804, 
-0x804,0x804,0x804,0x7e,0x8d0,0x8d0,0x8d3,0x8d3,0x8d3,0x8d3,0x8d3,0x8d3,0x8d3,0x8d3,0x8d3,0x8d3, -0x8d3,0x8d3,0x8d3,0x8d3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, -0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0x1374,0x1374,0x1374,0x81, -0x81,0x81,0x81,0x81,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d, -0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d, -0x80d,0xd2f,0xd2f,0x84,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813, -0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813, -0x813,0x813,0x813,0x84,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9, -0xae9,0x87,0x87,0x87,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef, -0xaef,0xaef,0xaef,0xaef,0xaef,0xc36,0xaef,0xaef,0xaef,0xc36,0xaef,0x8a,0x8a,0x8a,0x8a,0x8a, -0x8a,0x8a,0x8a,0x8a,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173, -0x1173,0x1173,0x1173,0x1173,0x981,0x981,0x981,0x981,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d, -0x8d,0x8d,0x8d,0x8d,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8, -0x11e8,0x11e8,0x11e8,0x11e8,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x90,0x90,0x90,0x90,0x90, -0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x90,0x90,0x90,0x90, -0x90,0xac2,0x5a6,0x5ac,0x5b2,0x5b2,0x5b2,0x5b2,0x5b2,0x5b2,0x5b2,0x5b2,0x5b2,0x5a9,0x5ac,0x5ac, -0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x90,0x5ac,0x5ac,0x5ac,0x5ac, -0x5ac,0x90,0x5ac,0x90,0x5ac,0x5ac,0x90,0x5ac,0x5ac,0x90,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac, -0x5ac,0x5ac,0x5ac,0x5af,0x5c7,0x5c1,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1, -0x5c4,0x5ca,0x5c7,0x5c1,0x12f3,0x12f3,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, 
-0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7, -0x5c1,0x5c7,0x5c7,0x5c1,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, -0x93,0x93,0x93,0x93,0x5c4,0x5c1,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c1,0x5c4,0x5c1,0x5c1, -0x5c4,0x5c4,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c1,0x5c1,0x5c4,0x5c1,0x5c4,0x5c4,0x5c4,0x5c1, -0x5c4,0x5c4,0x5c4,0x5c4,0x93,0x93,0x5c4,0x5c4,0x5c4,0x5c4,0x5c1,0x5c1,0x5c4,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c4,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c4,0x5c4,0x5c1,0x5c1,0x93,0x93,0x93,0x93, -0x93,0x93,0x93,0x93,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0x5c7,0x5c7,0x91e,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5be,0x5be, -0xbc1,0xd47,0x93,0x93,0x825,0x837,0x834,0x837,0x834,0xc4b,0xc4b,0xd3b,0xd38,0x828,0x828,0x828, -0x828,0x83a,0x83a,0x83a,0x852,0x855,0x864,0x96,0x858,0x85b,0x867,0x867,0x84f,0x846,0x840,0x846, -0x840,0x846,0x840,0x843,0x843,0x85e,0x85e,0x861,0x85e,0x85e,0x85e,0x96,0x85e,0x84c,0x849,0x843, -0x96,0x96,0x96,0x96,0x5d3,0x5df,0x5d3,0xbc4,0x5d3,0x99,0x5d3,0x5df,0x5d3,0x5df,0x5d3,0x5df, -0x5d3,0x5df,0x5d3,0x5df,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9, -0x5df,0x5dc,0x5d6,0x5dc,0x5d6,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5dc,0x5d6,0x5dc,0x5d6,0x5dc, -0x5d6,0x99,0x99,0x5d0,0x723,0x726,0x73b,0x73e,0x71d,0x726,0x726,0x9f,0x705,0x708,0x708,0x708, -0x708,0x705,0x705,0x9f,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0xac5,0xac5,0xac5, -0x984,0x6ff,0x5e2,0x5e2,0x9f,0x74d,0x72c,0x71d,0x726,0x723,0x71d,0x72f,0x720,0x71a,0x71d,0x73b, -0x732,0x729,0x74a,0x71d,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x738,0x735, -0x73b,0x73b,0x73b,0x74d,0x70e,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b, -0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b, -0x70b,0x70b,0x70b,0x9f,0x9f,0x9f,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x9f,0x9f,0x70b,0x70b, 
-0x70b,0x70b,0x70b,0x70b,0x9f,0x9f,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x9f,0x9f,0x70b,0x70b, -0x70b,0x9f,0x9f,0x9f,0xb0d,0xb0d,0xb0d,0xb0d,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2, -0xa2,0xa2,0xa2,0xa2,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, -0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xa5,0xa5,0xa5,0xa5,0xa5,0x161a,0x161a,0x161a,0x161a, -0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0xb1c,0xb1c,0xb1c,0xb1c, -0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, -0xb1c,0xb1c,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xab,0xab,0xfb4,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0x16d1,0x16d1,0x16d1,0x16d1, -0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab, -0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xae,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb40,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xae,0xb40,0xb40,0xae,0xae,0xb40,0xae, -0xae,0xb40,0xb40,0xae,0xae,0xb40,0xb40,0xb40,0xb40,0xae,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb3d,0xb3d,0xb3d,0xb3d,0xae,0xb3d,0xae,0xb3d,0xb3d,0xb3d,0xb3d,0xcc0,0xb3d,0xb3d, -0xae,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb3d,0xb3d,0xb3d,0xb3d, -0xb40,0xb40,0xae,0xb40,0xb40,0xb40,0xb40,0xae,0xae,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, 
-0xb40,0xae,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xae,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb40,0xae,0xb40,0xb40,0xb40,0xb40,0xae,0xb40,0xb40,0xb40,0xb40, -0xb40,0xae,0xb40,0xae,0xae,0xae,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xae,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xdaa,0xdaa,0xae,0xae, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb3d,0xb3d,0xb3d,0xb37,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xec1,0xebe,0xae,0xae,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a, -0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb1,0xb46,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xbd3,0xbd3,0xbd3,0xbd3,0xbd3,0xbd3,0xbd3,0xbd3, -0xbd3,0xbd3,0xbd3,0xbd3,0xbd3,0xb4,0xbd3,0xbd3,0xbd3,0xbd3,0xbcd,0xbcd,0xbd0,0xb4,0xb4,0xb4, -0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc, -0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbd6,0xbd6,0xbd9,0xc3f,0xc3f,0xb7, -0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2, -0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbdf,0xbdf,0xba,0xba,0xba,0xba, -0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbe8, -0xbe8,0xbe8,0xbe8,0xbe8,0xbe8,0xbd,0xbe8,0xbe8,0xbe8,0xbd,0xbe5,0xbe5,0xbd,0xbd,0xbd,0xbd, -0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2, -0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2, -0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0x14cd,0x14cd,0xc0,0xcc3,0xcc3,0xcc3,0xccf,0xccf,0xccf,0xccf,0xcc3, 
-0xcc3,0xccf,0xccf,0xccf,0xc0,0xc0,0xc0,0xc0,0xccf,0xccf,0xcc3,0xccf,0xccf,0xccf,0xccf,0xccf, -0xccf,0xcc6,0xcc6,0xcc6,0xc0,0xc0,0xc0,0xc0,0xcc9,0xc0,0xc0,0xc0,0xcd5,0xcd5,0xccc,0xccc, -0xccc,0xccc,0xccc,0xccc,0xccc,0xccc,0xccc,0xccc,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8, -0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xcd8,0xc3,0xc3,0xcd8,0xcd8,0xcd8,0xcd8, -0xcd8,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0x14d0,0x14d0,0x14d0,0x14d0, -0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0, -0xc6,0xc6,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0, -0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0xc6,0xc6,0x14d0,0x14d0,0x14d0,0x14d0, -0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0, -0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0xc6,0xc6,0xc6,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0, -0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0xc6,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0x14d0,0xc6,0xc6, -0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0x16d4,0x16d4,0x16d4,0x16d4, -0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6, -0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xc9,0xcff,0xcff,0xcff, -0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff, -0xcff,0xcff,0xcff,0xc9,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff, -0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xc9,0xcff,0xcff,0xc9,0xcff,0xcff,0xcff,0xcff,0xcff, -0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xc9,0xc9,0xcff,0xcff,0xcff,0xcff, -0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xcff,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9, -0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9, 
-0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xd02,0xd02,0xd02,0xd02, -0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02, -0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xcc,0xcc,0xcc,0xcc,0xcc,0xd44,0xd44,0xd44,0xcf, -0xcf,0xcf,0xcf,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e, -0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xcf,0xcf,0xcf,0xd41, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08, -0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08, -0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd2,0xd05,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11, -0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd11, -0xd11,0xd11,0xd11,0xd11,0xd11,0xd11,0xd5,0xd5,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e, -0xd0e,0xd0e,0xd5,0xd5,0xd5,0xd5,0xd5,0xd5,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c, -0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd8,0xd8, -0xd14,0xd8,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14, -0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd8,0xd14,0xd14,0xd8,0xd8,0xd8, -0xd14,0xd8,0xd8,0xd14,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17, -0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xdb,0xdb,0xdb,0xdb,0xdb, -0xdb,0xdb,0xdb,0xdb,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0x14d3, -0x14d3,0x1785,0x1785,0xe1,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0, -0x138,0x138,0x138,0x138,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7, -0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdd7,0xdce,0xdce,0xdd4,0xdd4,0xdce, 
-0xe4,0xe4,0xdd1,0xdd1,0x10e0,0x10e0,0x10e0,0x10e0,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7, -0xe7,0xe7,0xe7,0xe7,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c, -0xc3c,0xc3c,0xc3c,0xc3c,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6, -0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d9,0xea,0xea,0xea,0xea,0xea,0x1788, -0x12ff,0x1122,0xed0,0xed0,0xde9,0xde6,0xde9,0xde6,0xde6,0xddd,0xddd,0xddd,0xddd,0xddd,0xddd,0x112b, -0x1128,0x112b,0x1128,0x1125,0x1125,0x1125,0x13c5,0x13c2,0xed,0xed,0xed,0xed,0xed,0xde3,0xde0,0xde0, -0xde0,0xddd,0xde3,0xde0,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec, -0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xf0,0xf0,0xf0,0xf0,0xf0, -0xf0,0xf0,0xf0,0xf0,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xf0,0xdec,0xdec,0xdec,0xdec, -0xdec,0xdec,0xdec,0xf0,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xdec,0xf0,0xdec,0xdec,0xdec,0xdec, -0xdec,0xdec,0xdec,0xf0,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2, -0xdf2,0xdf2,0xdf2,0xdf2,0xdef,0xdef,0xdef,0xdef,0xdef,0xdef,0xdef,0xdef,0xdef,0xdef,0xf3,0xf3, -0xf3,0xf3,0xf3,0xf3,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xf6,0x13c8,0xf6,0xf6,0xf6,0xf6, -0xf6,0x13c8,0xf6,0xf6,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, -0xe4f,0xe4f,0xe4f,0xe4f,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, -0xdfb,0xdfb,0xdfb,0xf9,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8, -0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8,0xdf8, -0xdf8,0xdf8,0xdf8,0xf9,0xe0d,0xe01,0xe01,0xe01,0xfc,0xe01,0xe01,0xfc,0xfc,0xfc,0xfc,0xfc, -0xe01,0xe01,0xe01,0xe01,0xe0d,0xe0d,0xe0d,0xe0d,0xfc,0xe0d,0xe0d,0xe0d,0xfc,0xe0d,0xe0d,0xe0d, -0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d, 
-0xe0d,0xe0d,0xe0d,0xe0d,0xfc,0xfc,0xfc,0xfc,0xdfe,0xdfe,0xdfe,0xfc,0xfc,0xfc,0xfc,0xe04, -0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc, -0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe10,0xe10,0xe07,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0x1131,0x1131,0xff,0xff,0xff,0xff, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1f,0xe1f,0xe1f,0xe1c,0xe1c,0xe1f,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xff,0xff,0xff,0xff,0xff,0xff,0xe19,0xe19,0xe19,0xe19, -0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0x112e,0xff,0xff,0xff,0xe16,0xe16,0xe25,0xe25,0xe25,0xe25, -0x102,0x102,0x102,0x102,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe22,0xe25,0xe25,0xe25, -0xe25,0xe25,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x14e2,0x14e8,0x14e5,0x1830, -0x178b,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105, -0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105, -0x105,0x105,0x105,0x105,0xe4c,0xe4c,0xe4c,0xe49,0xe49,0xe40,0xe40,0xe49,0xe46,0xe46,0xe46,0xe46, -0x108,0x108,0x108,0x108,0x129c,0x129c,0x129c,0x129c,0x129c,0x129c,0x129f,0x129f,0x12a2,0x129f,0x15c,0x15c, -0x15c,0x15c,0x15c,0x15c,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0x13d4,0x13d4,0x10b,0x10b,0x10b,0x10b, -0x10b,0x10b,0x10b,0xe52,0x1305,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b, -0x10b,0x10b,0x10b,0x1302,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f, -0xc0f,0xc0f,0xc0f,0xc0f,0xe7f,0xe70,0xe6a,0xe7c,0xe79,0xe73,0xe73,0xe82,0xe6d,0xe76,0x10e,0x10e, -0x10e,0x10e,0x10e,0x10e,0xf03,0xf03,0xeee,0xf03,0xf06,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09, -0x114,0x114,0x114,0x114,0xefd,0xefd,0xefd,0xefd,0xefd,0xefd,0xefd,0xefd,0xefd,0xefd,0xf0f,0xf0f, -0xef4,0xefa,0xf0f,0xf0f,0xef7,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef1, 
-0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4,0xef4, -0xef4,0x114,0x114,0x114,0x130b,0x1308,0x130b,0x1308,0x130b,0x1308,0x130b,0x1308,0x130b,0x1308,0x13da,0x14f4, -0x14f4,0x14f4,0x178e,0x117,0x14f4,0x14f4,0x16dd,0x16dd,0x16dd,0x16d7,0x16dd,0x16d7,0x117,0x117,0x117,0x117, -0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117, -0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x117,0x14f1, -0x13dd,0x13dd,0x1308,0x100b,0x100b,0x100b,0x100b,0x100b,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e, -0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1b,0xf1b,0xf21,0xf21, -0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a, -0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf2a,0xf24,0xf24, -0xf24,0xf24,0x113a,0x113a,0x11d,0x11d,0x11d,0xf27,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7, -0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7, -0x14f7,0x16e0,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120, -0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120, -0x120,0x120,0x120,0x120,0xf33,0xf33,0xf33,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd,0x14fd, -0x14fd,0x14fd,0x14fd,0x123,0xf30,0xf30,0xf30,0xf30,0x14fa,0x123,0x123,0x123,0x123,0x123,0x123,0x123, -0x123,0x123,0x123,0x123,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36, -0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126, -0x126,0x126,0x126,0x126,0x1032,0x1032,0x1032,0x1032,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f, -0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x102f,0x102f,0x1026,0x1023,0x129,0x129,0x129,0x1035, 
-0x1035,0x1029,0x1029,0x1029,0x102c,0x102c,0x102c,0x102c,0x102c,0x102c,0x102c,0x102c,0x102c,0x102c,0x129,0x129, -0x129,0x1032,0x1032,0x1032,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x103b,0x103b, -0x103b,0x103b,0x103b,0x103b,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x104d,0x1050,0x1050, -0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c, -0x12c,0x12c,0x12c,0x12c,0x1077,0x1077,0x1077,0x1077,0x1071,0x1791,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f, -0x12f,0x12f,0x107d,0x107d,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x12f,0x12f, -0x12f,0x12f,0x12f,0x12f,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x108f,0x108f,0x108f,0x108f,0x108f, -0x108f,0x108f,0x108f,0x108f,0x108f,0x108f,0x1095,0x1098,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132, -0x132,0x132,0x132,0x1092,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x109e,0x109e,0x109e, -0x109e,0x109e,0x109e,0x10a7,0x10a7,0x109e,0x109e,0x10a7,0x10a7,0x109e,0x109e,0x135,0x135,0x135,0x135,0x135, -0x135,0x135,0x135,0x135,0x10aa,0x10aa,0x10aa,0x109e,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa, -0x109e,0x10a7,0x135,0x135,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x135,0x135, -0x10a1,0x10ad,0x10ad,0x10ad,0x1509,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138, -0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138, -0x138,0x138,0x138,0x138,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3, -0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3,0x10b3, -0x10b3,0x10b6,0x13b,0x13b,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9, -0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9, 
-0x10b9,0x13e,0x13e,0x13e,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc, -0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x141,0x141,0x141,0x141,0x141,0x141,0x141,0x141,0x141,0x141,0x141, -0x141,0x141,0x141,0x141,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2, -0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x10c2,0x144,0x144, -0x144,0x144,0x144,0x10bf,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5, -0x147,0x147,0x147,0x147,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8, -0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a, -0x14a,0x14a,0x14a,0x14a,0x1140,0x1140,0x1140,0x1140,0x1149,0x1140,0x1140,0x1140,0x1149,0x1140,0x1140,0x1140, -0x1140,0x113d,0x14d,0x14d,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146,0x1146, -0x1146,0x1146,0x1146,0x14d,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c, -0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x150,0x150,0x150,0x150,0x150,0x150, -0x150,0x150,0x150,0x150,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167, -0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1164,0x114f,0x1164,0x114f,0x114f,0x114f,0x114f, -0x114f,0x114f,0x114f,0x153,0x1158,0x1161,0x114f,0x1161,0x1161,0x114f,0x114f,0x114f,0x114f,0x114f,0x114f,0x114f, -0x114f,0x1164,0x1164,0x1164,0x1164,0x1164,0x1164,0x114f,0x114f,0x1155,0x1155,0x1155,0x1155,0x1155,0x1155,0x1155, -0x1155,0x153,0x153,0x1152,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x153,0x153, -0x153,0x153,0x153,0x153,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x115e,0x153,0x153, -0x153,0x153,0x153,0x153,0x115b,0x115b,0x115b,0x115b,0x115b,0x115b,0x115b,0x116a,0x116d,0x116d,0x116d,0x116d, 
-0x115b,0x115b,0x153,0x153,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557,0x1557, -0x1557,0x1557,0x1554,0x1d1,0x12b1,0x1290,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x12ab,0x1293,0x1293,0x1293, -0x1293,0x12ab,0x1293,0x1293,0x1293,0x1293,0x1299,0x147f,0x1485,0x1482,0x147c,0x156,0x16ad,0x16ad,0x156,0x156, -0x156,0x156,0x156,0x156,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182, -0x1182,0x1182,0x1182,0x1182,0x1179,0x1179,0x117c,0x1185,0x117f,0x117f,0x117f,0x1185,0x159,0x159,0x159,0x159, -0x159,0x159,0x159,0x159,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188,0x1188, -0x1188,0x1188,0x1188,0x1188,0x1188,0x12b7,0x118e,0x12ba,0x118e,0x118e,0x118e,0x118e,0x118b,0x118b,0x118b,0x118e, -0x16e6,0x16e9,0x15f,0x15f,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e, -0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e, -0x127e,0x162,0x162,0x162,0x11a3,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x119a,0x11a9,0x11a9,0x1197,0x1197, -0x1197,0x1197,0x165,0x12a5,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x165,0x165, -0x165,0x165,0x1197,0x1197,0x11c7,0x11bb,0x11c7,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168, -0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x11c4, -0x11c4,0x11ca,0x11be,0x11c1,0x11df,0x11df,0x11df,0x11d9,0x11d9,0x11d0,0x11d9,0x11d9,0x11d0,0x11d9,0x11d9,0x11e2, -0x11dc,0x11d3,0x16b,0x16b,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x11d6,0x16b,0x16b, -0x16b,0x16b,0x16b,0x16b,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x11e8,0x16e,0x16e,0x16e,0x16e,0x11e5, -0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5, -0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x16e,0x16e,0x16e,0x16e, 
-0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1, -0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x11f1,0x171,0x11ee,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb,0x11eb, -0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200, -0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x174,0x174,0x174,0x11fa,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd, -0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x1206, -0x1206,0x1206,0x1206,0x1206,0x1206,0x1206,0x177,0x177,0x1203,0x1203,0x1203,0x1203,0x1203,0x1203,0x1203,0x1203, -0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c, -0x120c,0x120c,0x120c,0x17a,0x17a,0x17a,0x17a,0x17a,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209, -0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212, -0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x180, -0x1230,0x1230,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183, -0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b, -0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x1515,0x1515,0x189,0x189,0x189, -0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a, -0x125a,0x125a,0x125a,0x125d,0x125d,0x125d,0x123c,0x189,0x135f,0x1266,0x135f,0x135f,0x135f,0x135f,0x135f,0x135f, -0x135f,0x135f,0x135f,0x135f,0x135f,0x1266,0x135f,0x1266,0x135c,0x135c,0x135c,0x135c,0x135c,0x135c,0x135c,0x135c, -0x135c,0x135c,0x13ec,0x13ec,0x189,0x189,0x189,0x189,0x1362,0x1362,0x135c,0x135c,0x135c,0x135c,0x135c,0x135c, 
-0x135c,0x1263,0x135c,0x1263,0x1263,0x135c,0x1362,0x1269,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f,0x180f, -0x180f,0x180f,0x180f,0x180f,0x180f,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189, -0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189, -0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314, -0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314,0x1314, -0x1314,0x1314,0x1314,0x1314,0x128a,0x137d,0x137a,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c, -0x18c,0x18c,0x18c,0x18c,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1287,0x1284, -0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1284,0x1287, -0x1284,0x1284,0x137d,0x137d,0x137d,0x137d,0x137d,0x137a,0x137d,0x137d,0x137d,0x1812,0x18c,0x18c,0x18c,0x18c, -0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c, -0x13aa,0x13aa,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c, -0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c, -0x18c,0x18c,0x18c,0x18c,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d, -0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d,0x1317,0x1317,0x1317, -0x18f,0x18f,0x131a,0x18f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x1320,0x1329,0x1323,0x1323,0x1329,0x1329, -0x1329,0x1323,0x1329,0x1323,0x1323,0x1323,0x132c,0x132c,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192, -0x1326,0x1326,0x1326,0x1326,0x195,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x195,0x195,0x1332,0x1332,0x1332, -0x1332,0x1332,0x1332,0x195,0x195,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x195,0x195,0x195,0x195,0x195, 
-0x195,0x195,0x195,0x195,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x195,0x1332,0x1332,0x1332,0x1332, -0x1332,0x1332,0x1332,0x195,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4, -0x15b4,0x15b4,0x15b4,0x15b4,0x1335,0x1335,0x1335,0x1335,0x1335,0x1335,0x1338,0x134a,0x134a,0x133e,0x133e,0x133e, -0x133e,0x133e,0x198,0x198,0x198,0x198,0x133b,0x133b,0x133b,0x133b,0x133b,0x133b,0x133b,0x133b,0x133b,0x133b, -0x133b,0x133b,0x133b,0x133b,0x133b,0x133b,0x1341,0x1341,0x1341,0x1341,0x1341,0x1341,0x1341,0x1341,0x1341,0x1341, -0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x1518, -0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d, -0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b, -0x1383,0x1380,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e, -0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e, -0x19e,0x19e,0x19e,0x19e,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350, -0x1350,0x1350,0x1350,0x1a1,0x1a1,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350, -0x1350,0x1350,0x1350,0x151b,0x1a1,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350, -0x1350,0x1350,0x1350,0x1386,0x1a1,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350,0x1350, -0x1350,0x1350,0x1350,0x1350,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b, -0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1, -0x1a1,0x1a1,0x1a1,0x1a1,0x13a4,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x1533,0x1533,0x1533,0x1533,0x1533,0x1536, -0x16a4,0x1536,0x1536,0x1536,0x176d,0x181b,0x181b,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, 
-0x1a4,0x1a4,0x1a4,0x1a4,0x1536,0x1536,0x1536,0x1536,0x1536,0x1536,0x1533,0x1533,0x1533,0x1536,0x1533,0x16a1, -0x16a1,0x1a4,0x1a4,0x1a4,0x1536,0x1533,0x1533,0x1536,0x181b,0x181b,0x181b,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, -0x1a4,0x1a4,0x1a4,0x1a4,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353, -0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7, -0x1a7,0x1a7,0x1a7,0x1a7,0x13f8,0x153c,0x13f8,0x13f8,0x13f8,0x13f8,0x13f8,0x13f8,0x13f8,0x13f8,0x13f8,0x13f8, -0x13f8,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x16f2,0x16f2,0x1aa,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d, -0x179d,0x179d,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa, -0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a, -0x179a,0x179a,0x179a,0x179a,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe, -0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe, -0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x1ad,0x13fe,0x1ad,0x1ad,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe, -0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x1ad,0x13fe, -0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x13fe,0x1ad,0x1ad,0x1ad,0x1ad,0x13fe,0x1ad,0x13fe,0x1ad,0x13fe, -0x1ad,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x1ad,0x13fe,0x1ad,0x1ad,0x13fe,0x1ad,0x13fe,0x1ad,0x13fe, -0x1ad,0x13fe,0x1ad,0x13fe,0x1ad,0x13fe,0x13fe,0x1ad,0x13fe,0x1ad,0x1ad,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad, -0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe, -0x13fe,0x1ad,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe, -0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe, 
-0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x1ad,0x13fe, -0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe, -0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad, -0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad, -0x13fb,0x13fb,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad, -0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1401,0x1401,0x1401,0x1401,0x1401,0x1410,0x1401,0x1404,0x1404, -0x1401,0x1401,0x1401,0x1407,0x1407,0x1b0,0x140d,0x140d,0x140d,0x140d,0x140d,0x140d,0x140d,0x140d,0x140d,0x140d, -0x140a,0x1416,0x1416,0x1416,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, -0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x1422,0x141f,0x1419,0x1419,0x141f,0x141f, -0x1428,0x1428,0x1422,0x1425,0x1425,0x141f,0x141c,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3, -0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b, -0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x142b,0x1b6,0x1b6,0x1b6,0x1b6,0x16f5,0x16f5,0x142b,0x142b, -0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5, -0x1b6,0x1b6,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5, -0x1437,0x1437,0x1437,0x1437,0x1437,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9, -0x1437,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434, -0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434,0x1434, 
-0x1434,0x1434,0x1434,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9, -0x1b9,0x1b9,0x1b9,0x1431,0x1431,0x1431,0x1431,0x143a,0x143a,0x143a,0x143a,0x143a,0x143a,0x143a,0x143a,0x143a, -0x143a,0x143a,0x143a,0x143a,0x144c,0x144f,0x1452,0x1452,0x144f,0x1455,0x1455,0x1440,0x1443,0x16fb,0x16f8,0x16f8, -0x16f8,0x1542,0x1bc,0x1bc,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x153f,0x1701, -0x1704,0x16fe,0x1707,0x1707,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x1bf,0x1bf,0x1bf, -0x1bf,0x1bf,0x1bf,0x1bf,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1bf,0x1bf, -0x1bf,0x1bf,0x1bf,0x1bf,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x145e,0x1c2,0x1c2,0x1c2,0x1c2, -0x1c2,0x1c2,0x1c2,0x1c2,0x12ae,0x12ab,0x12ae,0x1296,0x12ab,0x12ab,0x12ab,0x12b1,0x12ab,0x12b1,0x12b4,0x12ab, -0x12b1,0x12b1,0x12ab,0x12ab,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1461, -0x146a,0x1461,0x146a,0x146a,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x146d,0x1464,0x1c5,0x1c5,0x1c5,0x1c5, -0x1c5,0x1c5,0x1c5,0x1c5,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548,0x1548, -0x1548,0x1548,0x1c8,0x1c8,0x1545,0x1545,0x1545,0x1545,0x1545,0x154b,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, -0x1c8,0x1c8,0x1c8,0x1c8,0x16b0,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7, -0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7, -0x1ce,0x1ce,0x1ce,0x1ce,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1, -0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1, -0x1d1,0x1d1,0x1d1,0x1d1,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1d4, -0x1d4,0x1d4,0x1d4,0x1d4,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563, 
-0x1563,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563, -0x1563,0x1563,0x1d4,0x1d4,0x1560,0x155a,0x155d,0x1566,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569, -0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551, -0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c, -0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x1da,0x1da,0x1da, -0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, +6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x975,0x975,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x24,0x24,0x24,0x24, +0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24, +0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0xd77,0xd77,0xd77,0xd77, +0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0x24,0x24,0x24,0x24, +0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x14f4,0x3cf,0x3de,0x3de, +0x39,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x39,0x39,0x3e4,0x3e4,0x39,0x39,0x3e4, +0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x39,0x3e4,0x3e4, +0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x39,0x3e4,0x39,0x39,0x39,0x3e4,0x3e4,0x3e4,0x3e4,0x39,0x39, +0x3d2,0xcd2,0x3cf,0x3de,0x3de,0x3cf,0x3cf,0x3cf,0x3cf,0x39,0x39,0x3de,0x3de,0x39,0x39,0x3e1, +0x3e1,0x3d5,0xdc5,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x3cf,0x39,0x39,0x39,0x39, +0x3e7,0x3e7,0x39,0x3e7,0x3e4,0x3e4,0x3cf,0x3cf,0x39,0x39,0x960,0x960,0x960,0x960,0x960,0x960, 
+0x960,0x960,0x960,0x960,0x3e4,0x3e4,0x3db,0x3db,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3db,0x3d8,0x114c, +0x3f,0x3c,0x39,0x39,0x42,0xcd5,0x3ea,0xcd8,0x42,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x42, +0x42,0x42,0x42,0x3f6,0x3f6,0x42,0x42,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6, +0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x42,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x3f6,0x42,0x3f6,0x3f9, +0x42,0x3f6,0x3f9,0x42,0x3f6,0x3f6,0x42,0x42,0x3ed,0x42,0x3f3,0x3f3,0x3f3,0x3ea,0x3ea,0x42, +0x42,0x42,0x42,0x3ea,0x3ea,0x42,0x42,0x3ea,0x3ea,0x3f0,0x42,0x42,0x42,0xfa8,0x42,0x42, +0x42,0x42,0x42,0x42,0x42,0x3f9,0x3f9,0x3f9,0x3f6,0x42,0x3f9,0x42,0x42,0x42,0x42,0x42, +0x42,0x42,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x3ea,0x3ea,0x3f6,0x3f6, +0x3f6,0xfa8,0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x45,0x3fc,0x3fc,0x405, +0x45,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0xce1,0x408,0x45,0x408,0x408,0x408,0x45,0x408, +0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x408,0x45,0x408,0x408, +0x408,0x408,0x408,0x408,0x408,0x45,0x408,0x408,0x45,0x408,0x408,0x408,0x408,0x408,0x45,0x45, +0x3ff,0x408,0x405,0x405,0x405,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x45,0x3fc,0x3fc,0x405,0x45,0x405, +0x405,0x402,0x45,0x45,0x408,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x45, +0x45,0x45,0x45,0x45,0x408,0xce1,0xcdb,0xcdb,0x45,0x45,0x966,0x966,0x966,0x966,0x966,0x966, +0x966,0x966,0x966,0x966,0x140a,0xcde,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x170d,0x48,0x48, +0x48,0x48,0x48,0x48,0x4b,0x40b,0x41a,0x41a,0x4b,0x420,0x420,0x420,0x420,0x420,0x420,0x420, +0x420,0x4b,0x4b,0x420,0x420,0x4b,0x4b,0x420,0x420,0x420,0x420,0x420,0x420,0x420,0x420,0x420, +0x420,0x420,0x420,0x420,0x420,0x4b,0x420,0x420,0x420,0x420,0x420,0x420,0x420,0x4b,0x420,0x420, +0x4b,0xce4,0x420,0x420,0x420,0x420,0x4b,0x4b,0x40e,0x420,0x40b,0x40b,0x41a,0x40b,0x40b,0x40b, +0xfab,0x4b,0x4b,0x41a,0x41d,0x4b,0x4b,0x41d,0x41d,0x411,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b, 
+0x4b,0x4b,0x40b,0x40b,0x4b,0x4b,0x4b,0x4b,0x423,0x423,0x4b,0x420,0x420,0x420,0xfab,0xfab, +0x4b,0x4b,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x417,0x414,0xce4,0x1323,0x1323, +0x1323,0x1323,0x1323,0x1323,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4e,0x4e,0x426,0x432, +0x4e,0x432,0x432,0x432,0x432,0x432,0x432,0x4e,0x4e,0x4e,0x432,0x432,0x432,0x4e,0x432,0x432, +0x435,0x432,0x4e,0x4e,0x4e,0x432,0x432,0x4e,0x432,0x4e,0x432,0x432,0x4e,0x4e,0x4e,0x432, +0x432,0x4e,0x4e,0x4e,0x432,0x432,0x96f,0x4e,0x4e,0x4e,0x432,0x432,0x432,0x432,0x432,0x432, +0x432,0x96f,0xdc8,0x432,0x432,0x432,0x4e,0x4e,0x4e,0x4e,0x426,0x42c,0x426,0x42c,0x42c,0x4e, +0x4e,0x4e,0x42c,0x42c,0x42c,0x4e,0x42f,0x42f,0x42f,0x429,0x4e,0x4e,0xfae,0x4e,0x4e,0x4e, +0x4e,0x4e,0x4e,0x426,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xee2,0x96c, +0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x969,0x969,0x969,0xce7,0xce7,0xce7,0xce7,0xce7, +0xce7,0xcea,0xce7,0x4e,0x4e,0x4e,0x4e,0x4e,0x14f7,0x444,0x444,0x444,0x51,0x447,0x447,0x447, +0x447,0x447,0x447,0x447,0x447,0x51,0x447,0x447,0x447,0x51,0x447,0x447,0x447,0x447,0x447,0x447, +0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x51,0x447,0x447,0x447,0x447,0x447,0x447, +0x447,0x447,0x447,0x447,0x14fa,0x447,0x447,0x447,0x447,0x447,0x51,0x51,0x51,0xfb7,0x438,0x438, +0x438,0x444,0x444,0x444,0x444,0x51,0x438,0x438,0x43b,0x51,0x438,0x438,0x438,0x43e,0x51,0x51, +0x51,0x51,0x51,0x51,0x51,0x438,0x438,0x51,0xfb7,0xfb7,0x1710,0x51,0x51,0x51,0x51,0x51, +0x447,0x447,0xfb1,0xfb1,0x51,0x51,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441,0x441, +0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0xfb4,0xfb4,0xfb4,0xfb4,0xfb4,0xfb4,0xfb4,0xfb4, +0x17cd,0x14fd,0x453,0x453,0x54,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x54,0x459,0x459, +0x459,0x54,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459, +0x459,0x54,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x54,0x459,0x459,0x459, 
+0x459,0x459,0x54,0x54,0xced,0xcf0,0x453,0x44a,0x456,0x453,0x44a,0x453,0x453,0x54,0x44a,0x456, +0x456,0x54,0x456,0x456,0x44a,0x44d,0x54,0x54,0x54,0x54,0x54,0x54,0x54,0x44a,0x44a,0x54, +0x54,0x54,0x54,0x54,0x54,0x54,0x459,0x54,0x459,0x459,0xefa,0xefa,0x54,0x54,0x450,0x450, +0x450,0x450,0x450,0x450,0x450,0x450,0x450,0x450,0x54,0xefd,0xefd,0x54,0x54,0x54,0x54,0x54, +0x54,0x54,0x54,0x54,0x54,0x54,0x54,0x54,0x5a,0x1500,0x465,0x465,0x57,0x46b,0x46b,0x46b, +0x46b,0x46b,0x46b,0x46b,0x46b,0x57,0x46b,0x46b,0x46b,0x57,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b, +0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x465,0x45c,0x45c,0x45c,0xfba,0x57,0x465,0x465, +0x465,0x57,0x468,0x468,0x468,0x45f,0x1329,0x17d0,0x57,0x57,0x57,0x57,0x17d3,0x17d3,0x17d3,0x45c, +0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x17d0,0x1713,0x46b,0x46b,0xfba,0xfba,0x57,0x57,0x462,0x462, +0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0xfbd,0xfbd,0xfbd,0xfbd,0xfbd,0xfbd,0x17d0,0x17d0, +0x17d0,0xfc0,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0xfc3,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b, +0x46b,0x1326,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x46b, +0x46b,0x46b,0x1326,0x5a,0x5a,0xfc3,0x45c,0x465,0x5d,0x5d,0xa38,0xa38,0x5d,0xa3e,0xa3e,0xa3e, +0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0x5d, +0x5d,0x5d,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e, +0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0x5d,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e, +0x5d,0xa3e,0x5d,0x5d,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0xa3e,0x5d,0x5d,0x5d,0xa32,0x5d, +0x5d,0x5d,0x5d,0xa2f,0xa38,0xa38,0xa2f,0xa2f,0xa2f,0x5d,0xa2f,0x5d,0xa38,0xa38,0xa3b,0xa38, +0xa3b,0xa3b,0xa3b,0xa2f,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x1503,0x1503,0x1503,0x1503,0x1503,0x1503, +0x1503,0x1503,0x1503,0x1503,0x5d,0x5d,0xa38,0xa38,0xa35,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d, +0x5d,0x5d,0x5d,0x5d,0x60,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486, 
+0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486, +0x486,0x486,0x486,0x486,0x486,0x471,0x486,0x483,0x471,0x471,0x471,0x471,0x471,0x471,0x477,0x60, +0x60,0x60,0x60,0x46e,0x48c,0x48c,0x48c,0x48c,0x48c,0x486,0x489,0x474,0x474,0x474,0x474,0x474, +0x474,0x471,0x474,0x47a,0x480,0x480,0x480,0x480,0x480,0x480,0x480,0x480,0x480,0x480,0x47d,0x47d, +0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60, +0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60, +0x63,0x49b,0x49b,0x63,0x49b,0x63,0x63,0x49b,0x49b,0x63,0x49b,0x63,0x63,0x49b,0x63,0x63, +0x63,0x63,0x63,0x63,0x49b,0x49b,0x49b,0x49b,0x63,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b, +0x63,0x49b,0x49b,0x49b,0x63,0x49b,0x63,0x49b,0x63,0x63,0x49b,0x49b,0x63,0x49b,0x49b,0x49b, +0x49b,0x48f,0x49b,0x498,0x48f,0x48f,0x48f,0x48f,0x48f,0x48f,0x63,0x48f,0x48f,0x49b,0x63,0x63, +0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x63,0x4a1,0x63,0x492,0x492,0x492,0x492,0x492,0x48f,0x63,0x63, +0x495,0x495,0x495,0x495,0x495,0x495,0x495,0x495,0x495,0x495,0x63,0x63,0x49e,0x49e,0x140d,0x140d, +0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, +0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63, +0x9b1,0x9b1,0x9b1,0x9b4,0x9b1,0x9b1,0x9b1,0x9b1,0x66,0x9b1,0x9b1,0x9b1,0x9b1,0x9b4,0x9b1,0x9b1, +0x9b1,0x9b1,0x9b4,0x9b1,0x9b1,0x9b1,0x9b1,0x9b4,0x9b1,0x9b1,0x9b1,0x9b1,0x9b4,0x9b1,0x9b1,0x9b1, +0x9b1,0x9b1,0x9b1,0x9b1,0x9b1,0x9b1,0x9b1,0x9b1,0x9b1,0x9b4,0xa4d,0xfcf,0xfcf,0x66,0x66,0x66, +0x66,0x97e,0x97e,0x981,0x97e,0x981,0x981,0x98a,0x981,0x98a,0x97e,0x97e,0x97e,0x97e,0x97e,0x9ab, +0x97e,0x981,0x984,0x984,0x987,0x990,0x984,0x984,0x9b1,0x9b1,0x9b1,0x9b1,0x1332,0x132c,0x132c,0x132c, +0x97e,0x97e,0x97e,0x981,0x97e,0x97e,0xa41,0x97e,0x66,0x97e,0x97e,0x97e,0x97e,0x981,0x97e,0x97e, +0x97e,0x97e,0x981,0x97e,0x97e,0x97e,0x97e,0x981,0x97e,0x97e,0x97e,0x97e,0x981,0x97e,0xa41,0xa41, 
+0xa41,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0xa41,0x981,0xa41,0xa41,0xa41,0x66,0xa4a,0xa4a, +0xa47,0xa47,0xa47,0xa47,0xa47,0xa47,0xa44,0xa47,0xa47,0xa47,0xa47,0xa47,0xa47,0x66,0xfc6,0xa47, +0xdcb,0xdcb,0xfc9,0xfcc,0xfc6,0x114f,0x114f,0x114f,0x114f,0x132f,0x132f,0x66,0x66,0x66,0x66,0x66, +0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66, +0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x4aa,0x4aa,0x4aa,0x4aa, +0x4aa,0x4aa,0x69,0x1413,0x69,0x69,0x69,0x69,0x69,0x1413,0x69,0x69,0x4a7,0x4a7,0x4a7,0x4a7, +0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xdda,0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0x6c,0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xdda,0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xdda, +0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0x6c, +0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xdda, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xdda,0xa77,0x6c,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xdda,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0x6c,0x6c,0x1335,0x1335,0xdd4, +0xdd7,0xa71,0xa7a,0xa6e,0xa6e,0xa6e,0xa6e,0xa7a,0xa7a,0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0xa74, +0xa74,0xa74,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0xa6b,0x6c,0x6c,0x6c, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, 
+0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x1719,0x6f,0x6f,0x1716,0x1716,0x1716,0x1716,0x1716,0x1716,0x6f,0x6f, +0xa8f,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92, +0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa92,0xa8c,0xa89,0x72,0x72,0x72, +0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa95,0xa95,0xa95,0xa98,0xa98, +0xa98,0x1506,0x1506,0x1506,0x1506,0x1506,0x1506,0x1506,0x1506,0x75,0x75,0x75,0x75,0x75,0x75,0x75, +0xab9,0xab9,0xab9,0xab9,0xab9,0xab9,0xa9b,0xab9,0xab9,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e, +0xa9e,0xa9e,0xaa1,0xa9e,0xab0,0xab0,0xab3,0xabc,0xaaa,0xaa7,0xab0,0xaad,0xabc,0xcf3,0x78,0x78, +0xab6,0xab6,0xab6,0xab6,0xab6,0xab6,0xab6,0xab6,0xab6,0xab6,0x78,0x78,0x78,0x78,0x78,0x78, +0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0x78,0x78,0x78,0x78,0x78,0x78, +0xacb,0xacb,0xb49,0xb4c,0xad1,0xb46,0xace,0xacb,0xad4,0xae3,0xad7,0xae6,0xae6,0xae6,0xac2,0x7b, +0xada,0xada,0xada,0xada,0xada,0xada,0xada,0xada,0xada,0xada,0x7b,0x7b,0x7b,0x7b,0x7b,0x7b, +0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd, +0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0x7b,0x7b,0x7b,0x7b,0x7b,0x7b,0x7b,0x7b, +0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xac5,0xff0,0x7b,0x7b,0x7b,0x7b,0x7b, +0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x7e,0x7e,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x7e,0x7e, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x7e,0x4cb,0x7e,0x4cb,0x7e,0x4cb,0x7e,0x4cb, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x7e,0x7e, 
+0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x7e,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4c2,0x4c8,0x4c2, +0x4c2,0x4bf,0x4c8,0x4c8,0x4c8,0x7e,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4bf,0x4bf,0x4bf, +0x4c8,0x4c8,0x4c8,0x4c8,0x7e,0x7e,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x7e,0x4bf,0x4bf,0x4bf, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4bf,0x4bf,0x4bf, +0x7e,0x7e,0x4c8,0x4c8,0x4c8,0x7e,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4c5,0x4c2,0x7e, +0xbbe,0xbc1,0xbc1,0xbc1,0xff9,0x81,0x14e5,0x14e5,0x14e5,0x14e5,0x4d4,0x4d4,0x4d4,0x4d4,0x4d4,0x4d4, +0x51f,0xbd3,0x84,0x84,0x6db,0x51f,0x51f,0x51f,0x51f,0x51f,0x525,0x537,0x525,0x531,0x52b,0x6de, +0x51c,0x6d8,0x6d8,0x6d8,0x6d8,0x51c,0x51c,0x51c,0x51c,0x51c,0x522,0x534,0x522,0x52e,0x528,0x84, +0xde3,0xde3,0xde3,0xde3,0xde3,0x1338,0x1338,0x1338,0x1338,0x1338,0x1338,0x1338,0x1338,0x84,0x84,0x84, +0x53d,0x53d,0x53d,0x53d,0x53d,0x53d,0x53d,0x53a,0x540,0x756,0x53d,0x9ba,0x9db,0xaf5,0xaf5,0xaf5, +0xbd6,0xbd6,0xde6,0xde6,0xde6,0xde6,0x1167,0x116a,0x116a,0x133b,0x14df,0x1509,0x150c,0x150c,0x171c,0x87, +0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87, +0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x546,0x543,0x543,0x543, +0x543,0x546,0xaf8,0xaf8,0xbd9,0xbdf,0xbdf,0xbdc,0xbdc,0xbdc,0xbdc,0xde9,0xf00,0xf00,0xf00,0xf00, +0x113a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a, +0x576,0x576,0x576,0xb01,0xf09,0xfff,0xfff,0xfff,0xfff,0x1296,0x171f,0x171f,0x8d,0x8d,0x8d,0x8d, +0x705,0x705,0x705,0x705,0x708,0x708,0x708,0x708,0x708,0x708,0x582,0x582,0x57f,0x57f,0x57f,0x57f, +0xf0f,0xf0f,0xf0f,0xf0c,0xf0c,0xf0c,0xf0c,0xf0c,0x1170,0x13bc,0x13bc,0x13bc,0x13bc,0x133e,0x133e,0x133e, +0x13bf,0x1341,0x1341,0x13bf,0x150f,0x150f,0x150f,0x150f,0x1512,0x1512,0x1512,0x17d6,0x17d6,0x17d6,0x17d6,0x90, 
+0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0xb0a,0xb0a,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, +0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x96,0x96,0x96,0x96,0x96, +0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0x99,0xb25,0xb25,0xb25,0xb25,0xb28, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb25,0xb25,0xb25,0xb28,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99, +0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b, +0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c, +0xa2,0x83d,0x837,0x83d,0x837,0x83d,0x837,0x83d,0x837,0x83d,0x837,0x837,0x83a,0x837,0x83a,0x837, +0x83a,0x837,0x83a,0x837,0x83a,0x837,0x83a,0x837,0x83a,0x837,0x83a,0x837,0x83a,0x837,0x83a,0x837, +0x837,0x837,0x837,0x83d,0x837,0x83d,0x837,0x83d,0x837,0x837,0x837,0x837,0x837,0x837,0x83d,0x837, +0x837,0x837,0x837,0x837,0x83a,0xc81,0xc81,0xa2,0xa2,0x951,0x951,0x91b,0x91b,0x840,0x843,0xc7e, +0xa5,0xa5,0xa5,0xa5,0xa5,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855, +0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855,0x855, +0x855,0x1128,0xa8,0xa5,0xab,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, +0x858,0x858,0x858,0x858,0x858,0x858,0x858,0xab,0x924,0x924,0x927,0x927,0x927,0x927,0x927,0x927, +0x927,0x927,0x927,0x927,0x927,0x927,0x927,0x927,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37, +0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37, +0x13cb,0x13cb,0x13cb,0xae,0xae,0xae,0xae,0xae,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861, 
+0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861,0x861, +0x861,0x861,0x861,0x861,0x861,0xd83,0xd83,0xb1,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867, +0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867, +0x867,0x867,0x867,0x867,0x867,0x867,0x867,0xb1,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, +0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb4,0xb4,0xb4,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43, +0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xc8a,0xb43,0xb43,0xb43,0xc8a,0xb43,0xb7, +0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, +0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x9d5,0x9d5,0x9d5,0x9d5,0xba,0xba,0xba,0xba, +0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f, +0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x606,0x606,0x606,0x606,0x606,0x606,0x606,0xbd, +0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4, +0xbd,0xbd,0xbd,0xbd,0xbd,0xb16,0x5f7,0x5fd,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603, +0x603,0x5fa,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0xbd, +0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0xbd,0x5fd,0xbd,0x5fd,0x5fd,0xbd,0x5fd,0x5fd,0xbd,0x5fd,0x5fd, +0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x5fd,0x600,0x618,0x612,0x618,0x612,0x615,0x61b,0x618,0x612, +0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612,0x134a,0x134a,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0, +0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0x618,0x612,0x615,0x61b,0x618, +0x612,0x618,0x612,0x618,0x612,0x618,0x618,0x612,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0, +0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0x615,0x612,0x615,0x615,0x615,0x615,0x615,0x615, +0x612,0x615,0x612,0x612,0x615,0x615,0x612,0x612,0x612,0x612,0x612,0x615,0x612,0x612,0x615,0x612, 
+0x615,0x615,0x615,0x612,0x615,0x615,0x615,0x615,0xc0,0xc0,0x615,0x615,0x615,0x615,0x612,0x612, +0x615,0x612,0x612,0x612,0x612,0x615,0x612,0x612,0x612,0x612,0x612,0x615,0x615,0x615,0x612,0x612, +0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e, +0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0x618,0x618,0x972,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x60f,0x60f,0xc15,0xd9b,0xc0,0xc0,0x879,0x88b,0x888,0x88b,0x888,0xc9f,0xc9f,0xd8f, +0xd8c,0x87c,0x87c,0x87c,0x87c,0x88e,0x88e,0x88e,0x8a6,0x8a9,0x8b8,0xc3,0x8ac,0x8af,0x8bb,0x8bb, +0x8a3,0x89a,0x894,0x89a,0x894,0x89a,0x894,0x897,0x897,0x8b2,0x8b2,0x8b5,0x8b2,0x8b2,0x8b2,0xc3, +0x8b2,0x8a0,0x89d,0x897,0xc3,0xc3,0xc3,0xc3,0x624,0x630,0x624,0xc18,0x624,0xc6,0x624,0x630, +0x624,0x630,0x624,0x630,0x624,0x630,0x624,0x630,0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a, +0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62d,0x627,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62d, +0x627,0x62d,0x627,0x62d,0x627,0xc6,0xc6,0x621,0x777,0x77a,0x78f,0x792,0x771,0x77a,0x77a,0xcc, +0x759,0x75c,0x75c,0x75c,0x75c,0x759,0x759,0xcc,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9,0xc9, +0xc9,0xb19,0xb19,0xb19,0x9d8,0x753,0x633,0x633,0xcc,0x7a1,0x780,0x771,0x77a,0x777,0x771,0x783, +0x774,0x76e,0x771,0x78f,0x786,0x77d,0x79e,0x771,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b,0x79b, +0x79b,0x79b,0x78c,0x789,0x78f,0x78f,0x78f,0x7a1,0x762,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f, +0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f, +0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0xcc,0xcc,0xcc,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f, +0xcc,0xcc,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f,0xcc,0xcc,0x75f,0x75f,0x75f,0x75f,0x75f,0x75f, +0xcc,0xcc,0x75f,0x75f,0x75f,0xcc,0xcc,0xcc,0xb61,0xb61,0xb61,0xb61,0xcf,0xcf,0xcf,0xcf, +0xcf,0xcf,0xcf,0xcf,0xcf,0xd2,0xd2,0xd2,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67, +0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xb67,0xd5,0xd5,0xd5,0xd5,0xd5, 
+0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e, +0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70, +0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xdb,0xdb,0x100b,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb, +0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xde,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb94,0xb94,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb94,0xde,0xb94,0xb94, +0xde,0xde,0xb94,0xde,0xde,0xb94,0xb94,0xde,0xde,0xb94,0xb94,0xb94,0xb94,0xde,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb91,0xb91,0xb91,0xb91,0xde,0xb91,0xde,0xb91,0xb91,0xb91, +0xb91,0xd14,0xb91,0xb91,0xde,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb91,0xb91,0xb91,0xb91,0xb94,0xb94,0xde,0xb94,0xb94,0xb94,0xb94,0xde,0xde,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xde,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xde,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb94,0xb94,0xde,0xb94,0xb94,0xb94,0xb94,0xde, +0xb94,0xb94,0xb94,0xb94,0xb94,0xde,0xb94,0xde,0xde,0xde,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, 
+0xb94,0xde,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xe01,0xe01,0xde,0xde,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb91,0xb91,0xb91,0xb8b, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xf18,0xf15,0xde,0xde,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e, +0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xe1,0xb9a,0xe1,0xe1, +0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1, +0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xc27,0xc27,0xc27,0xc27, +0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xe4,0xc27,0xc27,0xc27,0xc27,0xc21,0xc21, +0xc24,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xc30,0xc30,0xc30,0xc30, +0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc30,0xc2a,0xc2a, +0xc2d,0xc93,0xc93,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xe7,0xc36,0xc36,0xc36,0xc36, +0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc33,0xc33, +0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xea,0xc3c,0xc3c,0xc3c,0xc3c, +0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xed,0xc3c,0xc3c,0xc3c,0xed,0xc39,0xc39, +0xed,0xed,0xed,0xed,0xed,0xed,0xed,0xed,0xed,0xed,0xed,0xed,0xd26,0xd26,0xd26,0xd26, +0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26, +0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0xd26,0x1524,0x1524,0xf0,0xd17,0xd17,0xd17,0xd23, +0xd23,0xd23,0xd23,0xd17,0xd17,0xd23,0xd23,0xd23,0xf0,0xf0,0xf0,0xf0,0xd23,0xd23,0xd17,0xd23, +0xd23,0xd23,0xd23,0xd23,0xd23,0xd1a,0xd1a,0xd1a,0xf0,0xf0,0xf0,0xf0,0xd1d,0xf0,0xf0,0xf0, +0xd29,0xd29,0xd20,0xd20,0xd20,0xd20,0xd20,0xd20,0xd20,0xd20,0xd20,0xd20,0xd2c,0xd2c,0xd2c,0xd2c, +0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xf3,0xf3, 
+0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3,0xf3, +0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527, +0x1527,0x1527,0x1527,0x1527,0xf6,0xf6,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527, +0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0xf6,0xf6, +0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527, +0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0xf6,0xf6,0xf6,0x1527,0x1527,0x1527, +0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0xf6,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527, +0x1527,0x1527,0xf9,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6, +0x1728,0x1728,0x1728,0x1728,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6, +0xf6,0xf6,0xf6,0xf6,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xfc,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xfc,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xfc,0xd53,0xd53,0xfc,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xfc,0xfc, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xfc,0xfc, +0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc, +0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc, +0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56, +0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xff,0xff,0xff,0xff,0xff, +0xd98,0xd98,0xd98,0x102,0x102,0x102,0x102,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92, 
+0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92, +0x102,0x102,0x102,0xd95,0xd95,0xd95,0xd95,0xd95,0xd95,0xd95,0xd95,0xd95,0xd5c,0xd5c,0xd5c,0xd5c, +0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c, +0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0x105,0xd59,0xd65,0xd65,0xd65,0xd65, +0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65, +0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0x108,0x108,0xd62,0xd62,0xd62,0xd62, +0xd62,0xd62,0xd62,0xd62,0xd62,0xd62,0x108,0x108,0x108,0x108,0x108,0x108,0x1860,0x1860,0x1860,0x1860, +0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0xd68,0xd68,0xd68,0xd68, +0xd68,0xd68,0x10b,0x10b,0xd68,0x10b,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68, +0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0x10b,0xd68, +0xd68,0x10b,0x10b,0x10b,0xd68,0x10b,0x10b,0xd68,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0x10e, +0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0x10e,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c, +0xe1c,0xe1c,0xe1c,0x152a,0x152a,0x17d9,0x17d9,0x114,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107, +0x1107,0x1107,0x1107,0x1107,0x171,0x171,0x171,0x171,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e, +0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe25, +0xe25,0xe2b,0xe2b,0xe25,0x117,0x117,0xe28,0xe28,0x1137,0x1137,0x1137,0x1137,0x11a,0x11a,0x11a,0x11a, +0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0xc90,0xc90,0xc90,0xc90,0xc90,0xc90,0xc90,0xc90, +0xc90,0xc90,0xc90,0xc90,0xc90,0xc90,0xc90,0xc90,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0x152d, 
+0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x1530,0x120,0x120, +0x120,0x120,0x11d,0x17dc,0x1356,0x1179,0xf27,0xf27,0xe40,0xe3d,0xe40,0xe3d,0xe3d,0xe34,0xe34,0xe34, +0xe34,0xe34,0xe34,0x1182,0x117f,0x1182,0x117f,0x117c,0x117c,0x117c,0x141c,0x1419,0x123,0x123,0x123,0x123, +0x123,0xe3a,0xe37,0xe37,0xe37,0xe34,0xe3a,0xe37,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43, +0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0x126, +0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0x126, +0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0x126,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0x126, +0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0x126,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49, +0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46,0xe46, +0xe46,0xe46,0x129,0x129,0x129,0x129,0x129,0x129,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0x12c,0x141f, +0x12c,0x12c,0x12c,0x12c,0x12c,0x141f,0x12c,0x12c,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6, +0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52, +0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0x12f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, +0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0x12f,0xe64,0xe58,0xe58,0xe58,0x132,0xe58,0xe58,0x132, +0x132,0x132,0x132,0x132,0xe58,0xe58,0xe58,0xe58,0xe64,0xe64,0xe64,0xe64,0x132,0xe64,0xe64,0xe64, +0x132,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64, +0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0x132,0x132,0x132,0x132,0xe55,0xe55,0xe55,0x132, +0x132,0x132,0x132,0xe5b,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0x132,0x132,0x132,0x132, +0x132,0x132,0x132,0x132,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe67,0xe67,0xe5e,0x132,0x132,0x132, 
+0x132,0x132,0x132,0x132,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0x1188,0x1188, +0x135,0x135,0x135,0x135,0xe73,0xe73,0xe73,0xe73,0xe73,0xe76,0xe76,0xe76,0xe73,0xe73,0xe76,0xe73, +0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0x135,0x135,0x135,0x135,0x135,0x135, +0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0x1185,0x135,0x135,0x135,0xe6d,0xe6d, +0xe7c,0xe7c,0xe7c,0xe7c,0x138,0x138,0x138,0x138,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, +0xe79,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138,0x138, +0x1539,0x153f,0x153c,0x1884,0x17df,0x13e,0x13e,0x13e,0x13e,0x13e,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b, +0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b, +0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0xea3,0xea3,0xea3,0xea0, +0xea0,0xe97,0xe97,0xea0,0xe9d,0xe9d,0xe9d,0xe9d,0x141,0x141,0x141,0x141,0x12f3,0x12f3,0x12f3,0x12f3, +0x12f3,0x12f3,0x12f6,0x12f6,0x12f9,0x12f6,0x198,0x198,0x198,0x198,0x198,0x198,0xea6,0xea6,0xea6,0xea6, +0xea6,0xea6,0x142b,0x142b,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0xea9,0x135c,0x144,0x144,0x144, +0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x1359,0xc63,0xc63,0xc63,0xc63, +0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xed6,0xec7,0xec1,0xed3, +0xed0,0xeca,0xeca,0xed9,0xec4,0xecd,0x147,0x147,0x147,0x147,0x147,0x147,0xf5a,0xf5a,0xf45,0xf5a, +0xf5d,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0x14d,0x14d,0x14d,0x14d,0xf54,0xf54,0xf54,0xf54, +0xf54,0xf54,0xf54,0xf54,0xf54,0xf54,0xf66,0xf66,0xf4b,0xf51,0xf66,0xf66,0xf4e,0xf4b,0xf4b,0xf4b, +0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf48,0xf48,0xf48,0xf48,0xf48,0xf48,0xf48,0xf48,0xf48, +0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0x14d,0x14d,0x14d,0x1362,0x135f,0x1362,0x135f, 
+0x1362,0x135f,0x1362,0x135f,0x1362,0x135f,0x1431,0x154b,0x154b,0x154b,0x17e2,0x150,0x154b,0x154b,0x1731,0x1731, +0x1731,0x172b,0x1731,0x172b,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150, +0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x150, +0x150,0x150,0x150,0x150,0x150,0x150,0x150,0x1548,0x1434,0x1434,0x135f,0x1062,0x1062,0x1062,0x1062,0x1062, +0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75, +0xf75,0xf75,0xf75,0xf75,0xf72,0xf72,0xf78,0xf78,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153, +0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf81, +0xf81,0xf81,0xf81,0xf81,0xf81,0xf81,0xf7b,0xf7b,0xf7b,0xf7b,0x1191,0x1191,0x156,0x156,0x156,0xf7e, +0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e, +0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x1734,0x159,0x159,0x159,0x159,0x159,0x159, +0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159, +0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0x159,0xf8a,0xf8a,0xf8a,0x1554, +0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x1554,0x15c,0xf87,0xf87,0xf87,0xf87, +0x1551,0x15c,0x15c,0x15c,0x15c,0x15c,0x15c,0x15c,0x15c,0x15c,0x15c,0x15c,0xf8d,0xf8d,0xf8d,0xf8d, +0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0x15f,0x15f, +0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x1089,0x1089,0x1089,0x1089, +0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077, +0x1086,0x1086,0x107d,0x107a,0x162,0x162,0x162,0x108c,0x108c,0x1080,0x1080,0x1080,0x1083,0x1083,0x1083,0x1083, +0x1083,0x1083,0x1083,0x1083,0x1083,0x1083,0x162,0x162,0x162,0x1089,0x1089,0x1089,0x108f,0x108f,0x108f,0x108f, 
+0x108f,0x108f,0x108f,0x108f,0x108f,0x108f,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x10a4,0x10a4,0x10a4,0x10a4, +0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a7,0x10a7,0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x165, +0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x165,0x10ce,0x10ce,0x10ce,0x10ce, +0x10c8,0x17e5,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x168,0x10d4,0x10d4,0x10cb,0x10cb,0x10cb,0x10cb, +0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x168,0x168,0x168,0x168,0x168,0x168,0x10f2,0x10f2,0x10f2,0x10f2, +0x10f2,0x10f2,0x10f2,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10e6,0x10ec,0x10ef, +0x16b,0x16b,0x16b,0x16b,0x16b,0x16b,0x16b,0x16b,0x16b,0x16b,0x16b,0x10e9,0x1101,0x1101,0x1101,0x1101, +0x1101,0x1101,0x1101,0x1101,0x1101,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10fe,0x10fe,0x10f5,0x10f5,0x10fe, +0x10fe,0x10f5,0x10f5,0x16e,0x16e,0x16e,0x16e,0x16e,0x16e,0x16e,0x16e,0x16e,0x1101,0x1101,0x1101,0x10f5, +0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x10f5,0x10fe,0x16e,0x16e,0x10fb,0x10fb,0x10fb,0x10fb, +0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x10fb,0x16e,0x16e,0x10f8,0x1104,0x1104,0x1104,0x1560,0x171,0x171,0x171, +0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171, +0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x171,0x110a,0x110a,0x110a,0x110a, +0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a, +0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110d,0x174,0x174,0x1110,0x1110,0x1110,0x1110, +0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110, +0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x1110,0x177,0x177,0x177,0x1113,0x1113,0x1113,0x1113, +0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x17a,0x17a,0x17a, 
+0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x1119,0x1119,0x1119,0x1119, +0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119, +0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x17d,0x17d,0x17d,0x17d,0x17d,0x1116,0x111c,0x111c,0x111c,0x111c, +0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x180,0x180,0x180,0x180,0x111f,0x111f,0x111f,0x111f, +0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f, +0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1197,0x1197,0x1197,0x1197, +0x11a0,0x1197,0x1197,0x1197,0x11a0,0x1197,0x1197,0x1197,0x1197,0x1194,0x186,0x186,0x119d,0x119d,0x119d,0x119d, +0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x186,0x11a3,0x11a3,0x11a3,0x11a3, +0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3, +0x11a3,0x11a3,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x11be,0x11be,0x11be,0x11be, +0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be, +0x11be,0x11bb,0x11a6,0x11bb,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x18c,0x11af,0x11b8,0x11a6,0x11b8, +0x11b8,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11a6, +0x11a6,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x11ac,0x18c,0x18c,0x11a9,0x11b5,0x11b5,0x11b5,0x11b5, +0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x11b5,0x11b5,0x11b5,0x11b5, +0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x11b5,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x11b2,0x11b2,0x11b2,0x11b2, +0x11b2,0x11b2,0x11b2,0x11c1,0x11c4,0x11c4,0x11c4,0x11c4,0x11b2,0x11b2,0x18c,0x18c,0x15ab,0x15ab,0x15ab,0x15ab, +0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15a8,0x210,0x1308,0x12e7,0x1302,0x1302, 
+0x1302,0x1302,0x1302,0x1302,0x1302,0x12ea,0x12ea,0x12ea,0x12ea,0x1302,0x12ea,0x12ea,0x12ea,0x12ea,0x12f0,0x14d6, +0x14dc,0x14d9,0x14d3,0x192,0x1701,0x1701,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x11d9,0x11d9,0x11d9,0x11d9, +0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d0,0x11d0,0x11d3,0x11dc, +0x11d6,0x11d6,0x11d6,0x11dc,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x11df,0x11df,0x11df,0x11df, +0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x130e,0x11e5,0x1311, +0x11e5,0x11e5,0x11e5,0x11e5,0x11e2,0x11e2,0x11e2,0x11e5,0x173a,0x173d,0x19b,0x19b,0x12d5,0x12d5,0x12d5,0x12d5, +0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5, +0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x19e,0x19e,0x19e,0x11fa,0x11ee,0x11ee,0x11ee, +0x11ee,0x11ee,0x11ee,0x11f1,0x1200,0x1200,0x11ee,0x11ee,0x11ee,0x11ee,0x1a1,0x12fc,0x11f4,0x11f4,0x11f4,0x11f4, +0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x1a1,0x1a1,0x1a1,0x1a1,0x11ee,0x11ee,0x121e,0x1212,0x121e,0x1a4, +0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, +0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x121b,0x121b,0x1221,0x1215,0x1218,0x1236,0x1236,0x1236,0x1230, +0x1230,0x1227,0x1230,0x1230,0x1227,0x1230,0x1230,0x1239,0x1233,0x122a,0x1a7,0x1a7,0x122d,0x122d,0x122d,0x122d, +0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x123f,0x123f,0x123f,0x123f, +0x123f,0x123f,0x123f,0x1aa,0x1aa,0x1aa,0x1aa,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c, +0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c,0x123c, +0x123c,0x123c,0x123c,0x123c,0x1aa,0x1aa,0x1aa,0x1aa,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248, +0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1ad,0x1245, 
+0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257, +0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1b0,0x1b0, +0x1b0,0x1251,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d, +0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x1b3,0x1b3, +0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, +0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6, +0x1260,0x1260,0x1260,0x1260,0x1260,0x1260,0x1260,0x1260,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1269,0x1bc,0x1287,0x1287,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf, +0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2, +0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1, +0x12b1,0x12b1,0x12b1,0x156c,0x156c,0x1c5,0x1c5,0x1c5,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1, +0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b1,0x12b4,0x12b4,0x12b4,0x1293,0x1c5, +0x13b6,0x12bd,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x13b6,0x12bd,0x13b6,0x12bd, +0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x1443,0x1443,0x1c5,0x1c5,0x1c5,0x1c5, +0x13b9,0x13b9,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x12ba,0x13b3,0x12ba,0x12ba,0x13b3,0x13b9,0x12c0, +0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1863,0x1c5,0x1c5,0x1c5, +0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5, 
+0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5, +0x1c5,0x1c5,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b, +0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x12e1,0x13d4,0x13d1,0x1c8, +0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x12db,0x12db,0x12db,0x12db, +0x12db,0x12db,0x12db,0x12db,0x12db,0x12db,0x12de,0x12db,0x12db,0x12db,0x12db,0x12db,0x12db,0x12db,0x12db,0x12db, +0x12db,0x12db,0x12db,0x12db,0x12db,0x12db,0x12db,0x12de,0x12db,0x12db,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d1, +0x13d4,0x13d4,0x13d4,0x1866,0x1c8,0x1c8,0x1c8,0x1c8,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8, +0x12d8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1401,0x1401,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, +0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, +0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1374,0x1374,0x1374,0x1374, +0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374,0x1374, +0x1374,0x1374,0x1374,0x1374,0x1374,0x136e,0x136e,0x136e,0x1cb,0x1cb,0x1371,0x1cb,0x1386,0x1386,0x1386,0x1386, +0x1386,0x1386,0x1377,0x1380,0x137a,0x137a,0x1380,0x1380,0x1380,0x137a,0x1380,0x137a,0x137a,0x137a,0x1383,0x1383, +0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x137d,0x137d,0x137d,0x137d,0x1d1,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1d1,0x1d1,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1d1,0x1d1,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1389,0x1389,0x1389,0x1389, +0x1389,0x1389,0x1389,0x1d1,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1d1,0x1608,0x1608,0x1608,0x1608, +0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x138c,0x138c,0x138c,0x138c, 
+0x138c,0x138c,0x138f,0x13a1,0x13a1,0x1395,0x1395,0x1395,0x1395,0x1395,0x1d4,0x1d4,0x1d4,0x1d4,0x1392,0x1392, +0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1398,0x1398, +0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4, +0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x156f,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, +0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, +0x13a4,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x13da,0x13d7,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, 0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, -0x170a,0x156f,0x1575,0x16b6,0x1dd,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x1dd,0x1dd,0x157e, -0x157e,0x1dd,0x1dd,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e, -0x157e,0x1dd,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x1dd,0x157e,0x157e,0x1dd,0x157e,0x157e,0x157e, -0x157e,0x157e,0x1dd,0x1dd,0x16b3,0x157e,0x156f,0x1575,0x156f,0x1575,0x1575,0x1575,0x1575,0x1dd,0x1dd,0x1575, -0x1575,0x1dd,0x1dd,0x1578,0x1578,0x157b,0x1dd,0x1dd,0x170d,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x156f, -0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1581,0x157e,0x157e,0x157e,0x157e,0x1575,0x1575,0x1dd,0x1dd,0x1572,0x1572, -0x1572,0x1572,0x1572,0x1572,0x1572,0x1dd,0x1dd,0x1dd,0x1572,0x1572,0x1572,0x1572,0x1572,0x1dd,0x1dd,0x1dd, -0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596, -0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1e0,0x1596,0x1596,0x1596,0x1596,0x1596, -0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590,0x1584,0x1584,0x1584,0x1590,0x1590, -0x1584,0x1593,0x1587,0x1584,0x1599,0x1599,0x158d,0x1599,0x1599,0x158a,0x17a0,0x1e0,0x15a8,0x15a8,0x15a8,0x159c, 
-0x159c,0x159c,0x159c,0x159c,0x159c,0x159f,0x15a2,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x15a5,0x15a5,0x15a5,0x15a5, -0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1710,0x1710,0x1710,0x1710, -0x15b4,0x15b1,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x15ba,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x15ba,0x15ba,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9, -0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15bd, -0x15c0,0x15c3,0x15c6,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x15d5,0x15d5,0x15d5,0x15d5, -0x15d5,0x15c9,0x15c9,0x1ef,0x1ef,0x1ef,0x1ef,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15d2,0x15d2,0x15d2,0x15d2, -0x15d2,0x15d2,0x15cf,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x15de,0x15de,0x15de,0x15de, -0x15de,0x1f2,0x1f2,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15d8,0x15d8,0x15d8,0x15d8, -0x15d8,0x15d8,0x15d8,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x15e1,0x15f3,0x15f3,0x15e7, -0x15f0,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x15ea,0x15ea,0x15ea,0x15ea, -0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x15f9,0x15f9,0x15f9,0x15f9, 
-0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9, -0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x1f8,0x1605,0x1605,0x1605,0x1605, -0x1605,0x15ff,0x1608,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1602,0x1602,0x1602,0x1602, -0x1602,0x1602,0x1602,0x1602,0x1602,0x1602,0x1605,0x1605,0x1605,0x1605,0x1605,0x1fb,0x160e,0x160e,0x160e,0x160e, -0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e, -0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x1fe,0x161a,0x161a,0x161a,0x161a, -0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a, -0x161a,0x161a,0x1617,0x1617,0x1617,0x1617,0x1617,0x201,0x201,0x201,0x201,0x201,0x1632,0x1632,0x1635,0x1635, -0x1638,0x1629,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x162f,0x162f,0x162f,0x162f, -0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x204,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x204,0x1632, -0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x204,0x204,0x204,0x204,0x204,0x1632,0x1632,0x1632,0x1641,0x1641,0x1641,0x1641, -0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641, -0x1641,0x1641,0x1641,0x1641,0x1641,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x164a,0x164a,0x164a,0x164a, -0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x20a,0x20a, -0x20a,0x20a,0x20a,0x20a,0x20a,0x1647,0x1647,0x1647,0x1647,0x20a,0x20a,0x20a,0x1665,0x1665,0x1665,0x1665, -0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x164d,0x165f,0x165f,0x164d,0x164d, 
-0x164d,0x164d,0x210,0x210,0x165f,0x165f,0x1662,0x1662,0x164d,0x164d,0x165f,0x1653,0x1650,0x1656,0x1668,0x1668, -0x1659,0x1659,0x165c,0x165c,0x165c,0x1668,0x1719,0x1719,0x1719,0x1719,0x1719,0x1719,0x1719,0x1719,0x1719,0x1719, -0x1719,0x1719,0x1719,0x1719,0x1716,0x1716,0x1716,0x1716,0x1713,0x1713,0x210,0x210,0x210,0x210,0x210,0x210, +0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x13a7,0x13a7,0x13a7,0x13a7, +0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1dd,0x1dd,0x13a7,0x13a7,0x13a7, +0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1572,0x1dd,0x13a7,0x13a7,0x13a7, +0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13dd,0x1dd,0x13a7,0x13a7,0x13a7, +0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1572,0x1572,0x1572,0x1572, +0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572,0x1572, +0x1572,0x1572,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x13fb,0x13f5,0x13f5,0x13f5, +0x13f5,0x13f5,0x1587,0x1587,0x1587,0x1587,0x1587,0x158a,0x16f8,0x158a,0x158a,0x158a,0x17c1,0x186f,0x186f,0x1e0, +0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x158a,0x158a,0x158a,0x158a, +0x158a,0x158a,0x1587,0x1587,0x1587,0x158a,0x1587,0x16f5,0x16f5,0x1e0,0x1e0,0x1e0,0x158a,0x1587,0x1587,0x158a, +0x186f,0x186f,0x186f,0x1e3,0x1e3,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x13aa,0x13aa,0x13aa,0x13aa, +0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa, +0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x144f,0x1590,0x144f,0x144f, +0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1746, +0x1746,0x1e9,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x17f1,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9, 
+0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9, +0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x17ee,0x1455,0x1455,0x1455,0x1455, +0x1ec,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, +0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1455,0x1ec, +0x1455,0x1ec,0x1ec,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec, +0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1455,0x1ec, +0x1ec,0x1ec,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1455,0x1ec, +0x1455,0x1ec,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1455,0x1ec, +0x1455,0x1ec,0x1ec,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec, +0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1455, +0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, +0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1455,0x1455,0x1455, +0x1ec,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, +0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec, +0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec, +0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1452,0x1452,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec, +0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x1458, +0x1458,0x1458,0x1458,0x1458,0x1467,0x1458,0x145b,0x145b,0x1458,0x1458,0x1458,0x145e,0x145e,0x1ef,0x1464,0x1464, 
+0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1461,0x146d,0x146d,0x146d,0x1ef,0x1ef,0x1ef,0x1ef, +0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a, +0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479,0x1479, +0x1479,0x1479,0x1479,0x1476,0x1470,0x1470,0x1476,0x1476,0x147f,0x147f,0x1479,0x147c,0x147c,0x1476,0x1473,0x1f2, +0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482, +0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482, +0x1f5,0x1f5,0x1f5,0x1f5,0x1749,0x1749,0x1482,0x1482,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749, +0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1f5,0x1f5,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749, +0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x148e,0x148e,0x148e,0x148e,0x148e,0x1f8,0x1f8,0x1f8, +0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x148e,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b, +0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b, +0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x148b,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8, +0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1488,0x1488,0x1488,0x1488,0x1491, +0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x14a3,0x14a6,0x14a9,0x14a9, +0x14a6,0x14ac,0x14ac,0x1497,0x149a,0x174f,0x174c,0x174c,0x174c,0x1596,0x1fb,0x1fb,0x149d,0x149d,0x149d,0x149d, +0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x1593,0x1755,0x1758,0x1752,0x175b,0x175b,0x14b2,0x14b2,0x14b2,0x14b2, +0x14b2,0x14b2,0x14b2,0x14b2,0x14b2,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x14af,0x14af,0x14af,0x14af, +0x14af,0x14af,0x14af,0x14af,0x14af,0x14af,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x14b5,0x14b5,0x14b5,0x14b5, 
+0x14b5,0x14b5,0x14b5,0x14b5,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x1305,0x1302,0x1305,0x12ed, +0x1302,0x1302,0x1302,0x1308,0x1302,0x1308,0x130b,0x1302,0x1308,0x1308,0x1302,0x1302,0x14c7,0x14c7,0x14c7,0x14c7, +0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14b8,0x14c1,0x14b8,0x14c1,0x14c1,0x14b8,0x14b8,0x14b8,0x14b8, +0x14b8,0x14b8,0x14c4,0x14bb,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x159c,0x159c,0x159c,0x159c, +0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x159c,0x207,0x207,0x1599,0x1599,0x1599,0x1599, +0x1599,0x159f,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x1704,0x16fb,0x16fb,0x16fb, +0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb, +0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x16fb,0x20d,0x20d,0x20d,0x20d,0x210,0x210,0x210,0x210, 0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210, -0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x213,0x166b,0x166b,0x166b, -0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b, -0x166b,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x166e,0x166e,0x166e,0x166e, -0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x216,0x216,0x216,0x216,0x166e,0x166e,0x166e,0x166e, -0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x216,0x216,0x216,0x216, -0x216,0x216,0x216,0x216,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x216,0x216, -0x216,0x216,0x216,0x216,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x216,0x216,0x216,0x216, -0x216,0x216,0x216,0x216,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e, -0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216, 
-0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216, -0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x1671,0x1680,0x1677,0x1674,0x1686,0x1686,0x167a,0x1686, -0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d, -0x167d,0x167d,0x219,0x219,0x219,0x219,0x219,0x219,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c, -0x168c,0x168c,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x1689,0x21c,0x21c,0x21c,0x21c,0x21c, -0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x1692,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b, -0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b,0x172b, -0x172b,0x172b,0x21f,0x21f,0x21f,0x171c,0x171c,0x171c,0x1728,0x1728,0x171c,0x171c,0x171c,0x171c,0x1728,0x171c, -0x171c,0x171c,0x171c,0x171f,0x21f,0x21f,0x21f,0x21f,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725, -0x1725,0x1725,0x1722,0x1722,0x172e,0x172e,0x172e,0x1722,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x222, -0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222, -0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222,0x222, -0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743, -0x1743,0x1743,0x1743,0x228,0x1743,0x1743,0x228,0x228,0x228,0x228,0x228,0x1740,0x1740,0x1740,0x1740,0x1740, -0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x22b,0x1746,0x22b,0x1746,0x1746,0x1746,0x1746,0x22b,0x1746, -0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x22b,0x1746, -0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1746,0x1749,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b, -0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab, 
-0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752, -0x1752,0x1752,0x1752,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e, -0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f, -0x174f,0x174f,0x174f,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x174c,0x174c,0x174c,0x174c,0x174c,0x174c, -0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1773,0x1776,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x231, -0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1824,0x1821,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x1824,0x231,0x231,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1821,0x181e,0x1824,0x1824,0x1824,0x231, -0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x181e,0x1821,0x1821,0x1821,0x1821,0x1821,0x231,0x231,0x231,0x231, -0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x231, -0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x1773,0x1773,0x1773,0x1773,0x1773,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, -0x1821,0x1821,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x1773,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231, -0x1758,0x1758,0x1758,0x1758,0x1755,0x1758,0x1758,0x175b,0x175e,0x175b,0x175b,0x1758,0x234,0x234,0x234,0x234, -0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1755,0x1755,0x1755,0x1755,0x1755, 
-0x17b2,0x17b2,0x17b2,0x17b2,0x17a9,0x17a9,0x17a9,0x17a3,0x17a6,0x17a6,0x17a6,0x237,0x237,0x237,0x237,0x237, -0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x237,0x237,0x237,0x237,0x17ac,0x17ac, -0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x23a,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd, -0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd, -0x17cd,0x17cd,0x17cd,0x17ca,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x23a,0x17b8,0x17b8,0x17b8,0x17b8, -0x17b8,0x17b8,0x17ca,0x17bb,0x17cd,0x17d0,0x17d0,0x17c4,0x17c1,0x17c1,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a, -0x23a,0x23a,0x23a,0x23a,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17be,0x17be, -0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x23a,0x23a,0x23a, -0x17dc,0x17df,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5, -0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x240,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6, -0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6,0x240,0x240,0x17d6,0x17d6,0x17d6,0x17d6,0x17d6, -0x1827,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243, -0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243, -0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5,0x17e5, -0x246,0x246,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9, -0x246,0x17e2,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17d9,0x17e2,0x17d9,0x17d9,0x17e2,0x17d9,0x17d9,0x246, -0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8, -0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249, 
-0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x1800,0x1800,0x17f1,0x17eb,0x17eb,0x1800,0x17ee,0x1803, -0x1803,0x1803,0x1803,0x1806,0x1806,0x17fa,0x17f7,0x17f4,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd, -0x17fd,0x17fd,0x24c,0x17fa,0x24c,0x17f4,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c, -0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c, -0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c, -0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x24f,0x24f,0x24f,0x24f, -0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809, -0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x24f,0x24f,0x24f,0x24f, -0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x252,0x252,0x252, -0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252, -0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d, -0x182d,0x182d,0x182d,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255, -0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258, -0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258, -0x1770,0x1770,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270, -0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258, -0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x921,0x921, -0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6, -0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b, 
-0x10d4,0x10d4,0x10d4,0x10d4,0x1275,0x1275,0x1275,0x1275,0x1275,0x1275,0x1275,0x1275,0x1473,0x1761,0x1761,0x1761, -0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e, -0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e, -0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39, -0xc39,0xc39,0xc39,0x1278,0x1278,0x1278,0x261,0x261,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, -0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, -0xe67,0xe67,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261, +0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x15b7,0x15b7,0x15b7,0x15b7, +0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x213,0x213,0x213,0x213,0x213,0x15b7,0x15b7,0x15b7,0x15b7, +0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x213,0x213,0x213,0x213,0x213,0x213,0x213, +0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x213,0x213,0x15b4,0x15ae,0x15b1,0x15ba, +0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216, +0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, +0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, +0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219, +0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219, +0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x175e,0x15c3,0x15c9,0x170a,0x21c,0x15d2,0x15d2,0x15d2, +0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x21c,0x21c,0x15d2,0x15d2,0x21c,0x21c,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2, 
+0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x21c,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2, +0x15d2,0x21c,0x15d2,0x15d2,0x21c,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x21c,0x21c,0x1707,0x15d2,0x15c3,0x15c9, +0x15c3,0x15c9,0x15c9,0x15c9,0x15c9,0x21c,0x21c,0x15c9,0x15c9,0x21c,0x21c,0x15cc,0x15cc,0x15cf,0x21c,0x21c, +0x1761,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x15c3,0x21c,0x21c,0x21c,0x21c,0x21c,0x15d5,0x15d2,0x15d2, +0x15d2,0x15d2,0x15c9,0x15c9,0x21c,0x21c,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x21c,0x21c,0x21c, +0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c, +0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea, +0x15ea,0x15ea,0x21f,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea,0x15ea, +0x15e4,0x15e4,0x15e4,0x15d8,0x15d8,0x15d8,0x15e4,0x15e4,0x15d8,0x15e7,0x15db,0x15d8,0x15ed,0x15ed,0x15e1,0x15ed, +0x15ed,0x15de,0x17f4,0x21f,0x15fc,0x15fc,0x15fc,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f3,0x15f6,0x222, +0x222,0x222,0x222,0x222,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x15f9,0x222,0x222, +0x222,0x222,0x222,0x222,0x1764,0x1764,0x1764,0x1764,0x1608,0x1605,0x225,0x225,0x225,0x225,0x225,0x225, +0x225,0x225,0x225,0x225,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e, +0x178e,0x178e,0x178e,0x178e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e, +0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x228,0x228,0x228,0x228,0x228, +0x228,0x228,0x228,0x228,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e, +0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x228,0x228,0x228,0x228,0x228,0x228, +0x228,0x228,0x228,0x228,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x228,0x228,0x228,0x228, 
+0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228, +0x228,0x228,0x228,0x228,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a, +0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x1611,0x1614,0x1617,0x161a,0x22b,0x22b,0x22b,0x22b,0x22b, +0x22b,0x22b,0x22b,0x22b,0x1629,0x1629,0x1629,0x1629,0x1629,0x161d,0x161d,0x22e,0x22e,0x22e,0x22e,0x1620, +0x1620,0x1620,0x1620,0x1620,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1623,0x22e,0x22e,0x22e,0x22e,0x22e, +0x22e,0x22e,0x22e,0x22e,0x1632,0x1632,0x1632,0x1632,0x1632,0x231,0x231,0x162f,0x162f,0x162f,0x162f,0x162f, +0x162f,0x162f,0x162f,0x162f,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x231,0x231,0x231,0x231,0x231, +0x231,0x231,0x231,0x231,0x1635,0x1647,0x1647,0x163b,0x1644,0x234,0x234,0x234,0x234,0x234,0x234,0x234, +0x234,0x234,0x234,0x234,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x234,0x234, +0x234,0x234,0x234,0x234,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d,0x164d, +0x164d,0x164d,0x164d,0x237,0x1659,0x1659,0x1659,0x1659,0x1659,0x1653,0x165c,0x1659,0x1659,0x1659,0x1659,0x1659, +0x1659,0x1659,0x1659,0x1659,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1659,0x1659, +0x1659,0x1659,0x1659,0x23a,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662, +0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662, +0x1662,0x1662,0x1662,0x23d,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e, +0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166b,0x166b,0x166b,0x166b,0x166b,0x240, +0x240,0x240,0x240,0x240,0x1686,0x1686,0x1689,0x1689,0x168c,0x167d,0x243,0x243,0x243,0x243,0x243,0x243, 
+0x243,0x243,0x243,0x243,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x243,0x167d, +0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x243,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x243,0x243,0x243,0x243, +0x243,0x1686,0x1686,0x1686,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695, +0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x246,0x246,0x246, +0x246,0x246,0x246,0x246,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x169e, +0x169e,0x169e,0x169e,0x169e,0x169e,0x169e,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x169b,0x169b,0x169b, +0x169b,0x249,0x249,0x249,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9, +0x16b9,0x16b9,0x16b9,0x16a1,0x16b3,0x16b3,0x16a1,0x16a1,0x16a1,0x16a1,0x24f,0x24f,0x16b3,0x16b3,0x16b6,0x16b6, +0x16a1,0x16a1,0x16b3,0x16a7,0x16a4,0x16aa,0x16bc,0x16bc,0x16ad,0x16ad,0x16b0,0x16b0,0x16b0,0x16bc,0x176d,0x176d, +0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176d,0x176a,0x176a,0x176a,0x176a, +0x1767,0x1767,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f, +0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f, +0x24f,0x24f,0x24f,0x24f,0x252,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf, +0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x252,0x252,0x252,0x252,0x252,0x252,0x252, +0x252,0x252,0x252,0x252,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2, +0x255,0x255,0x255,0x255,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2, +0x16c2,0x16c2,0x16c2,0x16c2,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x16c2,0x16c2,0x16c2,0x16c2, 
+0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x255,0x255,0x255,0x255,0x255,0x255,0x16c2,0x16c2,0x16c2,0x16c2, +0x16c2,0x16c2,0x16c2,0x16c2,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x16c2,0x16c2,0x16c2,0x16c2, +0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x255,0x255, +0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255, +0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255, +0x16c5,0x16d4,0x16cb,0x16c8,0x16da,0x16da,0x16ce,0x16da,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258, +0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x258,0x258,0x258,0x258,0x258,0x258, +0x16e0,0x16e0,0x16e0,0x16e0,0x16e0,0x16e0,0x16e0,0x16e0,0x16e0,0x16e0,0x16dd,0x16dd,0x16dd,0x16dd,0x16dd,0x16dd, +0x16dd,0x16dd,0x16dd,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x16e6, +0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f, +0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x25e,0x25e,0x25e,0x1770,0x1770,0x1770, +0x177c,0x177c,0x1770,0x1770,0x1770,0x1770,0x177c,0x1770,0x1770,0x1770,0x1770,0x1773,0x25e,0x25e,0x25e,0x25e, +0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1779,0x1776,0x1776,0x1782,0x1782,0x1782,0x1776, +0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261, 0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261, -0x261,0x261,0x261,0x261,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49, -0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0x264,0x264,0x264,0x264,0x264, -0x264,0x264,0x264,0x264,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, -0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, 
-0xb4c,0xb4c,0x267,0x267,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d, -0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a, -0x26a,0x26a,0x26a,0x26a,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7, -0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7, -0x13a7,0x13a7,0x26d,0x26d,0x10ec,0x369,0x369,0x375,0xc7b,0x378,0x378,0x378,0x378,0x378,0x378,0x378, -0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378, -0x378,0x378,0x378,0x378,0x375,0x369,0x369,0x369,0x369,0x369,0x369,0x369,0x369,0x375,0x375,0x375, -0x375,0x36f,0x10ef,0x12c6,0x378,0x8ee,0x8f1,0x36c,0x36c,0x10ec,0x12c3,0x12c3,0x37b,0x37b,0x37b,0x37b, -0x37b,0x37b,0x37b,0x37b,0x378,0x378,0x369,0x369,0x879,0x87c,0x909,0x909,0x909,0x909,0x909,0x909, -0x909,0x909,0x909,0x909,0x372,0xf4e,0xf4b,0x12c9,0x12c9,0x12c9,0x12c9,0x12c9,0x149a,0x10f2,0x10f2,0xea0, -0xea0,0xd6e,0xea0,0xea0,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x37b,0x378,0x378, -0x378,0x378,0x378,0x378,0x378,0x37b,0x378,0x378,0x37b,0x378,0x378,0x378,0x378,0x378,0x12c3,0x12c6, -0x36c,0x378,0x375,0x375,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456, -0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0x456,0xb67,0xb67,0xd7a,0xd7a,0x87f, -0xd7d,0x13b9,0x13b9,0x13b9,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459, -0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459,0x459, -0x459,0x459,0x459,0x459,0x45f,0x45f,0x45f,0x1107,0x1107,0x1107,0x1107,0x1107,0x45c,0x45c,0x45c,0x45c, -0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c, -0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x1104,0x1104, 
-0x1104,0x1104,0x1104,0x1104,0x462,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f, -0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f, -0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x45f,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465, -0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465, -0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x465,0x465,0x465,0x465,0x468,0x963, -0xf9c,0xf9c,0xf9f,0xf9c,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465, -0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0x46b,0x465,0xf9f,0xf9c, -0xf9f,0xf9c,0xf9f,0xf9c,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x47a,0x47a,0x47a,0x47a, -0x47a,0x47a,0x47a,0x47a,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x47a,0x47a,0x47a,0x47a, -0x47a,0x47a,0x47a,0x47a,0x657,0x657,0x65a,0x495,0x666,0x663,0x663,0x660,0x4bf,0x4bf,0x47d,0x47d, -0x47d,0x47d,0x47d,0xa95,0x669,0x4a1,0x681,0x684,0x4b6,0x669,0x4a4,0x4a4,0x495,0x4b0,0x4b0,0x657, -0x4bc,0x4b9,0x65d,0x48f,0x486,0x486,0x489,0x489,0x489,0x489,0x489,0x48c,0x489,0x489,0x489,0x480, -0x4c8,0x4c5,0x4c2,0x4c2,0x675,0x4aa,0x4a7,0x672,0x66f,0x66c,0x67e,0x498,0x67b,0x67b,0x4ad,0x4b0, -0x678,0x678,0x4ad,0x4b0,0x492,0x495,0x495,0x495,0x4b3,0x49e,0x49b,0xb7c,0xa9b,0xa9e,0xa98,0xa98, -0xa98,0xa98,0xb73,0xb73,0xb73,0xb73,0xb79,0xca8,0xca5,0xd89,0xd8c,0xb76,0xd8c,0xd8c,0xd8c,0xd8c, -0xd89,0xd8c,0xd8c,0xb70,0x4fb,0x4fb,0x513,0x693,0x4f8,0x690,0x4fb,0x510,0x4f8,0x693,0x50a,0x513, -0x513,0x513,0x50a,0x50a,0x513,0x513,0x513,0x69c,0x4f8,0x513,0x696,0x4f8,0x507,0x513,0x513,0x513, -0x513,0x513,0x4f8,0x4f8,0x4fe,0x690,0x699,0x4f8,0x513,0x4f8,0x69f,0x4f8,0x513,0x501,0x519,0x6a2, -0x513,0x513,0x504,0x50a,0x513,0x513,0x516,0x513,0x50a,0x50d,0x50d,0x50d,0x50d,0xaaa,0xaa7,0xcab, -0xd9b,0xb97,0xb9a,0xb9a,0xb94,0xb91,0xb91,0xb91,0xb91,0xb9a,0xb97,0xb97,0xb97,0xb97,0xb8e,0xb91, 
-0xd98,0xeac,0xeaf,0xfa5,0x1116,0x1116,0x1116,0x6a8,0x6a5,0x51c,0x51f,0x51f,0x51f,0x51f,0x51f,0x6a5, -0x6a8,0x6a8,0x6a5,0x51f,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae,0x6ae, -0x528,0x528,0x528,0x528,0x6ab,0x6ab,0x6ab,0x6ab,0x6ab,0x6ab,0x6ab,0x6ab,0x6ab,0x6ab,0x522,0x522, -0x522,0x522,0x522,0x522,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52b,0x534,0x534,0x52e, -0x52e,0x52e,0x531,0x52b,0x52e,0x52e,0x52b,0x52b,0x52b,0x52b,0x52e,0x52e,0x6b1,0x6b1,0x52b,0x52b, -0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x531,0x531,0x531, -0x52e,0x52e,0x6b4,0x52e,0x6b4,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52e,0x52b,0x52e,0x52b,0x52b, -0x52b,0x52b,0x52b,0x52b,0x52e,0x52e,0x52b,0x6b1,0x52b,0x52b,0x52b,0xab0,0xab0,0xab0,0xab0,0xab0, -0xab0,0xab0,0xab0,0xab0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, -0x6ba,0x537,0x6ba,0x6ba,0x53a,0x537,0x537,0x6ba,0x6ba,0x53a,0x537,0x6ba,0x53a,0x537,0x537,0x6ba, -0x537,0x6ba,0x546,0x543,0x537,0x6ba,0x537,0x537,0x537,0x537,0x6ba,0x537,0x537,0x6ba,0x6ba,0x6ba, -0x6ba,0x537,0x537,0x6ba,0x53a,0x6ba,0x53a,0x6ba,0x6ba,0x6ba,0x6ba,0x6ba,0x6c0,0x53d,0x6ba,0x53d, -0x53d,0x537,0x537,0x537,0x6ba,0x6ba,0x6ba,0x6ba,0x537,0x537,0x537,0x537,0x6ba,0x6ba,0x537,0x537, -0x537,0x53a,0x537,0x537,0x53a,0x537,0x537,0x53a,0x6ba,0x53a,0x537,0x537,0x6ba,0x537,0x537,0x537, -0x537,0x537,0x6ba,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537, -0x6bd,0x6ba,0x53a,0x537,0x6ba,0x6ba,0x6ba,0x6ba,0x537,0x537,0x6ba,0x6ba,0x537,0x53a,0x6bd,0x6bd, -0x53a,0x53a,0x537,0x537,0x53a,0x53a,0x537,0x537,0x53a,0x53a,0x537,0x537,0x537,0x537,0x537,0x537, -0x53a,0x53a,0x6ba,0x6ba,0x53a,0x53a,0x6ba,0x6ba,0x53a,0x53a,0x537,0x537,0x537,0x537,0x537,0x537, -0x537,0x537,0x537,0x537,0x537,0x6ba,0x537,0x537,0x537,0x6ba,0x537,0x537,0x537,0x537,0x537,0x537, -0x537,0x6ba,0x537,0x537,0x537,0x537,0x537,0x537,0x53a,0x53a,0x53a,0x53a,0x537,0x537,0x537,0x537, 
-0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x6ba,0x537,0x537,0x537,0x537, -0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537, -0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x537,0x53a,0x53a,0x53a,0x53a, -0x537,0x537,0x537,0x537,0x537,0x537,0x53a,0x53a,0x53a,0x53a,0x537,0x540,0x537,0x537,0xba0,0xba0, -0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0x549,0xab3,0x549,0x549, -0x549,0x549,0x549,0x549,0x555,0x552,0x555,0x552,0x549,0x549,0x549,0x549,0x549,0x549,0x6c3,0x549, -0x549,0x549,0x549,0x549,0x549,0x549,0x7c5,0x7c5,0x549,0x549,0x549,0x549,0x54f,0x54f,0x549,0x549, -0x549,0x549,0x549,0x549,0x54c,0x7cb,0x7c8,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549, -0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549, -0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0xab3,0xba6,0xab3,0xab3,0xab3, -0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558, -0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558,0x558, -0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x55e,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c, -0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xc0c,0xd20, -0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5, -0x6d5,0x6d5,0x6d5,0x6d5,0x561,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564, -0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x564,0x564,0x564,0x564, -0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5, -0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8,0x6d8, -0x567,0x567,0x6d8,0x6d8,0x6d8,0x6d8,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9, 
-0x6de,0x6de,0x56a,0x6db,0x6db,0x6db,0x6db,0x6db,0x6db,0x6db,0x56d,0x56d,0x56a,0x56a,0x570,0x570, -0x570,0x570,0x6de,0x6de,0x570,0x570,0x6e1,0x6de,0x56a,0x56a,0x56a,0x56a,0x6de,0x6de,0x570,0x570, -0x6e1,0x6de,0x56a,0x56a,0x56a,0x56a,0x6de,0x6de,0x6db,0x56a,0x570,0x6de,0x56a,0x56a,0x6db,0x6de, -0x6de,0x6de,0x570,0x570,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a,0x56a, -0x56a,0x56a,0x6de,0x6db,0x6de,0x6db,0x56a,0x570,0x570,0x570,0x570,0x570,0x570,0x56a,0x56a,0x6db, -0xab9,0xab9,0xab9,0xab9,0xab9,0xab9,0xab9,0xab9,0xbac,0xbac,0xbac,0xbaf,0xbaf,0xc24,0xc24,0xbac, -0x57c,0x57c,0x57c,0x57c,0x579,0x6f0,0x6f0,0x573,0x573,0x6e4,0x573,0x573,0x573,0x573,0x6ea,0x6e4, -0x573,0x579,0x573,0x573,0xd29,0xd29,0xbb2,0xbb2,0xda7,0xabc,0x576,0x576,0x6e7,0x57f,0x6e7,0x576, -0x579,0x573,0x579,0x579,0x573,0x573,0x579,0x573,0x573,0x573,0x579,0x573,0x573,0x573,0x579,0x579, -0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x579,0x57c,0x57c,0x576,0x573,0x573,0x573,0x573, -0x6f3,0x573,0x6f3,0x573,0x573,0x573,0x573,0x573,0x7ce,0x7ce,0x7ce,0x7ce,0x7ce,0x7ce,0x7ce,0x7ce, -0x7ce,0x7ce,0x7ce,0x7ce,0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x573,0x573, -0x6f3,0x6f0,0x582,0x6f3,0x6e4,0x6ea,0x579,0x6e4,0x6ed,0x6e4,0x6e4,0x573,0x6e4,0x6f0,0x582,0x6f0, -0xabc,0xabc,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb8,0xbb5,0xbb5,0xda1,0xe5e, -0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585, -0x585,0x585,0x585,0x585,0x588,0x136e,0x136e,0x136e,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588, -0x14be,0x58e,0x59a,0x58e,0x58e,0x136e,0x588,0x588,0x59a,0x59a,0x1371,0x1371,0x5a0,0x5a0,0x588,0x594, -0x588,0x588,0x594,0x588,0x594,0x588,0x594,0x588,0x588,0x588,0x588,0x588,0x588,0x594,0x588,0x588, -0x588,0x588,0x588,0x588,0x136e,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x594, -0x594,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x6f9,0x588,0x588,0x588,0x588,0x588,0x588, 
-0x594,0x588,0x588,0x594,0x588,0x588,0x588,0x588,0x136e,0x588,0x136e,0x588,0x588,0x588,0x588,0x136e, -0x136e,0x136e,0x588,0x1272,0x588,0x588,0x588,0x591,0x591,0x591,0x591,0x12f0,0x12f0,0x588,0x58b,0x597, -0x59d,0x588,0x588,0x588,0xbbe,0xbbb,0xbbe,0xbbb,0xbbe,0xbbb,0xbbe,0xbbb,0xbbe,0xbbb,0xbbe,0xbbb, -0xbbe,0xbbb,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x588,0x594,0x588,0x588, -0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x136e,0x588,0x588,0x588, -0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x136e,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4, -0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5c1,0x5c7,0x5b8,0x5bb,0x5c7,0x5c7,0x5c7,0x5c7, -0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7, -0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1, -0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1, -0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1, -0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1, -0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c7,0x5c1, -0x5c4,0x5ca,0x5c7,0x5c1,0x5c4,0x5ca,0x5c7,0x5c1,0x5c7,0x5c1,0x12f3,0x12f3,0x12f3,0x12f3,0x12f3,0x12f3, -0x12f3,0x12f3,0x12f3,0x12f3,0x12f3,0x12f3,0x12f3,0x12f3,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c4,0x5ca, -0x5c4,0x5ca,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1,0x5c7,0x5c1, -0x5c4,0x5c7,0x5c1,0x5c4,0x5c7,0x5c1,0x5c4,0x5ca,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, 
-0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4, -0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7, -0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c4,0x5c1,0x5c4,0x5c1,0x5c4,0x5c1,0x5c1, -0x5c4,0x5c1,0x5c1,0x5c4,0x5c1,0x5c4,0x5c1,0x5c1,0x5c4,0x5c1,0x5c4,0x5c4,0x5c1,0x5c1,0x5c1,0x5c4, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1, -0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c4,0x5c1,0x5c1, -0x5c4,0x5c1,0x5c4,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4, -0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4, -0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5ca,0x5c7,0x5c7,0x5c7,0x5c7, -0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7, -0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5ca,0x5ca,0x5ca,0x5ca, -0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca, -0x5ca,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5c7,0x5cd,0x5cd,0x5cd,0x5cd, -0xfb1,0xfb1,0xfb1,0x14c1,0x14c1,0x14c1,0x14c1,0x14c1,0x14c1,0x14c1,0x16ce,0x16ce,0x82b,0x831,0x831,0x83d, -0x83d,0x82e,0x825,0x82e,0x825,0x82e,0x825,0x82e,0x825,0x82e,0x825,0x82e,0x5dc,0x5dc,0x5d6,0x5dc, -0x5d6,0x5dc,0x5d6,0x5dc,0x5d6,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5dc,0x5d6,0x5d9,0x5df,0x5dc, -0x5d6,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5dc, -0x5d6,0x5dc,0x5d6,0x5dc,0x5d6,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9, -0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9, 
-0x5df,0x5dc,0x5d6,0x5d9,0x5df,0x5dc,0x5d6,0x5d9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9, -0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c9,0x6c6,0x6c6,0x6c6,0x6c6, -0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6, -0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6c6,0x6cf,0x6cf, -0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6d2,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf, -0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cf,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc, -0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6cc,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5, -0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5, -0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6d5,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc, -0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc, -0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0xc12,0x891,0x88b,0x888,0x88e,0x885,0x711,0x714, -0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x714,0x897,0x711,0x711,0x711,0x711,0x711,0x711,0x711, -0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711, -0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x711,0x894,0x894,0x717,0x8a6,0x8a9,0x8af, -0x7d1,0x7dd,0x8c4,0x7da,0x89d,0x89a,0x89d,0x89a,0x8a3,0x8a0,0x8a3,0x8a0,0x89d,0x89a,0x7d7,0x8af, -0x89d,0x89a,0x89d,0x89a,0x89d,0x89a,0x89d,0x89a,0x8b2,0x8bb,0x8b8,0x8b8,0x71d,0x759,0x759,0x759, -0x759,0x759,0x759,0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x753, -0x753,0x753,0x753,0x753,0x753,0x753,0x753,0x720,0x73b,0x71a,0x741,0x744,0x73e,0x756,0x756,0x756, -0x756,0x756,0x756,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750, -0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x720,0x73b,0x71a,0x73b,0xc15,0x7bf,0x7bf,0x7bf,0x7bf, 
-0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf, -0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x7bf,0x126c,0x126c, -0x126c,0x126c,0x126c,0x7c2,0x7d7,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x8fa,0x8fa, -0x8fa,0x8fa,0x7e0,0x7e0,0x8b5,0x8c1,0x8c1,0x8c1,0x8c1,0x8be,0x7d4,0x8ac,0xae0,0xae0,0xae0,0xc27, -0xc45,0xc42,0xafb,0x882,0x7e6,0x7e3,0x7e6,0x7e9,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e6,0x7e3,0x7e3, -0x7e3,0x7e3,0x7e3,0x7e3,0x7e6,0x7e6,0x7e3,0x7e6,0x7e6,0x7e3,0x7e6,0x7e6,0x7e3,0x7e6,0x7e6,0x7e3, -0x7e6,0x7e6,0x7e3,0x7e3,0xc48,0x7f8,0x7f2,0x7f8,0x7f2,0x7f8,0x7f2,0x7f8,0x7f2,0x7f8,0x7f2,0x7f2, -0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2, -0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f8,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f5,0x7f2,0x7f2, -0x7f2,0x7f2,0x7f2,0x7f2,0x7f5,0x7f5,0x7f2,0x7f5,0x7f5,0x7f2,0x7f5,0x7f5,0x7f2,0x7f5,0x7f5,0x7f2, -0x7f5,0x7f5,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f8,0x7f2,0x7f8,0x7f2,0x7f8,0x7f2,0x7f2,0x7f2,0x7f2, -0x7f2,0x7f2,0x7f8,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f5,0x7f8,0x7f8,0x7f5,0x7f5,0x7f5,0x7f5,0x8ca, -0x8cd,0x7fb,0x7fe,0xc30,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804, -0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804, -0x804,0x804,0x804,0x804,0x807,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804, -0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804,0x804, -0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810, -0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0x810,0xd32,0xd32,0xe61,0x80a, -0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0xd2c,0xd2c,0xd2c,0xd2c, +0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797, 
+0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x267,0x1797,0x1797,0x267,0x267, +0x267,0x267,0x267,0x1794,0x1794,0x1794,0x1794,0x1794,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x26a, +0x179a,0x26a,0x179a,0x179a,0x179a,0x179a,0x26a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a, +0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x26a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a, +0x179a,0x179d,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff, +0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6, +0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x26d,0x26d,0x26d,0x26d,0x26d, +0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3, +0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x26d,0x26d,0x26d,0x26d,0x26d, +0x26d,0x26d,0x17a0,0x17a0,0x17a0,0x17a0,0x17a0,0x17a0,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270, +0x270,0x270,0x270,0x270,0x273,0x273,0x273,0x273,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7, +0x17ca,0x1878,0x1878,0x1878,0x1878,0x1875,0x1878,0x279,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1878,0x1875, +0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x1878,0x279,0x279,0x1878,0x1878,0x1878,0x1878,0x1878, +0x1878,0x1878,0x1875,0x1872,0x1875,0x1878,0x1878,0x273,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1872,0x1875, +0x1875,0x1875,0x1875,0x1875,0x276,0x273,0x273,0x273,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875, +0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276, +0x276,0x276,0x276,0x276,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x17c7,0x17c7,0x17c7,0x17c7,0x17c7,0x1875,0x1875,0x1875, 
+0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x1875,0x276,0x276,0x276,0x276,0x276,0x276, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x17c7,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x276,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x279,0x276,0x276, +0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x17ac,0x17ac,0x17ac,0x17ac,0x17a9,0x17ac,0x17ac,0x17af,0x17b2,0x17af,0x17af,0x17ac,0x27c,0x27c,0x27c,0x27c, +0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, +0x1806,0x1806,0x1806,0x1806,0x17fd,0x17fd,0x17fd,0x17f7,0x17fa,0x17fa,0x17fa,0x27f,0x27f,0x27f,0x27f,0x27f, +0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x27f,0x27f,0x27f,0x27f,0x1800,0x1800, +0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x282,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, +0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, +0x1821,0x1821,0x1821,0x181e,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x282,0x180c,0x180c,0x180c,0x180c, +0x180c,0x180c,0x181e,0x180f,0x1821,0x1824,0x1824,0x1818,0x1815,0x1815,0x282,0x282,0x282,0x282,0x282,0x282, +0x282,0x282,0x282,0x282,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x1812,0x1812, +0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x282,0x282,0x282, +0x1830,0x1833,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839, 
+0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x288,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, +0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x288,0x288,0x182a,0x182a,0x182a,0x182a,0x182a, +0x187b,0x28e,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, +0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, +0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839,0x1839, +0x291,0x291,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d, +0x291,0x1836,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x1836,0x182d,0x182d,0x1836,0x182d,0x182d,0x291, +0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x291,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c, +0x183c,0x183c,0x183c,0x183c,0x183c,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294, +0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x1854,0x1854,0x1845,0x183f,0x183f,0x1854,0x1842,0x1857, +0x1857,0x1857,0x1857,0x185a,0x185a,0x184e,0x184b,0x1848,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851, +0x1851,0x1851,0x297,0x184e,0x297,0x1848,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297, +0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297, +0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860, +0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x1860,0x29a,0x29a,0x29a,0x29a, +0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d, +0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x185d,0x29a,0x29a,0x29a,0x29a, +0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x29d,0x29d,0x29d, 
+0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d, +0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881, +0x1881,0x1881,0x1881,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a6,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a3,0x2a3,0x975,0x975,0x17c4,0x17c4,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1, +0x2c1,0x2c1,0x2c1,0x2c1,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, +0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, +0x2a6,0x2a6,0x2a6,0x2a6,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a, +0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x2a9,0x2a9,0x2a9,0x2a9,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2ac, +0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac,0x2ac, +0x2ac,0x2ac,0x2ac,0x2ac,0x112b,0x112b,0x112b,0x112b,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc, +0x14ca,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af, +0x2af,0x2af,0x2af,0x2af,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0x12cf, +0x12cf,0x12cf,0x2b2,0x2b2,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe, +0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0x2b2,0x2b2, 
+0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2, +0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2,0x2b2, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5,0x2b5, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0x2b8,0x2b8, +0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4, +0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb,0x2bb, +0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe, +0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x2be,0x2be, +0x1143,0x3ba,0x3ba,0x3c6,0xccf,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x3c6,0x3ba,0x3ba,0x3ba,0x3ba,0x3ba,0x3ba,0x3ba,0x3ba,0x3c6,0x3c6,0x3c6,0x3c6,0x3c0,0x1146,0x131d, +0x3c9,0x942,0x945,0x3bd,0x3bd,0x1143,0x131a,0x131a,0x3cc,0x3cc,0x3cc,0x3cc,0x3cc,0x3cc,0x3cc,0x3cc, +0x3c9,0x3c9,0x3ba,0x3ba,0x8cd,0x8d0,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d,0x95d, +0x3c3,0xfa5,0xfa2,0x1320,0x1320,0x1320,0x1320,0x1320,0x14f1,0x1149,0x1149,0xef7,0xef7,0xdc2,0xef7,0xef7, +0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3cc,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9, +0x3c9,0x3cc,0x3c9,0x3c9,0x3cc,0x3c9,0x3c9,0x3c9,0x3c9,0x3c9,0x131a,0x131d,0x3bd,0x3c9,0x3c6,0x3c6, +0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7, 
+0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0xbbb,0xbbb,0xdce,0xdce,0x8d3,0xdd1,0x1410,0x1410,0x1410, +0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa, +0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa, +0x4b0,0x4b0,0x4b0,0x115e,0x115e,0x115e,0x115e,0x115e,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad, +0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad, +0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x115b,0x115b,0x115b,0x115b,0x115b,0x115b, +0x4b3,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0, +0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0, +0x4b0,0x4b0,0x4b0,0x4b0,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6, +0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6, +0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4b6,0x4b6,0x4b6,0x4b6,0x4b9,0x9b7,0xff3,0xff3,0xff6,0xff3, +0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6, +0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0x4bc,0x4b6,0xff6,0xff3,0xff6,0xff3,0xff6,0xff3, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4c8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x6a8,0x6a8,0x6ab,0x4e6,0x6b7,0x6b4,0x6b4,0x6b1,0x510,0x510,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0xae9, +0x6ba,0x4f2,0x6d2,0x6d5,0x507,0x6ba,0x4f5,0x4f5,0x4e6,0x501,0x501,0x6a8,0x50d,0x50a,0x6ae,0x4e0, +0x4d7,0x4d7,0x4da,0x4da,0x4da,0x4da,0x4da,0x4dd,0x4da,0x4da,0x4da,0x4d1,0x519,0x516,0x513,0x513, +0x6c6,0x4fb,0x4f8,0x6c3,0x6c0,0x6bd,0x6cf,0x4e9,0x6cc,0x6cc,0x4fe,0x501,0x6c9,0x6c9,0x4fe,0x501, +0x4e3,0x4e6,0x4e6,0x4e6,0x504,0x4ef,0x4ec,0xbd0,0xaef,0xaf2,0xaec,0xaec,0xaec,0xaec,0xbc7,0xbc7, 
+0xbc7,0xbc7,0xbcd,0xcfc,0xcf9,0xddd,0xde0,0xbca,0xde0,0xde0,0xde0,0xde0,0xddd,0xde0,0xde0,0xbc4, +0x54c,0x54c,0x564,0x6e4,0x549,0x6e1,0x54c,0x561,0x549,0x6e4,0x55b,0x564,0x564,0x564,0x55b,0x55b, +0x564,0x564,0x564,0x6ed,0x549,0x564,0x6e7,0x549,0x558,0x564,0x564,0x564,0x564,0x564,0x549,0x549, +0x54f,0x6e1,0x6ea,0x549,0x564,0x549,0x6f0,0x549,0x564,0x552,0x56a,0x6f3,0x564,0x564,0x555,0x55b, +0x564,0x564,0x567,0x564,0x55b,0x55e,0x55e,0x55e,0x55e,0xafe,0xafb,0xcff,0xdef,0xbeb,0xbee,0xbee, +0xbe8,0xbe5,0xbe5,0xbe5,0xbe5,0xbee,0xbeb,0xbeb,0xbeb,0xbeb,0xbe2,0xbe5,0xdec,0xf03,0xf06,0xffc, +0x116d,0x116d,0x116d,0x6f9,0x6f6,0x56d,0x570,0x570,0x570,0x570,0x570,0x6f6,0x6f9,0x6f9,0x6f6,0x570, +0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x579,0x579,0x579,0x579, +0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x573,0x573,0x573,0x573,0x573,0x573, +0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57c,0x585,0x585,0x57f,0x57f,0x57f,0x582,0x57c, +0x57f,0x57f,0x57c,0x57c,0x57c,0x57c,0x57f,0x57f,0x702,0x702,0x57c,0x57c,0x57f,0x57f,0x57f,0x57f, +0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x582,0x582,0x582,0x57f,0x57f,0x705,0x57f, +0x705,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f,0x57c,0x57f,0x57c,0x57c,0x57c,0x57c,0x57c,0x57c, +0x57f,0x57f,0x57c,0x702,0x57c,0x57c,0x57c,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04, +0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0x70b,0x588,0x70b,0x70b, +0x58b,0x588,0x588,0x70b,0x70b,0x58b,0x588,0x70b,0x58b,0x588,0x588,0x70b,0x588,0x70b,0x597,0x594, +0x588,0x70b,0x588,0x588,0x588,0x588,0x70b,0x588,0x588,0x70b,0x70b,0x70b,0x70b,0x588,0x588,0x70b, +0x58b,0x70b,0x58b,0x70b,0x70b,0x70b,0x70b,0x70b,0x711,0x58e,0x70b,0x58e,0x58e,0x588,0x588,0x588, +0x70b,0x70b,0x70b,0x70b,0x588,0x588,0x588,0x588,0x70b,0x70b,0x588,0x588,0x588,0x58b,0x588,0x588, +0x58b,0x588,0x588,0x58b,0x70b,0x58b,0x588,0x588,0x70b,0x588,0x588,0x588,0x588,0x588,0x70b,0x588, 
+0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x70e,0x70b,0x58b,0x588, +0x70b,0x70b,0x70b,0x70b,0x588,0x588,0x70b,0x70b,0x588,0x58b,0x70e,0x70e,0x58b,0x58b,0x588,0x588, +0x58b,0x58b,0x588,0x588,0x58b,0x58b,0x588,0x588,0x588,0x588,0x588,0x588,0x58b,0x58b,0x70b,0x70b, +0x58b,0x58b,0x70b,0x70b,0x58b,0x58b,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588, +0x588,0x70b,0x588,0x588,0x588,0x70b,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x70b,0x588,0x588, +0x588,0x588,0x588,0x588,0x58b,0x58b,0x58b,0x58b,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588, +0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x70b,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588, +0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588, +0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x58b,0x58b,0x58b,0x58b,0x588,0x588,0x588,0x588, +0x588,0x588,0x58b,0x58b,0x58b,0x58b,0x588,0x591,0x588,0x588,0xbf4,0xbf4,0xbf4,0xbf4,0xbf4,0xbf4, +0xbf4,0xbf4,0xbf4,0xbf4,0xbf4,0xbf4,0xbf4,0xbf4,0x59a,0xb07,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a, +0x5a6,0x5a3,0x5a6,0x5a3,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x714,0x59a,0x59a,0x59a,0x59a,0x59a, +0x59a,0x59a,0x819,0x819,0x59a,0x59a,0x59a,0x59a,0x5a0,0x5a0,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a, +0x59d,0x81f,0x81c,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a, +0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a, +0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0x59a,0xb07,0xbfa,0xb07,0xb07,0xb07,0x5a9,0x5a9,0x5a9,0x5a9, +0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9, +0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x5a9,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x5af,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60, +0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xc60,0xd74,0x726,0x726,0x726,0x726, 
+0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726, +0x5b2,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x726,0x726,0x726,0x726, +0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x5b5,0x5b5,0x5b5,0x5b5,0x726,0x726,0x726,0x726, +0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x729,0x729,0x729,0x729, +0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x5b8,0x5b8,0x729,0x729, +0x729,0x729,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0x72f,0x72f,0x5bb,0x72c, +0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x5be,0x5be,0x5bb,0x5bb,0x5c1,0x5c1,0x5c1,0x5c1,0x72f,0x72f, +0x5c1,0x5c1,0x732,0x72f,0x5bb,0x5bb,0x5bb,0x5bb,0x72f,0x72f,0x5c1,0x5c1,0x732,0x72f,0x5bb,0x5bb, +0x5bb,0x5bb,0x72f,0x72f,0x72c,0x5bb,0x5c1,0x72f,0x5bb,0x5bb,0x72c,0x72f,0x72f,0x72f,0x5c1,0x5c1, +0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x5bb,0x72f,0x72c, +0x72f,0x72c,0x5bb,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5c1,0x5bb,0x5bb,0x72c,0xb0d,0xb0d,0xb0d,0xb0d, +0xb0d,0xb0d,0xb0d,0xb0d,0xc00,0xc00,0xc00,0xc03,0xc03,0xc78,0xc78,0xc00,0x5cd,0x5cd,0x5cd,0x5cd, +0x5ca,0x741,0x741,0x5c4,0x5c4,0x735,0x5c4,0x5c4,0x5c4,0x5c4,0x73b,0x735,0x5c4,0x5ca,0x5c4,0x5c4, +0xd7d,0xd7d,0xc06,0xc06,0xdfe,0xb10,0x5c7,0x5c7,0x738,0x5d0,0x738,0x5c7,0x5ca,0x5c4,0x5ca,0x5ca, +0x5c4,0x5c4,0x5ca,0x5c4,0x5c4,0x5c4,0x5ca,0x5c4,0x5c4,0x5c4,0x5ca,0x5ca,0x5c4,0x5c4,0x5c4,0x5c4, +0x5c4,0x5c4,0x5c4,0x5c4,0x5ca,0x5cd,0x5cd,0x5c7,0x5c4,0x5c4,0x5c4,0x5c4,0x747,0x5c4,0x747,0x5c4, +0x5c4,0x5c4,0x5c4,0x5c4,0x822,0x822,0x822,0x822,0x822,0x822,0x822,0x822,0x822,0x822,0x822,0x822, +0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x744,0x741,0x5d3,0x744, +0x735,0x73b,0x5ca,0x735,0x73e,0x735,0x735,0x5c4,0x735,0x741,0x5d3,0x741,0xb10,0xb10,0xc09,0xc09, +0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc0c,0xc09,0xc09,0xdf5,0xeb5,0x5d6,0x5d6,0x5d6,0x5d6, 
+0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6,0x5d6, +0x5d9,0x13c5,0x13c5,0x13c5,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x1515,0x5df,0x5eb,0x5df, +0x5df,0x13c5,0x5d9,0x5d9,0x5ee,0x5eb,0x13c8,0x13c8,0x5f1,0x5f1,0x5d9,0x5e5,0x5d9,0x5d9,0x5e5,0x5d9, +0x5e5,0x5d9,0x5e5,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5e5,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9, +0x13c5,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5e5,0x5e5,0x5d9,0x5d9,0x5d9, +0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x74d,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5e5,0x5d9,0x5d9,0x5e5, +0x5d9,0x5d9,0x5d9,0x5d9,0x13c5,0x5d9,0x13c5,0x5d9,0x5d9,0x5d9,0x5d9,0x13c5,0x13c5,0x13c5,0x5d9,0x12c9, +0x5d9,0x5d9,0x5d9,0x5e2,0x5e2,0x5e2,0x5e2,0x1347,0x1347,0x5d9,0x5dc,0x5e8,0x5ee,0x5d9,0x5d9,0x5d9, +0xc12,0xc0f,0xc12,0xc0f,0xc12,0xc0f,0xc12,0xc0f,0xc12,0xc0f,0xc12,0xc0f,0xc12,0xc0f,0x74a,0x74a, +0x74a,0x74a,0x74a,0x74a,0x74a,0x74a,0x74a,0x74a,0x5d9,0x5e5,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9, +0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x13c5,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9, +0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x5d9,0x13c5,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x61b,0x61b,0x61b,0x61b, +0x61b,0x61b,0x61b,0x61b,0x612,0x618,0x609,0x60c,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x60f,0x60f,0x60f,0x60f,0x60f,0x60f,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612, +0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612, +0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612,0x618,0x612,0x618,0x612,0x618,0x612,0x618,0x612, 
+0x618,0x612,0x618,0x612,0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612,0x615,0x61b,0x618,0x612, +0x615,0x61b,0x618,0x612,0x618,0x612,0x615,0x61b,0x618,0x612,0x618,0x612,0x615,0x61b,0x618,0x612, +0x615,0x61b,0x618,0x612,0x618,0x612,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a,0x134a, +0x134a,0x134a,0x134a,0x134a,0x618,0x612,0x618,0x612,0x618,0x612,0x615,0x61b,0x615,0x61b,0x618,0x612, +0x618,0x612,0x618,0x612,0x618,0x612,0x618,0x612,0x618,0x612,0x618,0x612,0x615,0x618,0x612,0x615, +0x618,0x612,0x615,0x61b,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x615,0x615,0x612,0x615,0x612,0x615,0x612,0x612,0x615,0x612,0x612,0x615, +0x612,0x615,0x612,0x612,0x615,0x612,0x615,0x615,0x612,0x612,0x612,0x615,0x612,0x612,0x612,0x612, +0x612,0x615,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612, +0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x612,0x615,0x615,0x612,0x612,0x615,0x612,0x615,0x612, +0x612,0x612,0x612,0x612,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x615, +0x615,0x615,0x615,0x615,0x615,0x615,0x615,0x61b,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b, +0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61e,0x61e,0x61e,0x61e,0x1008,0x1008,0x1008,0x1518, 
+0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1722,0x1722,0x87f,0x885,0x885,0x891,0x891,0x882,0x879,0x882, +0x879,0x882,0x879,0x882,0x879,0x882,0x879,0x882,0x62d,0x62d,0x627,0x62d,0x627,0x62d,0x627,0x62d, +0x627,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62d,0x627,0x62a, +0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62d,0x627,0x62d,0x627,0x62d, +0x627,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a, +0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a,0x630,0x62d,0x627,0x62a, +0x630,0x62d,0x627,0x62a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a, +0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x71a,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717, +0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717, +0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x717,0x720,0x720,0x720,0x720,0x720,0x720, +0x720,0x720,0x720,0x720,0x720,0x720,0x723,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720, +0x720,0x720,0x720,0x720,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726, +0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726, +0x726,0x726,0x726,0x726,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750, +0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750, +0x750,0x750,0x750,0x750,0xc66,0x8e5,0x8df,0x8dc,0x8e2,0x8d9,0x765,0x768,0x768,0x768,0x768,0x768, +0x768,0x768,0x768,0x768,0x8eb,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765, +0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765, +0x765,0x765,0x765,0x765,0x765,0x765,0x8e8,0x8e8,0x76b,0x8fa,0x8fd,0x903,0x825,0x831,0x918,0x82e, 
+0x8f1,0x8ee,0x8f1,0x8ee,0x8f7,0x8f4,0x8f7,0x8f4,0x8f1,0x8ee,0x82b,0x903,0x8f1,0x8ee,0x8f1,0x8ee, +0x8f1,0x8ee,0x8f1,0x8ee,0x906,0x90f,0x90c,0x90c,0x771,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7a7, +0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7,0x7a7, +0x7a7,0x7a7,0x7a7,0x774,0x78f,0x76e,0x795,0x798,0x792,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7a4, +0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4,0x7a4, +0x7a4,0x7a4,0x7a4,0x774,0x78f,0x76e,0x78f,0xc69,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813, 0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813, -0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df, -0x8df,0x816,0x816,0x816,0x816,0x816,0x816,0xd35,0xd35,0xd35,0xd35,0x8e2,0x8e2,0x8e2,0x8e2,0x8e2, -0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816, -0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816,0x816, -0x816,0x816,0xd35,0xd35,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819, -0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819, -0x819,0x819,0x819,0x819,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x81c,0x81c,0x81c,0x81c, -0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c, -0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0x81c,0xe64,0xe64, -0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64, -0xe64,0xe64,0xe64,0xe64,0x10d4,0x10d4,0x10d4,0x10d4,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f, -0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f, -0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x822,0x822,0x81f,0x822,0x81f,0x822, 
-0x822,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x81f,0x822,0x81f,0x822,0x81f,0x822, -0x822,0x81f,0x81f,0x822,0x822,0x822,0x81f,0x81f,0x81f,0x81f,0x1476,0x1476,0xc39,0xc39,0xc39,0xc39, -0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0x8d6,0x8d6,0x8d6,0x8d6, -0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6, -0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x8d6,0x12a8,0x12a8,0x12a8,0x12a8, -0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0xd2c,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9, -0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8dc, -0x8d9,0x8dc,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0x8d9, -0x8d9,0x8d9,0x8d9,0x8d9,0x8d9,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df, -0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df,0x8df, -0x8df,0x8df,0x8df,0xd35,0x95d,0x93f,0x93f,0x93f,0x93f,0x939,0x93f,0x93f,0x951,0x93f,0x93f,0x93c, -0x948,0x94e,0x94e,0x94e,0x94e,0x94e,0x951,0x939,0x945,0x939,0x939,0x939,0x930,0x930,0x939,0x939, -0x939,0x939,0x939,0x939,0x954,0x954,0x954,0x954,0x954,0x954,0x954,0x954,0x954,0x954,0x939,0x939, -0x939,0x939,0x939,0x939,0x939,0x939,0x939,0x939,0x93c,0x930,0x939,0x930,0x939,0x930,0x94b,0x942, -0x94b,0x942,0x95a,0x95a,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969, -0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969,0x969, -0x969,0x969,0x969,0x969,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c, -0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c, 
-0x96c,0x96c,0x96c,0x96c,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f, -0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f,0x96f, -0x96f,0x96f,0x96f,0x96f,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, -0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, -0x978,0x978,0x972,0x972,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b, -0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b, -0x97b,0x97b,0x975,0x975,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, -0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978,0x978, -0x978,0x978,0x978,0x978,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b, -0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b,0x97b, -0x97b,0x97b,0x97b,0x97b,0x97e,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981, -0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981, -0x97e,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981, -0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0x981,0xa0e,0xa0e,0xf96,0xa0e, -0xa0e,0xa0e,0xa11,0xa0e,0xf96,0xa0e,0xa0e,0xf8d,0xa08,0x9fc,0x9fc,0x9fc,0x9fc,0xa0b,0x9fc,0xf7b, -0xf7b,0xf7b,0x9fc,0x9ff,0xa08,0xa02,0xf81,0xf90,0xf90,0xf7b,0xf7b,0xf96,0xb01,0xb01,0xb01,0xb01, -0xb01,0xb01,0xb01,0xb01,0xb01,0xb01,0xa14,0xa14,0xa05,0xa05,0xa05,0xa05,0xa0e,0xa0e,0xa0e,0xa0e, -0xa0e,0xa0e,0xa0b,0xa0b,0x9fc,0x9fc,0xf96,0xf96,0xf96,0xf96,0xf7b,0xf7b,0xa0e,0xa0e,0xa0e,0xa0e, -0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e, -0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa0e,0xa23,0xa23,0xa23,0xa23, 
-0xa23,0xa23,0xa23,0xd86,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xd86,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23, -0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, -0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29, -0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa29,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f, -0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2c,0xa32,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0x110d, -0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110a,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f, -0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f, -0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa44,0xa44,0xa44,0xa44, -0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44, -0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa68,0xa68,0xa68,0xa6b, -0xa6b,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, -0xa50,0xa50,0xa65,0xa47,0xa47,0xa47,0xa47,0xa47,0xa47,0xa47,0xa65,0xa65,0xa68,0xa68,0xa68,0xa68, -0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, -0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa74,0xa74,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa8c,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, -0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, 
-0xa89,0xa89,0xa89,0xa89,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3, -0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xab3,0xba6, -0xba6,0xba6,0xba6,0xba6,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf, -0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf, -0xabf,0xabf,0xabf,0xabf,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1, -0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1,0xad1, -0xad1,0xad1,0xad1,0xad1,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7, -0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7,0xad7, -0xad7,0xad7,0xad7,0xad7,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6, -0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6, -0xae6,0xae6,0xae6,0xae6,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9, -0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xaec,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9, -0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9, -0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xaef,0xaef,0xc36,0xc36, -0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef, -0xc36,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xaef,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0x14c4,0xb19,0xb19,0xb19,0xb19, -0xb19,0xb19,0xcbd,0xcbd,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16, -0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xb16,0xcba,0xcba, 
-0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b, -0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19, -0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19,0xb19, -0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, -0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, -0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb22,0xb2e,0xb34,0xb34,0xb34,0xb28,0xb28,0xb28,0xb31,0xb25,0xb25, -0xb25,0xb25,0xb25,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb34,0xb34,0xb34,0xb34,0xb34, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb2b,0xb2b,0xb34,0xb34,0xb34,0xb28,0xb28,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb34,0xb34,0xb34,0xb34,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0x16d1,0x16d1,0xb40,0xb37,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb37,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb37,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb37,0xb3d,0xb3d, 
-0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb37,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, +0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x813,0x12c3,0x12c3,0x12c3,0x12c3,0x12c3,0x816, +0x82b,0x82e,0x82e,0x82e,0x82e,0x82e,0x82e,0x82e,0x82e,0x82e,0x94e,0x94e,0x94e,0x94e,0x834,0x834, +0x909,0x915,0x915,0x915,0x915,0x912,0x828,0x900,0xb34,0xb34,0xb34,0xc7b,0xc99,0xc96,0xb4f,0x8d6, +0x83a,0x837,0x83a,0x83d,0x837,0x83a,0x837,0x83a,0x837,0x83a,0x837,0x837,0x837,0x837,0x837,0x837, +0x83a,0x83a,0x837,0x83a,0x83a,0x837,0x83a,0x83a,0x837,0x83a,0x83a,0x837,0x83a,0x83a,0x837,0x837, +0xc9c,0x84c,0x846,0x84c,0x846,0x84c,0x846,0x84c,0x846,0x84c,0x846,0x846,0x849,0x846,0x849,0x846, +0x849,0x846,0x849,0x846,0x849,0x846,0x849,0x846,0x849,0x846,0x849,0x846,0x849,0x846,0x849,0x846, +0x849,0x846,0x849,0x84c,0x846,0x849,0x846,0x849,0x846,0x849,0x846,0x846,0x846,0x846,0x846,0x846, +0x849,0x849,0x846,0x849,0x849,0x846,0x849,0x849,0x846,0x849,0x849,0x846,0x849,0x849,0x846,0x846, +0x846,0x846,0x846,0x84c,0x846,0x84c,0x846,0x84c,0x846,0x846,0x846,0x846,0x846,0x846,0x84c,0x846, +0x846,0x846,0x846,0x846,0x849,0x84c,0x84c,0x849,0x849,0x849,0x849,0x91e,0x921,0x84f,0x852,0xc84, +0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, +0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, +0x85b,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858, +0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x858,0x864,0x864,0x864,0x864, +0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864, +0x864,0x864,0x864,0x864,0x864,0x864,0x864,0x864,0xd86,0xd86,0xeb8,0x85e,0x92a,0x92a,0x92a,0x92a, 
+0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0xd80,0xd80,0xd80,0xd80,0x867,0x867,0x867,0x867, +0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x933,0x933,0x933,0x933, +0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x86a,0x86a,0x86a, +0x86a,0x86a,0x86a,0xd89,0xd89,0xd89,0xd89,0x936,0x936,0x936,0x936,0x936,0x86a,0x86a,0x86a,0x86a, +0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a, +0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0xd89,0xd89, +0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d, +0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d, +0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870, +0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870, +0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb, +0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb,0xebb, +0x112b,0x112b,0x112b,0x112b,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873, +0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873, +0x873,0x873,0x873,0x873,0x873,0x873,0x876,0x876,0x873,0x876,0x873,0x876,0x876,0x873,0x873,0x873, +0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x876,0x873,0x876,0x873,0x876,0x876,0x873,0x873,0x876, +0x876,0x876,0x873,0x873,0x873,0x873,0x14cd,0x14cd,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d, +0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a, +0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a, +0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x12ff,0x12ff,0x12ff,0x12ff,0x12a8,0x12a8,0x12a8,0x12a8, 
+0x12a8,0x12a8,0x12a8,0x12a8,0xd80,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87, +0xc87,0xc87,0xc87,0xc87,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d, +0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x930,0x92d,0x930,0x92d,0x92d, +0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d, +0x92d,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87,0xc87, +0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933, +0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0x933,0xd89, +0x9b1,0x993,0x993,0x993,0x993,0x98d,0x993,0x993,0x9a5,0x993,0x993,0x990,0x99c,0x9a2,0x9a2,0x9a2, +0x9a2,0x9a2,0x9a5,0x98d,0x999,0x98d,0x98d,0x98d,0x984,0x984,0x98d,0x98d,0x98d,0x98d,0x98d,0x98d, +0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x9a8,0x98d,0x98d,0x98d,0x98d,0x98d,0x98d, +0x98d,0x98d,0x98d,0x98d,0x990,0x984,0x98d,0x984,0x98d,0x984,0x99f,0x996,0x99f,0x996,0x9ae,0x9ae, +0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd, +0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd,0x9bd, +0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, +0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0,0x9c0, +0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3, +0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3, +0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc, +0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9c6,0x9c6, +0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf, 
+0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9c9,0x9c9, +0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc, +0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc,0x9cc, +0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf, +0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf,0x9cf, +0x9d2,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d2,0x9d5,0x9d5,0x9d5, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0xa62,0xa62,0xfed,0xa62,0xa62,0xa62,0xa65,0xa62, +0xfed,0xa62,0xa62,0xfe4,0xa5c,0xa50,0xa50,0xa50,0xa50,0xa5f,0xa50,0xfd2,0xfd2,0xfd2,0xa50,0xa53, +0xa5c,0xa56,0xfd8,0xfe7,0xfe7,0xfd2,0xfd2,0xfed,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, +0xb55,0xb55,0xa68,0xa68,0xa59,0xa59,0xa59,0xa59,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa5f,0xa5f, +0xa50,0xa50,0xfed,0xfed,0xfed,0xfed,0xfd2,0xfd2,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, +0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xdda, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xdda,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, +0xa77,0xa77,0xa77,0xa77,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, 
+0xa7d,0xa7d,0xa7d,0xa7d,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa80,0xa86,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0x1164,0x1164,0x1164,0x1164,0x1164, +0x1164,0x1164,0x1164,0x1164,0x1161,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98, +0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98, +0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xabc,0xabc,0xabc,0xabf,0xabf,0xabc,0xabc,0xabc, +0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xaa4,0xaa4,0xab9,0xa9b, +0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xab9,0xab9,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, +0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, +0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xadd,0xadd,0xadd,0xadd,0xadd,0xac8,0xac8,0xadd, +0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd, +0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd, +0xadd,0xadd,0xadd,0xae0,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd, +0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd,0xadd, +0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07, +0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xb07,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa, +0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, +0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, +0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, 
+0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25,0xb25, +0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b, +0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b,0xb2b, 0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a, 0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, 0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, -0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, +0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, 0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d, -0xb3d,0xb3d,0xb3d,0xb3d,0xb40,0xb40,0xb40,0xb40,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43, -0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43, -0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49, -0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49, -0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, -0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, -0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6, -0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba6,0xba3,0xba6, 
-0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xcae, -0xcb1,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xd9e,0xeb8,0xeb8,0xeb8,0xeb8, -0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xbb5,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4, -0xcb4,0xcb4,0xda4,0xe5b,0xda4,0xda4,0xda4,0xda4,0xda1,0xda4,0xda1,0xda4,0xda4,0xfab,0x1242,0x1242, -0xdad,0xdad,0xdad,0xdad,0xdad,0xdb3,0xdb0,0xeca,0xeca,0xeca,0xeca,0x13bf,0xfbd,0x13bf,0x12fc,0x12fc, -0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb,0xbeb, -0xbeb,0xbeb,0xc1b,0xc18,0xc1b,0xc18,0xc1b,0xc18,0x10ce,0x10cb,0xfc3,0xfc0,0xbee,0xbee,0xbee,0xbee, -0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbee,0xbf1,0xbf1,0xbf1,0xbf1, -0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1, -0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf4,0xbf4,0xbf1,0xbf1, -0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf7,0xbf7,0xbf7,0xbfd,0xbfa,0xc21,0xc1e,0xbfd, -0xbfa,0xbfd,0xbfa,0xbfd,0xbfa,0xbfd,0xbfa,0xbfd,0xbfa,0xbfd,0xbfa,0xbfd,0xbfa,0xbfd,0xbfa,0xbfd, -0xbfa,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7, -0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7, -0xbf7,0xbf7,0xbf7,0xbf7,0xbfd,0xbfa,0xbfd,0xbfa,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7, -0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7, -0xbf7,0xbf7,0xbf7,0xbf7,0xbfd,0xbfa,0xbf7,0xbf7,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00, -0xc00,0xc00,0xc00,0xc00,0xc06,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00, -0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00, -0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc06,0xc06,0xc06,0xc00,0xc00,0xc00,0xc00,0xc00, 
-0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00, -0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc03,0xc00,0xc00,0xc00,0xc39,0xc39,0xc39,0xc39, -0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39, -0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xc39,0xcb7,0xd26,0xda1,0xda1, -0xda1,0xda1,0xda1,0xda1,0xda1,0xda1,0xe5b,0xe5b,0xda1,0xda1,0xda1,0xda1,0xda4,0xda4,0xebb,0xfab, -0xfab,0xfab,0xfab,0xfab,0xfab,0xfab,0xfab,0xfab,0xfab,0x126f,0x126f,0x1245,0xcdb,0xcdb,0xcdb,0xcdb, -0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb, -0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcdb,0xcea,0xcea,0xcea,0xcea, -0xcea,0xcea,0xce1,0xce1,0xce1,0xce1,0xce1,0xcde,0xcf3,0xcf3,0xcf3,0xced,0xcf3,0xcf3,0xcf3,0xcf3, -0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xced,0xcf3,0xcf3,0xcf3,0xcf3,0xce7,0xce7,0xcf0,0xcf0, -0xcf0,0xcf0,0xce4,0xce4,0xce4,0xce4,0xce4,0xcea,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9, -0xdb9,0xdb9,0xdb9,0xdb9,0xdb6,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xdb9,0xcf3,0xcf3,0xcf3,0xcf3, -0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xced,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3, -0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xcf3,0xce7,0xce7,0xce7,0xcea,0xcea,0xcea,0xcea, -0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea, -0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcea,0xcf6,0xcf6,0xcf6,0xcf6, -0xcf6,0xcf9,0xcf9,0xcf9,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xcf6,0xdbc,0xdbc,0xdbc,0xdbc,0xdbc,0xdbc, -0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0x10d7,0x10d7,0xfc6,0xfc6,0xfc6,0xcfc,0xcfc,0xcfc,0xcfc, -0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc, -0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xcfc,0xd02,0xd02,0xd02,0xd02, 
-0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02, -0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd02,0xd0b,0xd0b,0xd0b,0xd0b, -0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b, -0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd0b,0xd17,0xd17,0xd17,0xd17, -0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17, -0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd17,0xd23,0xd23,0xd23,0xd23, -0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23, -0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xd23,0xdc2,0xdc2,0xdc2,0xdc2, -0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2, -0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc2,0xdc8,0xdc8,0xdc8,0xdc8, -0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8, -0xdc8,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc5,0xdc8,0xdc8,0xdc8,0xdc8, -0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8, -0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xdc8,0xe88,0xe88,0xdda,0xdda, -0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0xfcf,0xfcf,0xfcf,0xfcf, -0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xde9,0xde6,0xde9,0xde6, -0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6, -0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xde9,0xde6,0xdf5,0xdf5,0xdf5,0xdf5, -0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5, -0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdfb,0xdfb,0xdfb,0xdfb, 
-0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb, -0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xed3,0xed3,0xed3,0xed3,0xfd5,0xfd5,0xfd5,0xfd5,0xfd5,0xe1c,0xe1c,0xe1c,0xe1c, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe25,0xe25,0xe25,0xe25, -0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25, -0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe2e,0xe2e,0xe2e,0xe2e, -0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e, -0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe28,0xe2b,0xe2b,0xe2b,0xe2b, -0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b, -0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2b,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe37,0xe37,0xe37,0xe37, -0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34, -0xe34,0xe34,0xe31,0xe3a,0xfe1,0xfdb,0xfea,0xfd8,0xe37,0xe37,0xfd8,0xfd8,0xe4c,0xe4c,0xe3d,0xe4c, -0xe4c,0xe4c,0xe43,0xe4c,0xe4c,0xe4c,0xe4c,0xe3d,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c, -0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4f,0xe4f,0xe4f,0xe4f, -0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f, -0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe67,0xe67,0xe67,0xe67, -0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, -0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe85,0xe85,0xe85,0xe85, 
-0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0x10e0,0x10e0,0x10e0,0x10e0, -0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0xecd,0xecd,0xecd,0xecd, -0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc6,0xfc9,0xfc9,0xfc9,0xfc9, -0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xeee,0xeee,0xeee,0xeee, -0xf00,0xf09,0xf0c,0xf09,0xf0c,0xf09,0xf0c,0xf09,0xf0c,0xf09,0xf0c,0xf09,0xf09,0xf09,0xf0c,0xf09, -0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09,0xf09, -0xf09,0xf09,0xf09,0xf09,0xef1,0xf00,0xeee,0xeee,0xeee,0xeee,0xeee,0xf03,0xeee,0xf03,0xf00,0xf00, -0xf15,0xf12,0xf15,0xf15,0xf15,0xf12,0xf12,0xf15,0xf12,0xf15,0xf12,0xf15,0xf12,0xffc,0xffc,0xffc, -0x1137,0xff3,0xffc,0xff3,0xf12,0xf15,0xf12,0xf12,0xff3,0xff3,0xff3,0xff3,0xff6,0xff9,0x1137,0x1137, -0xf18,0xf18,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005, -0x1005,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005, -0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e, -0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e,0xf1e, -0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d, -0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d,0xf2d, -0xf2d,0xf2d,0xf2d,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7,0x14f7, -0x14f7,0x14f7,0x14f7,0x14f7,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33, -0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33,0xf33, -0xf33,0xf33,0xf33,0xf33,0xf7b,0xf96,0xf8d,0xf8a,0xf8a,0xf96,0xf96,0xf8d,0xf8d,0xf8a,0xf8a,0xf8a, 
-0xf8a,0xf8a,0xf96,0xf96,0xf96,0xf7b,0xf7b,0xf7b,0xf7b,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96, -0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf7b,0xf8d,0xf90,0xf7b,0xf7b,0xf93,0xf93,0xf93,0xf93,0xf93, -0xf93,0xf7e,0xf96,0xf93,0xf87,0xf87,0xf87,0xf87,0xf87,0xf87,0xf87,0xf87,0xf87,0xf87,0x1101,0x1101, -0x10fe,0x10fb,0xf84,0xf84,0xfae,0xfae,0xfae,0xfae,0x126f,0x126f,0x1245,0x1245,0x124b,0x1242,0x1242,0x1242, -0x1242,0x1245,0x136b,0x124b,0x1245,0x124b,0x1242,0x124b,0x126f,0x1242,0x1242,0x1242,0x1245,0x1245,0x1242,0x1242, -0x1245,0x1242,0x1242,0x1245,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc6,0xfc6,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9, -0xfc9,0x14d0,0x14d0,0x14d0,0x10d7,0xfc6,0xfc6,0xfc6,0xfc6,0x127b,0x1254,0x1254,0x1254,0x1254,0x14d0,0x14d0, -0x14d0,0x14d0,0x14d0,0x14d0,0xfe7,0xfe7,0xfe4,0xfde,0xfe4,0xfde,0xfe4,0xfde,0xfe4,0xfde,0xfdb,0xfdb, -0xfdb,0xfdb,0xff0,0xfed,0xfdb,0x1134,0x13cb,0x13ce,0x13ce,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13d1,0x13d1, -0x14eb,0x14df,0x14df,0x14dc,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x1002,0xfff,0xfff,0x100e, -0x1005,0x130b,0x1308,0x16da,0x130b,0x1308,0x13da,0x13d7,0x14ee,0x14ee,0x14f4,0x14ee,0x14f4,0x14ee,0x14f4,0x14ee, -0x14f4,0x14ee,0x14f4,0x14ee,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005, -0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005,0x100e,0x1005, -0x100e,0x1005,0x100e,0x1005,0x1008,0x1005,0x1005,0x1005,0x1005,0x1005,0x1005,0x1005,0x1005,0x100e,0x1005,0x100e, -0x1005,0x100e,0x100e,0x1005,0x1011,0x1011,0x1017,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d, -0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d, -0x101d,0x101d,0x101d,0x101d,0x101d,0x1017,0x1011,0x1011,0x1011,0x1011,0x1017,0x1017,0x1011,0x1011,0x101a,0x13e3, -0x13e0,0x13e0,0x101d,0x101d,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x13e6,0x13e6, 
-0x13e6,0x13e6,0x13e6,0x13e6,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032, -0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032, -0x1032,0x1032,0x1032,0x1032,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b, -0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103b,0x103e,0x103e,0x103e,0x1041, -0x103e,0x103e,0x1044,0x1044,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047, -0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047,0x1047, -0x1047,0x1047,0x1047,0x1047,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050, -0x1053,0x104a,0x1059,0x1056,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050, -0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050,0x1050, -0x1050,0x1050,0x1050,0x1050,0x1311,0x130e,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065, -0x106b,0x1065,0x1068,0x10e9,0x105c,0x105c,0x105c,0x1062,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9, -0x105f,0x105f,0x1062,0x106e,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065, -0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065,0x106b,0x1065, -0x106b,0x1065,0x106b,0x1065,0x1503,0x1500,0x1503,0x1500,0x1506,0x1506,0x16e3,0x13e9,0x1077,0x1077,0x107a,0x107a, -0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a, -0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x107a,0x1077,0x1077,0x1077,0x1077, -0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1080,0x1080, 
-0x1080,0x1080,0x1080,0x1083,0x1083,0x1083,0x10dd,0x108c,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b, -0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086, -0x1086,0x1086,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089, -0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa, -0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa, -0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10aa,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc, -0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc, -0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10c5,0x10c5,0x10c5,0x10c5,0x10da,0x10c5,0x10c5,0x10c5, -0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5, -0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8, -0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8, -0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x10c8,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149, -0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1140,0x1140, -0x1143,0x1143,0x1149,0x1140,0x1140,0x1140,0x1140,0x1140,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c, -0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c, -0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x114c,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167, -0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167, 
-0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1167,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173, -0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1173, -0x1173,0x1173,0x1173,0x1173,0x1173,0x1173,0x1170,0x1176,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182, -0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182, -0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1182,0x1191,0x1191,0x1191,0x11a0,0x11a6,0x11a6,0x11a6,0x11a6, -0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6, -0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x1194,0x11a0,0x11a0,0x1191,0x1191, -0x1191,0x1191,0x11a0,0x11a0,0x1191,0x11a0,0x11a0,0x11a0,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2, -0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b5,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11ac, -0x11ac,0x11ac,0x11b2,0x11af,0x150c,0x150f,0x1512,0x1512,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4, -0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11b8,0x11c4,0x11b8,0x11b8,0x11b8,0x11cd,0x11cd,0x11b8, -0x11b8,0x11cd,0x11c4,0x11cd,0x11cd,0x11c4,0x11b8,0x11bb,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4, -0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4, -0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11c4,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df, -0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df, -0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7, -0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f7, 
-0x11f7,0x11f7,0x11f7,0x11f7,0x11f7,0x11f4,0x11f4,0x11f4,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200, -0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200, -0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x1200,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f, -0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f, -0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x120f,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a, -0x122a,0x122a,0x122a,0x122d,0x122a,0x122a,0x122a,0x122a,0x1227,0x1227,0x1227,0x121b,0x121b,0x121b,0x121b,0x1227, -0x1227,0x1221,0x121e,0x1224,0x1224,0x1215,0x1230,0x1230,0x1218,0x1218,0x1227,0x122a,0x122a,0x122a,0x122a,0x122a, -0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a,0x122a, -0x122a,0x122a,0x122d,0x122a,0x122d,0x122a,0x122a,0x122a,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1239,0x1239,0x1239,0x1236,0x1236,0x1236,0x1233,0x1233, -0x1233,0x1233,0x1236,0x1233,0x1233,0x1233,0x1239,0x1236,0x1239,0x1236,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233, -0x1233,0x1233,0x1233,0x1233,0x1233,0x1239,0x1236,0x1236,0x1233,0x1233,0x1233,0x1233,0x1245,0x1245,0x12ed,0x1242, -0x12ed,0x12ed,0x12ed,0x12ed,0x1242,0x1248,0x126f,0x1242,0x1242,0x1242,0x1242,0x1242,0x1248,0x124b,0x126f,0x126f, -0x124b,0x126f,0x1242,0x124b,0x124b,0x124e,0x126f,0x1242,0x1242,0x126f,0x1245,0x1245,0x135c,0x135c,0x135c,0x135c, -0x135c,0x135c,0x135c,0x135c,0x135c,0x135c,0x1257,0x1257,0x1257,0x1257,0x1377,0x1356,0x1260,0x1377,0x1377,0x1377, 
-0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x1377,0x180f,0x180f,0x180f,0x180f,0x180f,0x135f,0x135f,0x1266,0x135f, -0x135f,0x135f,0x1266,0x135f,0x135f,0x135f,0x1260,0x1260,0x1260,0x1260,0x1260,0x1359,0x135c,0x135c,0x135c,0x135c, -0x135c,0x135c,0x135c,0x1263,0x135c,0x135c,0x135c,0x135c,0x135c,0x135c,0x135c,0x1263,0x128d,0x128d,0x128d,0x128d, -0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d, -0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x128d,0x132f,0x132f,0x132f,0x132f, -0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f, -0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x132f,0x1344,0x1335,0x1344,0x1347, -0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347, -0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1347,0x1335,0x1335,0x1335,0x1335, -0x1335,0x1335,0x1335,0x1335,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d, -0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d,0x134d, -0x134d,0x134d,0x134d,0x134d,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353, -0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353,0x1353, -0x1353,0x1353,0x1353,0x1353,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1389,0x1389,0x138c,0x138c,0x138c,0x138c,0x138c, -0x1389,0x138c,0x138c,0x138c,0x1389,0x138c,0x1389,0x138c,0x1389,0x138c,0x138c,0x138c,0x138c,0x138c,0x1395,0x138c, -0x138c,0x138c,0x138c,0x1389,0x138c,0x1389,0x1389,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, 
-0x138c,0x138c,0x138c,0x138c,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1389,0x1389,0x1389,0x1389,0x1389, -0x1389,0x1389,0x1389,0x1389,0x1389,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x1389,0x151e,0x151e, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x1527,0x1521,0x1521,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1764,0x1764,0x1764, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1527,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x1527,0x1764,0x1764,0x138c,0x138c,0x138c,0x138c,0x138c,0x1395,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1521,0x1521,0x1527,0x1527, -0x1521,0x1527,0x1527,0x1527,0x151e,0x151e,0x1527,0x1527,0x138c,0x138c,0x1392,0x1395,0x1395,0x1698,0x138c,0x1392, -0x138c,0x138c,0x1395,0x1530,0x152d,0x1527,0x1527,0x1764,0x1764,0x1764,0x1764,0x1764,0x1527,0x1527,0x1527,0x1527, -0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1521,0x1521,0x1527,0x1698,0x1527,0x1521,0x1527, -0x1764,0x1764,0x1764,0x1767,0x1767,0x1767,0x1767,0x1767,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, 
-0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1527,0x138c,0x1527,0x1395,0x1395,0x138c,0x138c,0x1395,0x1395, -0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1398,0x1398,0x1398,0x1398,0x1392,0x1392, -0x1392,0x1392,0x1395,0x1392,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x138c,0x138c,0x138c, -0x1395,0x138c,0x138c,0x138c,0x138c,0x1395,0x1395,0x1395,0x138c,0x1395,0x1395,0x1395,0x138c,0x138c,0x138c,0x138f, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1698,0x138c,0x138c,0x138c,0x138c,0x1527,0x1521,0x1764, -0x13ef,0x13ef,0x13ef,0x13ef,0x151e,0x151e,0x151e,0x151e,0x151e,0x1524,0x1527,0x1764,0x1764,0x1764,0x1764,0x16ec, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, -0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1527,0x1527,0x1521,0x1521,0x1527,0x152d,0x1530,0x1527,0x1527, -0x1527,0x1527,0x1818,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1527,0x1521,0x1527,0x1521,0x1521,0x1521,0x1521, -0x152a,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1527,0x1521,0x1521,0x1521,0x1527,0x151e,0x151e,0x151e,0x151e, -0x151e,0x151e,0x1527,0x138c,0x138c,0x138c,0x138c,0x138c,0x1479,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b, -0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x1479,0x139b,0x139b,0x139b,0x1479,0x139b,0x1479, -0x139b,0x1479,0x139b,0x1479,0x139b,0x139b,0x139b,0x1479,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x1479,0x1479, -0x139b,0x139b,0x139b,0x139b,0x1479,0x139b,0x1479,0x1479,0x139b,0x139b,0x139b,0x139b,0x1479,0x139b,0x139b,0x139b, -0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x139b,0x169e,0x169e,0x176a,0x176a,0x139e,0x139e,0x139e, 
-0x139b,0x139b,0x139b,0x139e,0x139e,0x139e,0x139e,0x139e,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d, -0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a4,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a4,0x13a4,0x13a4,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7, -0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7, -0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1797,0x1797,0x1794,0x16ef,0x13f5,0x13f5,0x13f5,0x13f5, -0x13f5,0x13f5,0x13f2,0x13f2,0x13f2,0x13f2,0x13f2,0x13f2,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5, -0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x1539,0x1401,0x1401,0x1401,0x1413,0x1413,0x1413,0x1413,0x1413, -0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413, -0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x1413,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e, -0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e, -0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437, -0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437, -0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x1437,0x143d,0x143d,0x1449,0x144f,0x144f,0x144f,0x144f,0x144f, -0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f, 
-0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x144f,0x1449,0x1449,0x1449,0x143d,0x143d, -0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x1449,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470, -0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470, -0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1470,0x1521,0x1521,0x1527,0x1527,0x1527,0x1521,0x1521,0x1521, -0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1527,0x1527,0x1527,0x151e,0x151e,0x151e,0x151e, -0x151e,0x151e,0x151e,0x151e,0x1527,0x1527,0x1527,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1527, -0x1521,0x1521,0x1527,0x1527,0x1527,0x1527,0x1521,0x1521,0x1530,0x1521,0x1521,0x1521,0x1521,0x169b,0x169b,0x1521, -0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1815,0x1527,0x1521,0x1521,0x1527,0x1521,0x1521,0x1521, -0x1521,0x1521,0x1521,0x1521,0x1521,0x1527,0x1527,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521,0x1521, -0x1527,0x1521,0x1521,0x1521,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551, -0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551,0x1551, -0x1551,0x1551,0x1551,0x1551,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563, -0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563, -0x1563,0x1563,0x1563,0x1563,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569, -0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569, -0x1569,0x1569,0x1569,0x1569,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c, -0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c,0x156c, 
-0x156c,0x156c,0x156c,0x156c,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab, -0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab, -0x15ab,0x15ab,0x15ab,0x159c,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4, -0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15ae, -0x15b7,0x15b7,0x15b7,0x15b7,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, -0x15ba,0x15ba,0x15ba,0x15ba,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15cc,0x15d5,0x15d5,0x15d5, -0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, -0x15d5,0x15d5,0x15d5,0x15d5,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0, -0x15f0,0x15f0,0x15f0,0x15f0,0x15ed,0x15ed,0x15ed,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15e1,0x15ed, -0x15ed,0x15e1,0x15ed,0x15e4,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0, -0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0,0x15f0, -0x15f0,0x15f0,0x15f0,0x15f0,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614, -0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614, -0x1614,0x1611,0x1611,0x1611,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d, 
-0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x1623,0x1623,0x1623,0x1620,0x1620,0x1620, -0x161d,0x161d,0x161d,0x161d,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1626,0x1638,0x1638,0x162c,0x1629,0x1629, -0x1629,0x1629,0x1629,0x1629,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, +0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb3d,0xb43,0xb43,0xc8a,0xc8a,0xb43,0xb43,0xb43,0xb43, +0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xc8a,0xb43,0xb43,0xb43, +0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, +0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64, +0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0xb64,0x151b,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xd11,0xd11, +0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a, +0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xd0e,0xd0e,0xd5f,0xd5f,0xd5f,0xd5f, +0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xb6d,0xb6d,0xb6d,0xb6d, +0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d, +0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb6d,0xb70,0xb70,0xb70,0xb70, +0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70, +0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb76,0xb82,0xb88,0xb88,0xb88,0xb7c,0xb7c,0xb7c,0xb85,0xb79,0xb79,0xb79,0xb79,0xb79,0xb73, +0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb88,0xb88,0xb88,0xb88,0xb88,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, 
+0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7f,0xb7f, +0xb88,0xb88,0xb88,0xb7c,0xb7c,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb88,0xb88,0xb88,0xb88,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0x1725,0x1725,0xb94,0xb8b,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb8b,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb8b,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb8b,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb8b,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb8e,0xb8e,0xb8e,0xb8e, +0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e, +0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb8e,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb94,0xb94,0xb94,0xb94, 
+0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94, +0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91,0xb91, +0xb94,0xb94,0xb94,0xb94,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97, +0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97,0xb97, +0xb97,0xb97,0xb97,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa, +0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbfa,0xbf7,0xbfa,0xbf7,0xbf7,0xbf7,0xbf7, +0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xbf7,0xd02,0xd05,0xdf2,0xdf2,0xdf2, +0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xf0f,0xf0f,0xf0f,0xf0f,0xc09,0xc09,0xc09,0xc09, +0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xdf8,0xeb2, +0xdf8,0xdfb,0xdfb,0xdf8,0xdf5,0xdf8,0xdf5,0xdf8,0xdf8,0x1002,0x1299,0x1299,0xe04,0xe04,0xe04,0xe04, +0xe04,0xe0a,0xe07,0xf21,0xf21,0xf21,0xf21,0x1416,0x1014,0x1416,0x1353,0x1353,0xc3f,0xc3f,0xc3f,0xc3f, +0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc6f,0xc6c, +0xc6f,0xc6c,0xc6f,0xc6c,0x1125,0x1122,0x101a,0x1017,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42, +0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc42,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45, +0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45, 
+0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc48,0xc48,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45, +0xc45,0xc45,0xc45,0xc45,0xc4b,0xc4b,0xc4b,0xc51,0xc4e,0xc75,0xc72,0xc51,0xc4e,0xc51,0xc4e,0xc51, +0xc4e,0xc51,0xc4e,0xc51,0xc4e,0xc51,0xc4e,0xc51,0xc4e,0xc51,0xc4e,0xc51,0xc4e,0xc4b,0xc4b,0xc4b, +0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b, +0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b, +0xc51,0xc4e,0xc51,0xc4e,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b, +0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b, +0xc51,0xc4e,0xc4b,0xc4b,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc5a,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc54,0xc54,0xc54,0xc54,0xc5a,0xc5a,0xc5a,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc54,0xc54,0xc54,0xc54,0xc57,0xc54,0xc54,0xc54,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d, +0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d, +0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xd0b,0xd7a,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5,0xdf5, +0xdf5,0xdf5,0xeb2,0xeb2,0xdf5,0xdf5,0xdf5,0xdf5,0xdf8,0xdf8,0xf12,0x1002,0x1002,0x1002,0x1002,0x1002, +0x1002,0x1002,0x1002,0x1002,0x1002,0x12c6,0x12c6,0x129c,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f, +0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd2f,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd35,0xd35, +0xd35,0xd35,0xd35,0xd32,0xd47,0xd47,0xd47,0xd41,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47, 
+0xd47,0xd47,0xd47,0xd41,0xd47,0xd47,0xd47,0xd47,0xd3b,0xd3b,0xd44,0xd44,0xd44,0xd44,0xd38,0xd38, +0xd38,0xd38,0xd38,0xd3e,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10, +0xe0d,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47, +0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd41,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47,0xd47, +0xd47,0xd47,0xd47,0xd47,0xd47,0xd3b,0xd3b,0xd3b,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e, +0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e, +0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4d,0xd4d,0xd4d, +0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xf24,0xf24,0xf24,0xf24, +0xf24,0xf24,0xf24,0x112e,0x112e,0x101d,0x101d,0x101d,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50, +0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50, +0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56, +0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56, +0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f, +0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f, +0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77, +0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77, +0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, 
+0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1c,0xe1c,0xe1c, +0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f, +0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xe1f,0xedf,0xedf,0xe31,0xe31,0xf27,0xf27,0xf27,0xf27, +0xf27,0xf27,0xf27,0x1029,0x1029,0x1029,0x1029,0x1029,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026, +0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0x1026,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d, +0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d, +0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe40,0xe3d,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c, +0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c, +0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52, +0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52, +0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a, +0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xf2a, +0xf2a,0xf2a,0xf2a,0x102c,0x102c,0x102c,0x102c,0x102c,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73, +0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73, +0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, +0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c, +0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85, +0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85, 
+0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0xe7f,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82, +0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82,0xe82, +0xe82,0xe82,0xe82,0xe85,0xe85,0xe85,0xe85,0xe85,0xe8e,0xe8e,0xe8e,0xe8e,0xe8e,0xe8e,0xe8e,0xe8e, +0xe8e,0xe8e,0xe8e,0xe8e,0xe8e,0xe8e,0xe8b,0xe8b,0xe8b,0xe8b,0xe8b,0xe8b,0xe8b,0xe8b,0xe88,0xe91, +0x1038,0x1032,0x1041,0x102f,0xe8e,0xe8e,0x102f,0x102f,0xea3,0xea3,0xe94,0xea3,0xea3,0xea3,0xe9a,0xea3, +0xea3,0xea3,0xea3,0xe94,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3, +0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6, +0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6, +0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xea6,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe, +0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe, +0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xebe,0xedc,0xedc,0xedc,0xedc,0xedc,0xedc,0xedc,0xedc, +0xedc,0xedc,0xedc,0xedc,0xedc,0xedc,0xedc,0xedc,0x1137,0x1137,0x1137,0x1137,0x1137,0x1137,0x1137,0x1137, +0x1137,0x1137,0x1137,0x1137,0x1137,0x1137,0x1137,0x1137,0xf24,0xf24,0xf24,0xf24,0x101d,0x101d,0x101d,0x101d, +0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x101d,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020, +0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0xf45,0xf45,0xf45,0xf45,0xf57,0xf60,0xf63,0xf60, +0xf63,0xf60,0xf63,0xf60,0xf63,0xf60,0xf63,0xf60,0xf60,0xf60,0xf63,0xf60,0xf60,0xf60,0xf60,0xf60, +0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60, +0xf48,0xf57,0xf45,0xf45,0xf45,0xf45,0xf45,0xf5a,0xf45,0xf5a,0xf57,0xf57,0xf6c,0xf69,0xf6c,0xf6c, +0xf6c,0xf69,0xf69,0xf6c,0xf69,0xf6c,0xf69,0xf6c,0xf69,0x1053,0x1053,0x1053,0x118e,0x104a,0x1053,0x104a, 
+0xf69,0xf6c,0xf69,0xf69,0x104a,0x104a,0x104a,0x104a,0x104d,0x1050,0x118e,0x118e,0xf6f,0xf6f,0x1065,0x105c, +0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x105c,0x105c,0x1065,0x105c, +0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0xf75,0xf75,0xf75,0xf75, +0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75, +0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf84,0xf84,0xf84,0xf84, +0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84, +0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0xf84,0x154e, +0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e,0x154e, +0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a, +0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a, +0xfd2,0xfed,0xfe4,0xfe1,0xfe1,0xfed,0xfed,0xfe4,0xfe4,0xfe1,0xfe1,0xfe1,0xfe1,0xfe1,0xfed,0xfed, +0xfed,0xfd2,0xfd2,0xfd2,0xfd2,0xfed,0xfed,0xfed,0xfed,0xfed,0xfed,0xfed,0xfed,0xfed,0xfed,0xfed, +0xfed,0xfed,0xfd2,0xfe4,0xfe7,0xfd2,0xfd2,0xfea,0xfea,0xfea,0xfea,0xfea,0xfea,0xfd5,0xfed,0xfea, +0xfde,0xfde,0xfde,0xfde,0xfde,0xfde,0xfde,0xfde,0xfde,0xfde,0x1158,0x1158,0x1155,0x1152,0xfdb,0xfdb, +0x1005,0x1005,0x1005,0x1005,0x12c6,0x12c6,0x129c,0x129c,0x12a2,0x1299,0x1299,0x1299,0x1299,0x129c,0x13c2,0x12a2, +0x129c,0x12a2,0x1299,0x12a2,0x12c6,0x1299,0x1299,0x1299,0x129c,0x129c,0x1299,0x1299,0x129c,0x1299,0x1299,0x129c, +0x1020,0x1020,0x1020,0x1020,0x1020,0x101d,0x101d,0x1020,0x1020,0x1020,0x1020,0x1020,0x1020,0x1527,0x1527,0x1527, +0x112e,0x101d,0x101d,0x101d,0x101d,0x12d2,0x12ab,0x12ab,0x12ab,0x12ab,0x1527,0x1527,0x1527,0x1527,0x1527,0x1527, 
+0x103e,0x103e,0x103b,0x1035,0x103b,0x1035,0x103b,0x1035,0x103b,0x1035,0x1032,0x1032,0x1032,0x1032,0x1047,0x1044, +0x1032,0x118b,0x1422,0x1425,0x1425,0x1422,0x1422,0x1422,0x1422,0x1422,0x1428,0x1428,0x1542,0x1536,0x1536,0x1533, +0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1059,0x1056,0x1056,0x1065,0x105c,0x1362,0x135f,0x172e, +0x1362,0x135f,0x1431,0x142e,0x1545,0x1545,0x154b,0x1545,0x154b,0x1545,0x154b,0x1545,0x154b,0x1545,0x154b,0x1545, +0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c, +0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x105c, +0x105f,0x105c,0x105c,0x105c,0x105c,0x105c,0x105c,0x105c,0x105c,0x1065,0x105c,0x1065,0x105c,0x1065,0x1065,0x105c, +0x1068,0x1068,0x106e,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074, +0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074, +0x1074,0x106e,0x1068,0x1068,0x1068,0x1068,0x106e,0x106e,0x1068,0x1068,0x1071,0x143a,0x1437,0x1437,0x1074,0x1074, +0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x106b,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d, +0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089, +0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089, +0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092, +0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1095,0x1095,0x1095,0x1098,0x1095,0x1095,0x109b,0x109b, +0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e, +0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e,0x109e, 
+0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10aa,0x10a1,0x10b0,0x10ad, +0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7, +0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7,0x10a7, +0x1368,0x1365,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10bf,0x1140, +0x10b3,0x10b3,0x10b3,0x10b9,0x1440,0x1440,0x1440,0x1440,0x1440,0x1440,0x1440,0x1440,0x10b6,0x10b6,0x10b9,0x10c5, +0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc, +0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc,0x10c2,0x10bc, +0x155a,0x1557,0x155a,0x1557,0x155d,0x155d,0x1737,0x1440,0x10ce,0x10ce,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1, +0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1, +0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10d1,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce, +0x10ce,0x10ce,0x10ce,0x10ce,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10d7,0x10d7,0x10d7,0x10d7,0x10d7,0x10da, +0x10da,0x10da,0x1134,0x10e3,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2, +0x10f2,0x10f2,0x10f2,0x10f2,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10e0,0x10e0, +0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0, +0x10e0,0x10e0,0x10e0,0x10e0,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101, +0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101, +0x1101,0x1101,0x1101,0x1101,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113, 
+0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113, +0x1113,0x1113,0x1113,0x1113,0x111c,0x111c,0x111c,0x111c,0x1131,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c, +0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c, +0x111c,0x111c,0x111c,0x111c,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f, +0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f,0x111f, +0x111f,0x111f,0x111f,0x111f,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0, +0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x11a0,0x1197,0x1197,0x119a,0x119a,0x11a0,0x1197, +0x1197,0x1197,0x1197,0x1197,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3, +0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3, +0x11a3,0x11a3,0x11a3,0x11a3,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be, +0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be,0x11be, +0x11be,0x11be,0x11be,0x11be,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, +0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, +0x11ca,0x11ca,0x11c7,0x11cd,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9, +0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9,0x11d9, +0x11d9,0x11d9,0x11d9,0x11d9,0x11e8,0x11e8,0x11e8,0x11f7,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd, +0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd, 
+0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11fd,0x11eb,0x11f7,0x11f7,0x11e8,0x11e8,0x11e8,0x11e8,0x11f7,0x11f7, +0x11e8,0x11f7,0x11f7,0x11f7,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209, +0x1209,0x1209,0x1209,0x1209,0x120c,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1203,0x1203,0x1203,0x1209,0x1206, +0x1563,0x1566,0x1569,0x1569,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b, +0x121b,0x121b,0x121b,0x121b,0x120f,0x121b,0x120f,0x120f,0x120f,0x1224,0x1224,0x120f,0x120f,0x1224,0x121b,0x1224, +0x1224,0x121b,0x120f,0x1212,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b, +0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b,0x121b, +0x121b,0x121b,0x121b,0x121b,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236, +0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236,0x1236, +0x1236,0x1236,0x1236,0x1236,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e, +0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e, +0x124e,0x124b,0x124b,0x124b,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257, +0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257, +0x1257,0x1257,0x1257,0x1257,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266, +0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266,0x1266, +0x1266,0x1266,0x1266,0x1266,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1284, +0x1281,0x1281,0x1281,0x1281,0x127e,0x127e,0x127e,0x1272,0x1272,0x1272,0x1272,0x127e,0x127e,0x1278,0x1275,0x127b, 
+0x127b,0x126c,0x1287,0x1287,0x126f,0x126f,0x127e,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281, +0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1281,0x1284,0x1281, +0x1284,0x1281,0x1281,0x1281,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a, +0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a, +0x128a,0x128a,0x128a,0x128a,0x1290,0x1290,0x1290,0x128d,0x128d,0x128d,0x128a,0x128a,0x128a,0x128a,0x128d,0x128a, +0x128a,0x128a,0x1290,0x128d,0x1290,0x128d,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a, +0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a,0x128a, +0x128a,0x1290,0x128d,0x128d,0x128a,0x128a,0x128a,0x128a,0x129c,0x129c,0x1344,0x1299,0x1344,0x1344,0x1344,0x1344, +0x1299,0x129f,0x12c6,0x1299,0x1299,0x1299,0x1299,0x1299,0x129f,0x12a2,0x12c6,0x12c6,0x12a2,0x12c6,0x1299,0x12a2, +0x12a2,0x12a5,0x12c6,0x1299,0x1299,0x12c6,0x129c,0x129c,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3, +0x13b3,0x13b3,0x12ae,0x12ae,0x12ae,0x12ae,0x13ce,0x13ad,0x12b7,0x13ce,0x13ce,0x13ce,0x13ce,0x13ce,0x13ce,0x13ce, +0x13ce,0x13ce,0x13ce,0x1863,0x1863,0x1863,0x1863,0x1863,0x13b6,0x13b6,0x12bd,0x13b6,0x13b6,0x13b6,0x12bd,0x13b6, +0x13b6,0x13b6,0x12b7,0x12b7,0x12b7,0x12b7,0x12b7,0x13b0,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x12ba, +0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x12ba,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4, +0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4, +0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x12e4,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386, +0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386, 
+0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x139b,0x138c,0x139b,0x139e,0x139e,0x139e,0x139e,0x139e, +0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e, +0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, +0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, +0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa, +0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e0,0x13e0,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e0,0x13e3,0x13e3,0x13e3, +0x13e0,0x13e3,0x13e0,0x13e3,0x13e0,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e9,0x13e3,0x13e3,0x13e3,0x13e3,0x13e0, +0x13e3,0x13e0,0x13e0,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e6,0x13e6,0x13e3,0x13e3,0x13e3, +0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0, +0x13e0,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e0,0x13e0, +0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x1575,0x1575,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x157e,0x1578,0x1578, 
+0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x17b8,0x17b8,0x17b8,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x157e,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e6, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x157e,0x17b8,0x17b8,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e9,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e6, +0x1578,0x1578,0x157e,0x157e,0x1578,0x157e,0x157e,0x157e,0x1575,0x1575,0x157e,0x157e,0x13e3,0x13e3,0x13e9,0x13e9, +0x13e9,0x16ec,0x13e3,0x13e9,0x13e3,0x13e3,0x13e9,0x1584,0x1584,0x157e,0x157e,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8, +0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x1578,0x1578,0x157e, +0x16ec,0x157e,0x1578,0x157e,0x17b8,0x17b8,0x17b8,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x157e,0x13e3,0x157e,0x13e9,0x13e9, +0x13e3,0x13e3,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13ec,0x13ec, +0x13ec,0x13ec,0x13e3,0x13e3,0x13e3,0x13e3,0x13e9,0x13e3,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9, +0x13e9,0x13e3,0x13e3,0x13e3,0x13e9,0x13e3,0x13e3,0x13e3,0x13e3,0x13e9,0x13e9,0x13e9,0x13e3,0x13e9,0x13e9,0x13e9, 
+0x13e3,0x13e3,0x13e3,0x13e6,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x16ec,0x13e3,0x13e3,0x13e3, +0x13e3,0x157e,0x1578,0x17b8,0x1446,0x1446,0x1446,0x1446,0x1575,0x1575,0x1575,0x1575,0x1575,0x157b,0x157e,0x17b8, +0x17b8,0x17b8,0x17b8,0x1740,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, +0x13e3,0x13e3,0x13e3,0x13e3,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x157e,0x157e,0x1578,0x1578,0x157e, +0x1584,0x1584,0x157e,0x157e,0x157e,0x157e,0x186c,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x157e,0x1578,0x157e, +0x1578,0x1578,0x1578,0x1578,0x1581,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x157e,0x1578,0x1578,0x1578,0x157e, +0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x157e,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x14d0,0x13ef,0x13ef,0x13ef, +0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x14d0,0x13ef,0x13ef, +0x13ef,0x14d0,0x13ef,0x14d0,0x13ef,0x14d0,0x13ef,0x14d0,0x13ef,0x13ef,0x13ef,0x14d0,0x13ef,0x13ef,0x13ef,0x13ef, +0x13ef,0x13ef,0x14d0,0x14d0,0x13ef,0x13ef,0x13ef,0x13ef,0x14d0,0x13ef,0x14d0,0x14d0,0x13ef,0x13ef,0x13ef,0x13ef, +0x14d0,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x16f2,0x16f2,0x17be, +0x17be,0x13f2,0x13f2,0x13f2,0x13ef,0x13ef,0x13ef,0x13f2,0x13f2,0x13f2,0x13f2,0x13f2,0x1671,0x1671,0x1671,0x1671, +0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x13f8,0x13f5,0x13f5,0x13f5, +0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f8,0x13f5, +0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13fb, +0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5, 
+0x13fb,0x13fb,0x13fb,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13f5,0x13fe,0x13fe,0x13fe,0x13fe, +0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe, +0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x13fe,0x17eb,0x17eb,0x17e8,0x1743, +0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x1449,0x1449,0x1449,0x1449,0x1449,0x1449,0x144c,0x144c,0x144c,0x144c, +0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x144c,0x158d,0x1458,0x1458,0x1458,0x146a, +0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a, +0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x1485,0x1485,0x1485,0x1485, +0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485, +0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x148e,0x148e,0x148e,0x148e, +0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e, +0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x1494,0x1494,0x14a0,0x14a6, +0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6, +0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a0, +0x14a0,0x14a0,0x1494,0x1494,0x1494,0x1494,0x1494,0x1494,0x1494,0x1494,0x1494,0x14a0,0x14c7,0x14c7,0x14c7,0x14c7, +0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7, +0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x1578,0x1578,0x157e,0x157e, +0x157e,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x157e,0x157e,0x157e, 
+0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x1575,0x157e,0x157e,0x157e,0x1578,0x1578,0x1578,0x1578,0x1578, +0x1578,0x1578,0x1578,0x157e,0x1578,0x1578,0x157e,0x157e,0x157e,0x157e,0x1578,0x1578,0x1584,0x1578,0x1578,0x1578, +0x1578,0x16ef,0x16ef,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1869,0x157e,0x1578,0x1578, +0x157e,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x157e,0x157e,0x1578,0x1578,0x1578,0x1578,0x1578, +0x1578,0x1578,0x1578,0x1578,0x157e,0x1578,0x1578,0x1578,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, +0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, +0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7, +0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7, +0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15b7,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, +0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd, +0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15bd,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, +0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, +0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff, +0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff, +0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15f0,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608, +0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608, +0x1608,0x1608,0x1608,0x1602,0x160b,0x160b,0x160b,0x160b,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e, 
+0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e, +0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629, +0x1620,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629, +0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1629,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, 0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e, -0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163b,0x163b,0x163b,0x163b,0x163b, -0x163b,0x163b,0x163b,0x163b,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641, -0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641, -0x1641,0x1641,0x1641,0x1641,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665, -0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665,0x1665, -0x1665,0x1665,0x1665,0x1665,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e, -0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e, -0x166e,0x166e,0x166e,0x166e,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1671,0x1680,0x1680,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1680,0x1671,0x1683, -0x1683,0x1671,0x1683,0x1671,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644, 
+0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1641,0x1641,0x1641,0x1635,0x1635,0x1635,0x1635,0x1635, +0x1635,0x1635,0x1635,0x1641,0x1641,0x1635,0x1641,0x1638,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644, +0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644, +0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668, +0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668, +0x1668,0x1668,0x1668,0x1668,0x1668,0x1665,0x1665,0x1665,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671, +0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1677,0x1677, +0x1677,0x1674,0x1674,0x1674,0x1671,0x1671,0x1671,0x1671,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x167a,0x168c, +0x168c,0x1680,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, 0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f, -0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f, -0x168f,0x168f,0x168f,0x168f,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692, +0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x1692,0x168f, +0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695, 
0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695, -0x1695,0x1695,0x1695,0x1695,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5, -0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5, -0x16f5,0x16f5,0x16f5,0x16f5,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731, -0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731, -0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1737,0x1734,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731, -0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x1731,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d, -0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d, -0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f, -0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f, -0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x174f,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752, -0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752, -0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1752,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755, -0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755, -0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1758,0x1758,0x1758,0x1758,0x1755, 
-0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1755,0x1758,0x1758,0x1758, -0x1758,0x1758,0x1758,0x1758,0x1758,0x1755,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758, -0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758, -0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1758,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770, -0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770, -0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x17b5,0x17b5,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2, -0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2, -0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5, -0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5, -0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803, -0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1800,0x1800,0x1800, -0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x17eb,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803, -0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803, -0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, -0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, -0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d, -0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d, 
-0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0,0,0,0 +0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x1695,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9, +0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9, +0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16b9,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2, +0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2, +0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16c2,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da, +0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16c5,0x16d4,0x16d4,0x16c5,0x16c5,0x16c5,0x16c5,0x16c5, +0x16c5,0x16d4,0x16c5,0x16d7,0x16d7,0x16c5,0x16d7,0x16c5,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da, +0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da, +0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3, +0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3, +0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e3,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9, +0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9, +0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749, +0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749, +0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1749,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785, +0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785, 
+0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x178b,0x1788,0x1785,0x1785,0x1785,0x1785, +0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x178e,0x178e,0x178e,0x178e, +0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e, +0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x178e,0x1791,0x1791,0x1791,0x1791, +0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791, +0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x17a3,0x17a3,0x17a3,0x17a3, +0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3, +0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a3,0x17a6,0x17a6,0x17a6,0x17a6, +0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6, +0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a9,0x17a9,0x17a9,0x17a9, +0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, +0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17ac, +0x17ac,0x17ac,0x17ac,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, +0x17a9,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17a9,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac, +0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac, +0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17c4,0x17c4,0x17c4,0x17c4, +0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4, 
+0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x1809,0x1809,0x1806,0x1806, +0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806, +0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1809,0x1809,0x1809,0x1809, +0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809, +0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1857,0x1857,0x1857,0x1857, +0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857, +0x1857,0x1854,0x1854,0x1854,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x183f,0x1857,0x1857,0x1857,0x1857, +0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857, +0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x187e,0x187e,0x187e,0x187e, +0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e, +0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x1881,0x1881,0x1881,0x1881, +0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881, +0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0x1881,0,0,0,0 }; static const UTrie2 propsVectorsTrie={ propsVectorsTrie_index, - propsVectorsTrie_index+4844, + propsVectorsTrie_index+4924, NULL, - 4844, - 23696, + 4924, + 24212, 0xa40, - 0x136c, + 0x13bc, 0x0, 0x0, 0x110000, - 0x6f78, + 0x71cc, NULL, 0, FALSE, FALSE, 0, NULL }; -static const uint32_t propsVectors[6195]={ -0x67,0,0,0x67,0,0xe00000,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67, -0,0,0xc67,0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1167,0, -0,0x1267,0,0,0x1367,0,0,0x1467,0,0,0x1567,0,0,0x1667,0,0, 
-0x1767,0,0,0x1867,0,0,0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67, -0,0,0x1f67,0,0,0x2067,0,0,0x2267,0,0,0x2367,0,0,0x2467,0, -0,0x2567,0,0,0x2767,0,0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000, -0x2b67,0,0,0x2d67,0,0,0x3067,0x20000000,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67, -0,0,0x3b67,0,0,0x3c67,0,0,0x3e67,0,0,0x4067,0,0,0x4167,0, -0,0x4367,0,0,0x4467,0,0,0x4867,0,0,0x4967,0,0,0x4a67,0,0, -0x5067,0,0,0x5167,0,0,0x5467,0,0,0x5567,0,0,0x5667,0x80000,0x20,0x5767, -0,0,0x5867,0,0,0x5967,0,0,0x5b67,0,0,0x5c67,0,0,0x5d67,0, +static const uint32_t propsVectors[6279]={ +0x67,0,0,0x67,0,0x200000,0x67,0,0x230400,0x67,0,0x230560,0x67,0,0x400000,0x67, +0,0x448000,0x67,0,0x500000,0x67,0,0x962460,0x67,0,0x962540,0x67,0,0xe00000,0x67,0, +0xe30000,0x67,0,0x1329800,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67,0,0, +0xc67,0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1067,0,0x200000,0x1067, +0,0x230400,0x1167,0,0,0x1267,0,0,0x1267,0,0x962460,0x1367,0,0,0x1467,0, +0,0x1567,0,0,0x1667,0,0,0x1767,0,0,0x1767,0,0x962460,0x1867,0,0, +0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67,0,0,0x1f67,0,0,0x2067, +0,0,0x2267,0,0,0x2367,0,0,0x2467,0,0,0x2567,0,0,0x2767,0, +0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000,0x2b67,0,0,0x2d67,0,0, +0x3067,0x20000000,0x200000,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67,0,0,0x3c67, +0,0,0x3e67,0,0,0x4067,0,0,0x4067,0,0xe30400,0x4167,0,0,0x4367,0, +0,0x4467,0,0,0x4867,0,0,0x4967,0,0,0x4a67,0,0,0x5067,0,0, +0x5167,0,0,0x5467,0,0,0x5567,0,0,0x5667,0x80000,0x20,0x5767,0,0,0x5867, +0,0,0x5867,0,0x230400,0x5967,0,0,0x5b67,0,0,0x5c67,0,0,0x5d67,0, 0,0x6067,0x80000,0x20,0x6267,0,0,0x6367,0,0,0x6467,0,0,0x6567,0,0, -0x6f67,0,0,0x7067,0,0,0x7367,0x20000000,0,0x7567,0,0,0x7667,0,0,0x7767, -0,0,0x7867,0,0,0x7a67,0,0,0x7b67,0,0,0x7c67,0,0,0x7e67,0, -0,0x7f67,0,0,0x8167,0,0,0x8267,0,0,0x8367,0,0,0x8467,0,0, -0x8567,0,0,0x8667,0,0,0x8767,0,0,0x8867,0,0,0x8967,0,0,0x8b67, -0,0,0x8c67,0,0,0x8e67,0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167,0, 
-0,0x9267,0,0,0x9367,0,0,0x9567,0,0,0x9667,0,0,0x9767,0,0, -0x9867,0,0,0x9967,0,0,0x9a67,0,0,0x9c67,0,0,0x9f67,0,0,0xa167, -0,0,0xa367,0,0,0xa467,0,0,0xa567,0,0,0xa667,0,0,0xa767,0, -0,0xa867,0,0,0xa967,0,0,0xaa67,0,0xe00000,0xab67,0,0xe00000,0xac67,0,0, -0xad67,0,0,0xae67,0,0,0xaf67,0,0,0xb167,0,0,0xb267,0,0,0xb367, -0,0,0xb467,0,0,0xb567,0,0,0xb767,0,0,0xb867,0,0,0xb967,0, -0,0xba67,0,0,0xbc67,0,0,0xbd67,0,0,0xbe67,0,0,0xbf67,0,0, -0xc067,0,0,0xc167,0,0,0xc267,0,0,0xc367,0,0xe00000,0xc467,0,0xe00000,0xc667, -0,0,0xc767,0,0,0xc867,0,0,0xc967,0,0,0xca67,0,0,0xcb67,0, -0,0xcc67,0,0xe00000,0xcf67,0,0xe00000,0xd067,0,0xe00000,0xd267,0,0,0xd367,0,0, -0xd467,0,0,0xd567,0,0,0xd667,0,0,0xd867,0,0,0xd967,0,0,0xda67, -0,0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0,0xdf67,0, -0,0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0xe00000,0xe467,0,0, -0xe567,0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0,0,0xea67, -0,0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0,0xef67,0, -0,0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767,0,0, -0xf867,0,0,0xf967,0,0,0xfa67,0,0xe00000,0xfb67,0,0,0xfc67,0,0,0xfd67, -0,0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0,0x10467,0, -0,0x10567,0,0xe00000,0x10667,0,0,0x10767,0,0,0x10867,0,0,0x10967,0,0, -0x10a67,0,0,0x10b67,0,0,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0,0x10f67, -0,0,0x11067,0,0,0x11167,0,0,0xa0067,0,0xe00000,0xa4667,0,0xe00000,0xa4767,0, -0xe00000,0xa4f67,0,0xe00000,0xa5e67,0,0xe00000,0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0,0xe00000, -0xb0067,0,0xe00000,0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020,0x11000100,0x40000001,0xa5a040,0x11000100, -0x40000001,0x116a8a0,0x11000200,0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200,0x24000000,0x10200000,0x11000200,0x24000008, -0x1710000,0x11000200,0x40000001,0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100,0x220401,0x11000319,0x7c00100,0x220402, 
-0x11000319,0x7c00100,0x250400,0x11000319,0x7c00100,0x250401,0x11000419,0x7c00100,0x220400,0x11000419,0x7c00100,0x220401,0x11000419,0x7c00100,0x220402,0x11000419, -0x7c00100,0x230400,0x11000419,0x7c00100,0x250400,0x11000419,0x7c00100,0x250401,0x11000419,0x7c00100,0x250402,0x11000519,0x7c00100,0x220400,0x11000519,0x7c00100, -0x230400,0x11000600,0x4000400,0x200000,0x11000600,0x4000400,0x200002,0x11000600,0x4000400,0x201000,0x11000600,0x7c00500,0x220400,0x11000600,0x7c00500,0x230400, -0x11000600,0x7c00500,0x530400,0x11000600,0x7c00d00,0x230400,0x11000619,0x7c00500,0x22040f,0x11000800,0x4000010,0x1001401,0x11000800,0x4000400,0x200001,0x11000800, -0x6800010,0x201001,0x11000800,0x7c00500,0x230401,0x11000807,0x7c00100,0x220400,0x11000807,0x7c00100,0x250400,0x1100080e,0x4000400,0x200000,0x1100080e,0x4000400, -0x200002,0x1100080e,0x7000500,0x220402,0x1100080e,0x7c00100,0x220400,0x1100080e,0x7c00100,0x220401,0x1100080e,0x7c00100,0x220402,0x1100080e,0x7c00100,0x250400, -0x1100080e,0x7c00100,0x250401,0x1100080e,0x7c00120,0x220402,0x1100080e,0x7c00120,0x250402,0x11000908,0x4000000,0x200000,0x11000908,0x7c00100,0x220400,0x11000908, -0x7c00100,0x220401,0x11000908,0x7c00100,0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200000,0x11000a03,0x4000000,0x270000,0x11000a03,0x7c00100, -0x220400,0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000b13,0x2802500,0x962460,0x11000b13,0x4000000,0x200000, -0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000,0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100,0x2633800,0x11000c00, -0,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,0x4000000,0x200000,0x11000c02,0x4000000,0x1329400,0x11000c02,0x4000000, -0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100,0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02,0x7c00100,0x230402, 
-0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000c02,0xc000010,0xb48000,0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400,0x962460,0x11000f0a, -0x2806400,0x962460,0x11000f0a,0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a,0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401,0x11001004,0x2802100, -0x962460,0x11001004,0x2802400,0x962460,0x11001004,0x2806400,0x962460,0x11001004,0x4000000,0x200000,0x11001004,0x4000000,0x1500000,0x11001004,0x6800100,0x962540, -0x11001004,0x6800100,0x962541,0x11001004,0x7c00100,0x230400,0x11001004,0x7c00100,0x230401,0x11001110,0x2802100,0x962460,0x11001110,0x2802400,0x962460,0x11001110, -0x2806400,0x962460,0x11001110,0x6800100,0x962540,0x11001110,0x7c00100,0x230400,0x11001110,0x7c00100,0x230401,0x1100120f,0x2802100,0x962460,0x1100120f,0x2802400, -0x962460,0x1100120f,0x2806400,0x962460,0x1100120f,0x6800100,0x962540,0x1100120f,0x7c00100,0x230400,0x1100131f,0x2802100,0x962460,0x1100131f,0x2802400,0x962460, -0x1100131f,0x2806400,0x962460,0x1100131f,0x4000000,0x200000,0x1100131f,0x6800000,0x1329800,0x1100131f,0x6800100,0x962540,0x1100131f,0x6800100,0x962541,0x1100131f, -0x7c00100,0x230400,0x1100131f,0x7c00100,0x230401,0x11001423,0x2802100,0x962460,0x11001423,0x2806400,0x962460,0x11001423,0x6800100,0x962540,0x11001423,0x6800100, -0x962541,0x11001423,0x7c00100,0x230400,0x11001423,0x7c00100,0x230401,0x11001524,0x2802100,0x962460,0x11001524,0x2802100,0x962461,0x11001524,0x2806400,0x962460, -0x11001524,0x6800000,0x1329800,0x11001524,0x6800100,0x962540,0x11001524,0x7c00100,0x230400,0x11001615,0x2802100,0x962460,0x11001615,0x2806400,0x962460,0x11001615, -0x6800000,0x1329800,0x11001615,0x6800100,0x962540,0x11001615,0x6800100,0x962541,0x11001615,0x7c00100,0x230400,0x1100171a,0x2802100,0x962460,0x1100171a,0x2806400, -0x962460,0x1100171a,0x6800000,0x1329800,0x1100171a,0x6800100,0x962540,0x1100171a,0x6800100,0x962541,0x1100171a,0x7c00100,0x230400,0x11001900,0x4000000,0x1600000, 
-0x11001926,0x2802100,0x1862460,0x11001926,0x2802400,0x1862460,0x11001926,0x2806100,0x1862460,0x11001926,0x4000000,0x200000,0x11001926,0x4000010,0x400000,0x11001926, -0x6800000,0x1329800,0x11001926,0x7800100,0x1830142,0x11001926,0x7c00100,0x1830000,0x11001926,0x7c00900,0x1830000,0x11001926,0x7e00100,0x1830000,0x11001a18,0x2802100, -0x1862460,0x11001a18,0x2802400,0x1862460,0x11001a18,0x6800000,0x1329800,0x11001a18,0x7800100,0x1830142,0x11001a18,0x7c00100,0x1830000,0x11001a18,0x7c00100,0x1830002, -0x11001a18,0x7c00900,0x1830000,0x11001a18,0x7e00100,0x1830000,0x11001d0c,0x7c00100,0x230400,0x11001d0c,0x7c00100,0x250400,0x11001e12,0x7c00100,0x2230500,0x11001e12, -0x7c00100,0x2330520,0x11001e12,0x7c80100,0x2330520,0x11002619,0x7c00100,0x220401,0x11002619,0x7c00100,0x220402,0x11002619,0x7c00100,0x250401,0x1100270e,0x4000400, -0x200001,0x1100270e,0x4000400,0x200002,0x1100270e,0x4000400,0x500001,0x1100270e,0x7c00100,0x220401,0x1100270e,0x7c00100,0x250401,0x11002800,0x80000,0x918820, -0x11002800,0x80000,0x1c18020,0x11002800,0x180000,0x918820,0x11002800,0x4000001,0x440001,0x11002800,0x4000001,0x440002,0x11002800,0x4000001,0xc4000b,0x11002800, -0x6800000,0x201c00,0x11002800,0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800,0x24000000,0x810000,0x11002800,0x24000000, -0x1410000,0x11002800,0x24000000,0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006,0xc0000b,0x11002800,0x24000008,0x1410000, -0x11002800,0x24000008,0x1710000,0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000,0x11002800,0x2c000010,0x11248002,0x11002800, -0x40000001,0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x80000,0x2a65620,0x11002801,0x82000,0x962460,0x11002900,0x4000000,0x20000e,0x11002900,0x4000000, -0x20000f,0x11002900,0x4000020,0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020,0x81000f,0x11002900,0x4000020,0x141000e, 
-0x11002900,0x4000020,0x141000f,0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000,0x11002a00,0x4000000,0x1600000,0x11002a00, -0x4000000,0x1600002,0x11002b01,0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00,0x4000000,0x200002,0x11002c00,0x4000000, -0x20000f,0x11002c00,0x4000020,0x200000,0x11002c00,0x7c00000,0x200000,0x11002c00,0x7c00020,0x200000,0x11002c00,0x7c00120,0x220405,0x11002c00,0x7c00120,0x230402, -0x11002c00,0x7c00120,0x250402,0x11002c00,0x7c00120,0x250405,0x11002c19,0x7c00100,0x250400,0x11002c19,0x7c00100,0x250401,0x11002d00,0x4000000,0x100006,0x11002d00, -0x4000000,0x200006,0x11002d19,0x7c00100,0x220402,0x11002d19,0x7c00100,0x230400,0x11002d19,0x7c00100,0x250402,0x11002e00,0x24000000,0x200000,0x11002e00,0x24000020, -0x200000,0x11002e00,0x24000020,0x200001,0x11002e00,0x24000020,0x10200000,0x11002f00,0x24000020,0x200000,0x11002f00,0x24000020,0x200001,0x11002f00,0x24000020,0x200002, -0x11002f00,0x24000020,0xf00000,0x11002f00,0x24000020,0x1600000,0x11002f00,0x24000022,0x1600000,0x11003000,0x24000000,0x200000,0x11003000,0x24000000,0x10200000,0x11003000, -0x24000020,0x200000,0x11003000,0x24000020,0x810000,0x11003000,0x24000020,0x1410000,0x11003100,0x24000000,0x200000,0x11003200,0x24000000,0x200000,0x11003300,0x4000000, -0x100003,0x11003400,0x24000000,0x100000,0x11003400,0x24000000,0x200000,0x11003500,0x24000000,0x200000,0x11003600,0x24000000,0x200000,0x11003600,0x24000000,0x10200000, -0x11003600,0x24000020,0x200000,0x11003700,0x24000000,0x200000,0x11003700,0x24000000,0xe00000,0x11003700,0x24000000,0x10200000,0x11003700,0x24000000,0x10e00000,0x11003700, -0x24000000,0x928045a0,0x11003700,0x24000020,0x200000,0x11003800,0x4000000,0x100000,0x11003800,0x24000000,0x200000,0x11003800,0x24000000,0xb00000,0x11003800,0x24000000, 
-0xe00000,0x11003800,0x24000000,0x1710000,0x11003800,0x24000000,0x10200000,0x11003800,0x24000000,0x10b00000,0x11003800,0x24000000,0x10e00000,0x11003800,0x24000000,0x10e05200, -0x11003800,0x24000000,0x928045a0,0x11005003,0x7c00100,0x220402,0x11005013,0x2802500,0x962460,0x11005013,0x4000020,0x200005,0x11005013,0x7c00100,0x2633801,0x11005013, -0x7c00100,0x2633802,0x11005013,0x7c00100,0x2633805,0x11005019,0x7c00100,0x220402,0x11005100,0x24000000,0x810000,0x11005100,0x24000000,0x1410000,0x11005102,0x7000100, -0x230408,0x11005102,0x7c00100,0x230404,0x11005102,0x7c00100,0x230407,0x11005102,0x7c00100,0x230408,0x11005102,0x7c00100,0x230409,0x11005201,0x2802400,0x962460, -0x11005500,0x80000,0x1e18820,0x11005502,0x7000100,0x230408,0x11005502,0x7c00100,0x230404,0x11005502,0x7c00100,0x230407,0x11005502,0x7c00100,0x230408,0x11005502, -0x7c00100,0x230409,0x11005667,0x1000,0,0x11020200,0x80004,0x418820,0x11020200,0x4000000,0x100006,0x11020200,0x4000000,0x10000f,0x11020200,0x4000400, -0x100002,0x11020200,0x4000400,0x500002,0x11020200,0x6800c00,0x101000,0x11020200,0x24000000,0x100000,0x11020200,0x24000000,0x1400000,0x11020200,0x24000000,0x1500000, -0x11020200,0x24000000,0x1600000,0x11020200,0x24000000,0x10200000,0x11020200,0x24000020,0x100000,0x11020200,0x24000020,0x1600000,0x11020219,0x7c00100,0x12040f,0x11020219, -0x7c00100,0x220400,0x11020219,0x7c00100,0x220401,0x11020219,0x7c00100,0x250400,0x11020319,0x7c00100,0x220400,0x11020319,0x7c00100,0x220401,0x11020319,0x7c00100, -0x220402,0x11020319,0x7c00100,0x250400,0x11020319,0x7c00100,0x250402,0x11020319,0x7d00100,0x220402,0x11020419,0x7c00100,0x220401,0x11020519,0x7c00100,0x220400, -0x11020600,0x4000400,0x100002,0x11020600,0x4000400,0x200000,0x11020600,0x7c00500,0x130400,0x11020600,0x7c00d00,0x130400,0x11020701,0x2802400,0x962460,0x11020701, -0x2802400,0x962461,0x11020701,0x2802400,0xc62460,0x1102080e,0x7c00100,0x220400,0x1102080e,0x7c00100,0x250400,0x11020908,0x7c00100,0x220400,0x11020908,0x7c00100, 
-0x220401,0x11020908,0x7c00100,0x250400,0x11020908,0x7c00100,0x250401,0x11022800,0x24000000,0x100000,0x11022800,0x24000000,0x200000,0x11022800,0x24000000,0x200002, -0x11022800,0x24000000,0x401000,0x11022800,0x24000000,0xf00002,0x11022800,0x24000000,0xf0ac02,0x11022800,0x24000000,0x1500000,0x11022800,0x24000002,0x100000,0x11022800, -0x24000002,0x370000,0x11022800,0x24000002,0x470000,0x11022800,0x24000006,0x400000,0x11022800,0x24000008,0x1710000,0x11022800,0x24000008,0x1712c00,0x11022800,0x24000020, -0x100000,0x11022800,0x24000020,0x1500000,0x11022800,0x24000020,0x1500002,0x11022900,0x4000000,0x10000e,0x11022900,0x4000000,0x10000f,0x11022919,0x7c00100,0x12040f, -0x11022c00,0x4000000,0x100002,0x11022c00,0x4000000,0x1500002,0x11022c00,0x4000000,0x1600002,0x11022c00,0x4000000,0x1010000f,0x11022c00,0x7c00120,0x120405,0x11022c0e, -0x7c00100,0x250401,0x11022c19,0x7c00100,0x150401,0x11022d00,0x4000000,0x100006,0x11022d00,0x4000000,0x200006,0x11022d19,0x7c00100,0x120402,0x11022d19,0x7c00100, -0x150402,0x11022e00,0x24000000,0x200000,0x11022e00,0x24000020,0x100000,0x11022e00,0x24000020,0x10100000,0x11022f00,0x24000020,0x100000,0x11022f00,0x24000020,0x100001, -0x11022f00,0x24000020,0x100002,0x11023000,0x24000000,0x100000,0x11023300,0x4000000,0x100002,0x11023300,0x4000000,0x100003,0x11023300,0x4000100,0x120403,0x11023300, -0x4000100,0x150403,0x11023300,0x4000100,0x10150403,0x11023400,0x24000000,0x100000,0x11023500,0x24000000,0x100000,0x11023600,0x24000000,0x100000,0x11023600,0x24000020, -0x100000,0x11023600,0x24000020,0x10100000,0x11023700,0x24000000,0x100000,0x11023700,0x24000000,0xe00000,0x11023700,0x24000000,0x10100000,0x11023700,0x24000000,0x10e00000, -0x11023700,0x24000020,0x100000,0x11023700,0x24000020,0x10100000,0x11023800,0x4000000,0x100000,0x11023800,0x24000000,0x200000,0x11024e67,0,0,0x11025600, -0x4000000,0x100000,0x11042a00,0x4000000,0x1600000,0x11045700,0x4000000,0x20000a,0x11045700,0x4000020,0x20000a,0x11045712,0x7c00100,0xe3040a,0x11045712,0x7c80100, 
-0xe3040a,0x11045716,0x7c00100,0xe30c0a,0x11045716,0x7c00100,0x2530c0a,0x11063d00,0x4000001,0x440011,0x11065700,0x4000000,0x810011,0x11065700,0x4000000,0xe00011, -0x11065700,0x4000000,0x1410011,0x11065700,0x4000000,0x1500011,0x11065700,0x4000000,0x1600011,0x11065700,0x4000006,0xe70011,0x11065700,0x4000008,0xe00011,0x11065700, -0x4000008,0xe02c11,0x11065700,0x4000010,0x871411,0x11065700,0x4000010,0x1201411,0x11065700,0x4000010,0x1271011,0x11065700,0x4000020,0xe00011,0x11065700,0x4000400, -0xe00011,0x11065700,0x4000420,0xe00011,0x11065700,0x6800000,0xe01c11,0x11065700,0x6800040,0xe00011,0x11065700,0xc000010,0x80ac11,0x11065700,0xc000010,0xb48011, -0x11065719,0x7c00100,0xe20411,0x11065719,0x7c00100,0xe50411,0x11065719,0x7c00140,0xe20411,0x11065719,0x7c00140,0xe50411,0x11080100,0x6800000,0x201c00,0x11080100, -0x68000c0,0x11329800,0x11080100,0x24000000,0x200000,0x11080100,0x24000000,0x810000,0x11080100,0x24000000,0x1410000,0x11080100,0x24000000,0x1500000,0x11080100,0x24000000, -0x1600000,0x11080100,0x24000000,0x1b00000,0x11080100,0x24000000,0x2410000,0x11080100,0x24000000,0x10200000,0x11080100,0x24000006,0xd70000,0x11080100,0x24000008,0x1713c00, -0x11080100,0x24000008,0x1714000,0x11080100,0x24000010,0x1001400,0x11080100,0x24000010,0x1071000,0x11080100,0x24000010,0x1071400,0x11080100,0x24000020,0x200000,0x11080100, -0x24000020,0x400000,0x11080100,0x24000020,0x1600000,0x11080100,0x24000400,0x200000,0x11080100,0x24000420,0x200000,0x11080100,0x2c000010,0xb48000,0x11080100,0x2c000010, -0x100ac00,0x11080100,0x44000001,0x1a40000,0x11080119,0x7c00100,0x220400,0x11080119,0x7c00100,0x250400,0x11080119,0x7c001c0,0x220400,0x11080119,0x7c001c0,0x250400, -0x11080200,0x4000400,0x200002,0x11080200,0x24000000,0x200000,0x11080200,0x24000000,0x1500000,0x11080200,0x24000000,0x1600000,0x11080200,0x24000020,0x200000,0x110a1e12, 
-0x7c00100,0x2130480,0x110a1e12,0x7c80100,0x2130480,0x110a3000,0x24000000,0x30e00000,0x110a3000,0x24100000,0x810001,0x110a3000,0x24100000,0x1410001,0x110a3700,0x24000000, -0x30200000,0x110a3d00,0x4000000,0xe00000,0x110a3d00,0x4000000,0xe00002,0x110a3d00,0x24000000,0xe00000,0x110a3d11,0x7c00300,0xe30000,0x110a3d11,0x7c00900,0x1230400, -0x110a3d12,0x2802400,0x962460,0x110a3e14,0x7c00100,0xe30000,0x110a3e14,0x7c00100,0xe30001,0x110a3e14,0x7c00100,0x2530000,0x110a3e14,0x7c00900,0x1230000,0x110a3e14, -0x7c00900,0x1230001,0x110a3f16,0x7c00100,0xe30c00,0x110a3f16,0x7c00100,0xe30c01,0x110a3f16,0x7c00100,0x2530c00,0x110a3f16,0x7c00900,0x1230c00,0x110a3f16,0x7c00900, -0x1230c01,0x110a4005,0x7c00100,0xe30400,0x110a4112,0x7c00100,0xe30402,0x110a4112,0x7c80100,0xe30402,0x110a4400,0x4000000,0xe00000,0x110a4412,0x4000000,0xe00002, -0x110a4412,0x4000000,0xe00003,0x110a4416,0x4000000,0xe00c03,0x110a4500,0x4000000,0xe0000d,0x110a4516,0x4000000,0xe00c0d,0x110a4711,0x7c40300,0xe30000,0x110a4f11, -0x7c00300,0xe30001,0x110a4f11,0x7c40300,0xe30000,0x110a5300,0x4000000,0x810010,0x110a5300,0x4000000,0xe00002,0x110a5300,0x4000000,0xe00010,0x110a5300,0x4000000, -0x1410010,0x110a5300,0x4000002,0xe70010,0x110a5300,0x4000008,0x810010,0x110a5300,0x4000008,0x1410010,0x110a5300,0x6800000,0xe01c02,0x110a5300,0x6800000,0xe01c10, -0x110a5400,0x4000000,0x81000c,0x110a5400,0x4000000,0xe0000c,0x110a5400,0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c,0x110a5400,0x4000000,0x160000c,0x110a5400, -0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400,0x4000010,0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400,0x4000010,0x127100c,0x110a5400,0x4000020, -0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010,0x80ac0c,0x110a5400,0xc000010,0xb4800c,0x11400a0c,0xc000010,0x1049400,0x11400c0e,0x4000010,0xb00000, -0x11400c0e,0x4000010,0x1071400,0x11400c0e,0xc000010,0xb48000,0x11400c11,0x7c00900,0x230400,0x11400f34,0xc000010,0x448000,0x11400f44,0xc000010,0x448000,0x11401d70, 
-0x4000000,0x200000,0x11403d92,0x4000000,0xe00000,0x11445787,0x4000004,0x120000a,0x11445787,0x4000008,0x81000a,0x11445787,0x4000008,0x141000a,0x11445787,0x4000010, -0x87000a,0x11445787,0xc000010,0x84800a,0x11445790,0x3802500,0x126246a,0x11445790,0x7c00d00,0x2530c0a,0x114a3d87,0x24000000,0x810000,0x114a3d87,0x24000000,0x1410000, -0x114a3d87,0x24000008,0x810000,0x114a3d87,0x24000008,0x1410000,0x114a3d87,0x24000010,0x870000,0x114a3d87,0x2c000010,0x848000,0x114a3d8d,0x4000000,0xe00000,0x114a3d8d, -0x24000000,0xe00000,0x114a3d8d,0x24000002,0x1200000,0x114a3d8d,0x24000002,0x10e00000,0x114a3d8d,0x24000008,0x810000,0x114a3d8d,0x24000008,0x1410000,0x114a3d90,0x7c00900, -0x930c00,0x114a3d90,0x7c00900,0xe30c00,0x114a3d92,0x7c00300,0xe30000,0x114a3e90,0x7000400,0x1200c02,0x114a3f87,0x4000004,0x1200000,0x114a3f90,0x7c00d00,0x2530c00, -0x114a4292,0x4000000,0xe00000,0x114a4292,0x4000000,0xe0000f,0x114a4492,0x4000000,0xe00002,0x114a4492,0x4000000,0xe00003,0x114a4492,0x4000000,0x10e00003,0x114a4592, -0x4000000,0xe00002,0x114a4592,0x4000000,0xe0000d,0x1180090a,0x2802400,0x962460,0x11800c17,0x2802100,0x962460,0x11800c17,0x2802500,0x962460,0x11800f1d,0x2802400, -0x962460,0x11800f29,0x2802400,0x962460,0x11820700,0x2802400,0x962460,0x11820700,0x2802500,0x962460,0x118a3d93,0x2802400,0x962460,0x118a3e90,0x2802400,0x962460, -0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c1b,0x6800000,0x1329800,0x11c00f58,0x6800000,0x1329800,0x11c0105d,0x6800000,0x1329800,0x11c01161, -0x6800000,0x1329800,0x11c01265,0x6800000,0x1329800,0x11c01469,0x4000000,0x200000,0x11c01469,0x6800000,0x1329800,0x11c01469,0x7c00100,0x230400,0x11c0511b,0x7c00100, -0x230408,0x20000067,0x1000,0,0x20000b13,0x2802400,0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100,0x962460,0x20001b27,0x2802100,0x962461, -0x20001b27,0x2802400,0x962460,0x20001b27,0x2806400,0x962460,0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000,0x20001b27,0x4000000,0x400000,0x20001b27, 
-0x4000000,0x500000,0x20001b27,0x4000000,0x810000,0x20001b27,0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27,0x4000000,0x1410000,0x20001b27,0x4000010, -0xb00000,0x20001b27,0x4000010,0xc00000,0x20001b27,0x6800000,0x1329800,0x20001b27,0x6800100,0x462540,0x20001b27,0x6800400,0x962540,0x20001b27,0x7c00100,0x230400, -0x20001b27,0x7c00100,0x230401,0x20002619,0x7c00100,0x220401,0x20002a00,0x4000000,0x1600000,0x20004b67,0,0x1900020,0x20004c67,0,0x1900020,0x20004d67, -0,0x1900020,0x20006d67,0x1000,0,0x20006e67,0x1000,0,0x20026d67,0,0,0x20026e67,0,0,0x200a4a12,0x7c00100, -0x1f304c1,0x200a4a12,0x7c00100,0x20304e1,0x21005600,0x4000000,0x700000,0x21022a00,0x4000000,0x1600000,0x30000419,0x7c00100,0x220400,0x30000419,0x7c00100,0x220401, -0x30000419,0x7c00100,0x250400,0x30000419,0x7c00100,0x250401,0x30000519,0x7c00100,0x220400,0x30000600,0x4000400,0x200000,0x30000600,0x7c00500,0x230400,0x30000605, -0x4000400,0x200000,0x3000080e,0x7c00100,0x220400,0x30000908,0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908,0x7c00100,0x220401,0x30000908,0x7c00100, -0x250400,0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006,0x400000,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100,0x230400,0x30000d22,0,0x218960, -0x30000d22,0x2802100,0x962460,0x30000d22,0x2802400,0x962460,0x30000d22,0x2802500,0x962460,0x30000d22,0x4000000,0x200000,0x30000d22,0x4000010,0x200000,0x30000d22, -0x7c00100,0x230400,0x30000d22,0xc000010,0x248000,0x30000e25,0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100,0x962460,0x30001821,0x2806400, -0x962460,0x30001821,0x4000000,0x200000,0x30001821,0x6800100,0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100,0x230400,0x30001b27,0x2802100,0x962460, -0x30001b27,0x2802400,0x962460,0x30001b27,0x4000000,0x200000,0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400,0x30001c1c,0x2802100,0x1862460,0x30001c1c, 
-0x2802400,0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c,0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c,0x6800100,0x1862540,0x30001c1c,0x7c00100, -0x1830000,0x30001c1c,0x7c00100,0x1830001,0x30001c1c,0xc000010,0x448000,0x30001f0b,0x4000000,0x200000,0x30001f0b,0x4000010,0x200000,0x30001f0b,0x4000010,0x400000, -0x30001f0b,0x6800000,0x200000,0x30001f0b,0x7c00100,0x230400,0x30001f0b,0xc000010,0x248000,0x30002006,0x7c00100,0x250400,0x30002128,0x4000010,0x200000,0x30002128, -0x7c00100,0x230400,0x30002128,0xc000010,0x248000,0x3000221d,0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d,0x4000001,0x440000,0x3000221d,0x7c00100, -0x230400,0x30002300,0x4000010,0x400000,0x30002320,0x7c00100,0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400,0x1862460,0x30002417,0x2806400,0x1862460, -0x30002417,0x2882000,0x1862460,0x30002417,0x4000000,0x200000,0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000,0x30002417,0x4000010,0x400000,0x30002417, -0x4000010,0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100,0x1830000,0x3000251b,0x80000, -0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000,0x3000251b,0x4000010,0x400000, -0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b,0x7c00900,0x230400,0x3000251b, -0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,0x4000001,0xc41c0b,0x30002800,0x24000000,0x200000,0x30002800,0x2c000010,0x1248002,0x30002800,0x2c000010, -0x11248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460,0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100,0x10220405,0x30002d19,0x7c00100,0x250400, -0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000,0x30003100,0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x200000,0x3000392e, 
-0x24000000,0x200000,0x30005013,0x7c00100,0x2633801,0x30005600,0,0x918820,0x30020600,0x4000400,0x500000,0x30020701,0x2802400,0x962460,0x30020701,0x2802400, -0xc62460,0x300a3a11,0x4020000,0xe00000,0x300a3a11,0x4020000,0xe00002,0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000, -0x300a3d11,0x7c00300,0xe30002,0x300a4305,0x7c00100,0xe30400,0x300a4611,0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929, -0x4000000,0xe00000,0x30402576,0x4000010,0x400000,0x30402576,0x4000010,0xb70000,0x30402576,0xc000010,0xb48000,0x304a3d92,0x4000000,0xe00000,0x30800c17,0x2802100, -0x962460,0x30c01c6e,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000, -0x3100581e,0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00, -0x80000,0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000, -0x962540,0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405, -0x31005d00,0x7c00120,0x250405,0x31006000,0x82000,0x962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419, -0x7c00100,0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100, -0x250400,0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820, -0x32002800,0x4000001,0x440002,0x32002800,0x24000000,0x200000,0x32002800,0x24000000,0x200002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919, 
-0x7c00100,0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000, -0x200000,0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000, -0x32002f00,0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003600, -0x24000020,0x10200000,0x32003700,0x24000000,0x100000,0x32003700,0x24000000,0x200000,0x32003700,0x24000000,0x10200000,0x32003800,0x24000000,0x810000,0x32003800,0x24000000, -0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100,0x250400,0x3200622a,0x2802100,0x962460, -0x3200622a,0x2806000,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b,0x2806000,0x962460,0x3200632b,0x7c00100,0x230400,0x3200642c, -0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100,0x230400,0x32006600,0x24000020,0x200000,0x32006700,0x24000020, -0x200000,0x32006800,0x24000020,0x200000,0x32006800,0x24000020,0x10200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020,0x810000,0x32006900,0x24000020,0x1410000, -0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460,0x32023300,0x4000000,0x100000,0x32026c01, -0x12882000,0x962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020,0x810000,0x32086600,0x24000020,0x1410000,0x32086900,0x24000020, -0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x30200000,0x320a3d11,0x7c00100,0x1230400,0x320a3e14,0x7c00100,0xe30010,0x320a3e14,0x7c00100,0x2530000, -0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11,0x7c00300,0xe30001,0x320a6b16,0x7c00100,0x2530c00,0x32406372, 
-0xc000010,0x448000,0x324a3d95,0x4000000,0x10e00000,0x324a3d95,0x7c00100,0x1230400,0x324a3f90,0x4000002,0x1200c00,0x324a538d,0x24000000,0xe00000,0x32820701,0x2802000, -0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200000,0x4000080e,0x7c00100,0x220400,0x4000080e,0x7c00100,0x250400, -0x4000080e,0x7c00100,0x250402,0x40000c02,0,0x218960,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02,0x2802500,0x962460,0x40000c02, -0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000d22,0x7c00100,0x230400,0x40000f0a,0x7c00100,0x230400,0x40001004,0x7c00100, -0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460,0x4000120f,0x4000000,0x1600000,0x4000120f,0x7c00100,0x230400, -0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615,0x2802400,0x962460,0x40001615,0x7c00100,0x230400,0x40002417, -0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002,0x200000,0x40002c00,0x4000000,0x200002,0x40003000,0x24000000, -0x10200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40003700,0x24000000,0x10200000,0x40005a09,0x7c00100,0x220400,0x40005a09,0x7c00100,0x250400, -0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30,0x6800000,0x1329800,0x40006f30, -0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000,0x200000,0x40007208,0x7c00100, -0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400,0x40007219,0x7c00500,0x220400, -0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,0x24000000,0x200000,0x40007300,0x24000000,0x10200000,0x40007400,0x4000000,0x200000,0x40007531, 
-0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010,0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400,0x40007a32,0x6800000, -0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400,0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701,0x2802400,0xc62460, -0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700,0x24000000,0x30200000,0x400a3700,0x24000000,0x30e00000,0x400a4400,0x4000000,0xe0000d,0x400a4412, -0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000,0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010,0x404077b8,0x4000000, -0x200000,0x404077bb,0x4000000,0x200000,0x404077bb,0x4000000,0x400000,0x40c0511b,0x4000000,0x200000,0x41000419,0x7c00100,0x220400,0x41000419,0x7c00100,0x250400, -0x4100080e,0x7c00100,0x220400,0x4100080e,0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100,0x250400,0x41000b13,0x2802000,0x962460,0x41000b13, -0x2802100,0x962460,0x41000b13,0x4000000,0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0xb00000,0x41000c02,0x4000000,0x1500000,0x41000f0a,0x7c00100, -0x230400,0x41001004,0x7c00100,0x230400,0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c,0x7c00100,0x230400,0x41001d0c,0x7c00100,0x23040f, -0x41001f0b,0x2802100,0x962460,0x41001f0b,0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000,0x200000,0x41002800,0x24000000,0x400000,0x41002919, -0x7c00100,0x22040e,0x41002a00,0x4000000,0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000,0x41002c00,0x7c00120,0x220405,0x41003000,0x24000000, -0x200000,0x41003700,0x24000000,0x200000,0x41003700,0x24000000,0x10200000,0x41003700,0x24000000,0x10e00000,0x41005d00,0x7c00120,0x220405,0x41006600,0x24000020,0x200000, -0x41006600,0x24000020,0x810000,0x41006600,0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100,0x220400,0x41007300,0x24000000,0x200000,0x41007e0e, 
-0x2802000,0x962460,0x41007e0e,0x4000000,0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400,0x41008002,0x7c00100,0x230400,0x41008137,0x2802100, -0x962460,0x41008137,0x4000000,0x200000,0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301,0x2802000,0x962460,0x41008407,0x4000000,0x200000, -0x41008407,0x4000000,0x400000,0x41008407,0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100,0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b, -0x4000000,0x200000,0x4100860b,0x7c00100,0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400,0x41008838,0x7c00100,0x250400,0x41008939,0x2802000, -0x962460,0x41008939,0x2802100,0x962460,0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939,0x4000000,0x400000,0x41008939,0x7c00100,0x230400, -0x41008939,0xc000010,0x448000,0x41008a00,0x4000000,0x200000,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000,0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b, -0x7e00100,0x1830000,0x41008c3d,0x4000010,0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f,0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100, -0x22040f,0x41008e00,0x24000000,0x200000,0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00,0x24000006,0x400000,0x41008f3a,0x2802000,0x962460, -0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a,0x4000000,0x200000,0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c, -0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000,0x962460,0x41020701,0x2802000,0xc62460,0x410a3700,0x24000000,0x30200000,0x410a3700,0x24000000, -0x30e00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000,0x410a4f11,0x7c00300,0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010, -0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100,0x4000000,0xf00010,0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100, 
-0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x414a8292,0x4000000,0xe00000,0x41808300,0x2802000,0x962460,0x41c01469,0x6800000,0x1329800,0x50000419,0x7c00100, -0x220400,0x50000419,0x7c00100,0x250400,0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908,0x7c00100,0x250400,0x50000b13,0x2802500,0x962460, -0x50000f0a,0x7c00100,0x230400,0x50001615,0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020,0x962460,0x50002c00,0x4000000,0x200000,0x50002c19, -0x7c00100,0x220400,0x50002d19,0x7c00100,0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000,0x50003700,0x24000000,0x200000,0x50005d00,0x7c00120, -0x220405,0x50005d00,0x7c00120,0x250405,0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600,0x24000020,0x200000,0x50007300,0x24000000,0x200000, -0x50008301,0x2802400,0x962460,0x50008a00,0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000,0x200000,0x50009257,0x4000010,0x1071400,0x50009257, -0x6800000,0x1329800,0x50009257,0x7c00100,0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400,0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100, -0x962460,0x5000933e,0x2802400,0x962460,0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000,0x400000,0x5000933e,0x4000010,0x400000,0x5000933e,0x6800000,0x1329800, -0x5000933e,0x6800100,0x962540,0x5000933e,0x6800100,0x962541,0x5000933e,0x6804400,0x962540,0x5000933e,0x7c00100,0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e, -0xc000010,0x448000,0x50009419,0x7c00100,0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200000,0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100, -0x230400,0x5000965a,0xc000010,0xb48000,0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b,0x7c00100,0x230400,0x50009865,0x7c00100,0x230400, -0x50009965,0x4000010,0x400000,0x50009965,0x7c00100,0x230400,0x50409a92,0x4000000,0x200000,0x5100080e,0x7c00100,0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02, 
-0x2802100,0x962460,0x51000c02,0x4000000,0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400,0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500, -0x230400,0x51001110,0x2802100,0x962460,0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524,0x2802100,0x962460,0x51001524,0x4000000,0x200000, -0x51001524,0x7c00100,0x230400,0x5100171a,0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000,0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27, -0x4000000,0x200000,0x51001b27,0x4000000,0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400,0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802400, -0x1862460,0x51001c1c,0x2806400,0x1862460,0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c,0x6800000,0x1862400,0x51001c1c,0x6800100,0x1862400, -0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800400,0x1862400,0x51001c1c,0x7c00100,0x1830000,0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619, -0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000,0x200000,0x51002d19,0x7c00100,0x230400,0x51003700,0x24000000,0x200000,0x51003700,0x24000000, -0xe00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000,0x51006108,0x7c00100,0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000, -0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300,0x24000000,0x200000,0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301, -0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008a00,0x7c00500,0x230400,0x51008e00,0x24000000,0x200000,0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000, -0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,0x51008e00,0x24000000,0x1710000,0x51008e00,0x24000002,0x200000,0x51008e00,0x24000500,0x230400, -0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,0x7c00100,0x22040e,0x51009419,0x7c00100,0x22040f,0x51009419,0x7c00100,0x250400,0x51009500, 
-0x4000000,0x200000,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,0x220400,0x51009519,0x7c00100,0x22040f,0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100, -0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,0x51009b71,0x6800100,0x962540,0x51009b71,0x6804400,0x962540,0x51009b71,0x7c00100,0x230400, -0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,0x2802c00,0x962460,0x51009c52,0x4000010,0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52, -0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,0x448000,0x51009d6d,0x6800000,0x1329800,0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500, -0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,0x51009e08,0x2802100,0x962460,0x51009f63,0x4000010,0x400000,0x51009f63,0x6800000,0x1329800, -0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,0xc000010,0x448000,0x51009f63,0xc000010,0xb48000,0x5100a008,0x2000,0x962460,0x5100a008, -0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,0x220400,0x5100a008,0x7c00100,0x230400,0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500, -0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,0x5100a16f,0x6800100,0x962540,0x5100a16f,0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000, -0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,0x6800000,0x1329800,0x5100a24f,0x7c00100,0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e, -0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,0x962540,0x5100a36e,0x6804400,0x962540,0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100, -0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800,0x5100a442,0x6800100,0x962540,0x5100a442,0x7c00100,0x230400,0x5100a442,0xc000010,0x448000, -0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,0x2802000,0x962460,0x5100a76b,0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c, 
-0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,0xe00000,0x5100ab00,0x4000000,0xe00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020, -0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,0x510a7300,0x24000000,0x30200000,0x510aaa00,0x4000000,0x30e00000,0x5140a2b3,0x4000400,0x400000, -0x514a8292,0x4000000,0xe00000,0x51802b84,0x2802000,0x962460,0x51c00908,0x2802400,0x962460,0x51c0a008,0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a, -0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,0x1600000,0x52001b00,0x4000000,0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100, -0x1862400,0x52001c1c,0x6800400,0x1862400,0x52001e12,0x7c00100,0x2230500,0x52001e12,0x7c00100,0x2330520,0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400, -0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,0x4000000,0x200006,0x52003000,0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108, -0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,0x962460,0x52008407,0x7c00100,0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000, -0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,0x52009419,0x7c00100,0x250400,0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460, -0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,0x4000010,0x200000,0x5200ac7e,0x7c00100,0x230400,0x5200ad28,0x7c00100,0x230400,0x5200ae6a, -0x2802100,0x1862460,0x5200ae6a,0x2802400,0x962460,0x5200ae6a,0x2802400,0x1862460,0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000, -0x1329800,0x5200ae6a,0x6800100,0x1862400,0x5200ae6a,0x6800100,0x1862540,0x5200ae6a,0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000, -0x5200b083,0x4000010,0x400000,0x5200b083,0x7c00100,0x230400,0x5200b083,0xc000010,0x448000,0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182, 
-0x4000010,0x400000,0x5200b182,0x7c00100,0x230400,0x5200b182,0xc000010,0x448000,0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100, -0x230400,0x5200b54e,0x2802100,0x962460,0x5200b54e,0x2802400,0x962460,0x5200b54e,0x4000000,0x200000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0x1329800, -0x5200b54e,0x6800100,0x962540,0x5200b54e,0x6804400,0x962540,0x5200b54e,0x7c00100,0x230400,0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c, -0x6800400,0x1862400,0x5200b61c,0x7c00100,0x1830000,0x5200b61c,0x7c00900,0x1830000,0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000, -0x1800000,0x5200b77f,0x4000010,0x1800000,0x5200b77f,0x7c00100,0x1830000,0x5200b77f,0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000, -0x5200b873,0x2802100,0x962460,0x5200b873,0x2806400,0x962460,0x5200b873,0x6800000,0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873, -0x7c00100,0x230400,0x5200b873,0xc000010,0x448000,0x5200b912,0x7c00100,0x2230500,0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010, -0x400000,0x5200ba74,0x7c00100,0x230400,0x5200bb85,0x4000000,0x200000,0x5200bb85,0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000, -0x5200bc75,0x7c00100,0x230400,0x5200bd7d,0x4000000,0x200000,0x5200bd7d,0x7c00100,0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58, -0x7c00100,0x230400,0x5200c002,0x4000000,0x200000,0x5200c178,0,0x218960,0x5200c178,0x2802000,0x962460,0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400, -0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000,0x5200c178,0x6800100,0x962540,0x5200c178,0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401, -0x5200c178,0xc000010,0x448000,0x5200c247,0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400,0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00, 
-0x4000000,0x100006,0x52023700,0x24000000,0x100000,0x52023700,0x24000000,0xe00000,0x52023700,0x24000000,0x10100000,0x52023700,0x24000000,0x10e00000,0x52023700,0x24000000, -0x928045a0,0x52024400,0x4000000,0x100000,0x52027300,0x24000000,0x100000,0x5202c300,0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003, -0x5202c300,0x4000000,0x10000d,0x5202c300,0x4000100,0x150400,0x5202c300,0x4000100,0x15040d,0x5202c300,0x4000100,0x10150400,0x520a1e12,0x7c00100,0x2130480,0x520a3700, -0x24000000,0x30e00000,0x520a3800,0x24000000,0x30100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11,0x7c00300,0xe30001,0x520a7300,0x24000000,0x30100000,0x520ab412,0x7c00100, -0x2130480,0x520ac400,0x4000000,0xe00002,0x520ac400,0x4000000,0xe0000d,0x520ac400,0x4000000,0x30e0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000, -0x5240af78,0x6800400,0x962540,0x5240af78,0x7c00100,0x230400,0x5240af79,0x4000400,0x200000,0x5240af79,0x6800100,0x962540,0x5240b298,0x4000000,0x200000,0x5240b2a2, -0x4000000,0x200000,0x5240b2a2,0x4000000,0x1500000,0x5240b5b6,0x7c00900,0x230400,0x524a4492,0x4000000,0xe00003,0x5280af78,0x2802400,0x962460,0x5280af79,0x2802400, -0x962460,0x5280af7b,0x2802400,0x962460,0x5280af7d,0x2802400,0x962460,0x52c0b3ad,0x2802400,0x962460,0x52c0b3b1,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460, -0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,0x962540,0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a, -0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,0x60001b27,0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802000, -0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,0x24000000,0x10200000,0x60003000,0x24000000,0x10e00000,0x60003700,0x24000000,0x200000, -0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,0x220400,0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301, 
-0x2802000,0x962460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,0x60009519,0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100, -0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x32703580,0x6000c654,0x2802000,0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400, -0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,0x200000,0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540,0x6000c73f, -0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,0x6000c941,0x2806000,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941,0x4000010, -0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941,0x7c00100,0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400, -0x6000cc00,0x4000000,0xe00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000,0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300, -0x4000100,0x15040d,0x6002c300,0x4000100,0x10150400,0x600a3000,0x24000000,0x30200000,0x600a3000,0x24000000,0x30e00000,0x600a3700,0x24000000,0x30200000,0x600a3800,0x24000000, -0x30200000,0x600a3800,0x24000000,0xb28045a0,0x600a4305,0x7c00100,0xe30400,0x600ac300,0x4000000,0x30100000,0x600ac400,0x4000000,0x10e0000d,0x600ac400,0x4000000,0x30e0000d, -0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000,0x30e00000,0x600acd00,0x4000000,0x30200000,0x600acd00,0x4000000,0x30e00000,0x600acd00, -0x4000000,0x30e05200,0x600acd00,0x4000000,0xb0e00000,0x600acd00,0x4000000,0xb28045a0,0x600acd00,0x4000000,0xb28049c0,0x600ace00,0x4000000,0x30e00000,0x600ace00,0x4000000, -0xb28045a0,0x600acf00,0x4000000,0x30e00000,0x600acf00,0x4000000,0xb28045a0,0x600ad111,0x7c40300,0xe30000,0x604ac492,0x4000000,0x30e00003,0x61000a03,0x4000000,0x1600000, -0x61000c02,0,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100,0x1830000,0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400,0x61006600, 
-0x24000020,0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400,0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000, -0x400000,0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519,0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f, -0x61009b71,0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100,0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00, -0x4000000,0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460,0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120, -0x230405,0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476,0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800, -0x6100d476,0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010,0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573, -0x6800100,0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400,0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100, -0x230400,0x6100d85c,0x2802400,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c,0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460, -0x6100d997,0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000,0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997, -0x7c00100,0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000,0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000, -0x200000,0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99,0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540, -0x6100dc99,0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300,0xe30001,0x610ace00,0x4000000,0x30e00000,0x6140af78,0x7c00100,0x230400,0x6140af79, 
-0x6800100,0x962540,0x6140af82,0x7c00100,0x230400,0x6180af79,0x2802400,0x962460,0x62002a00,0x4000000,0x1600000,0x63000c00,0x80000,0x918820,0x63002800,0x80000, -0x918820,0x7000080e,0x7c00100,0x250400,0x70000a03,0x4000000,0x200000,0x70000c00,0,0x218960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400, -0x70001524,0x2802100,0x962460,0x70001524,0x7c00100,0x230400,0x70001615,0x2802100,0x962460,0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320, -0x7c00100,0x230400,0x70002a00,0x4000000,0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000,0x24000000,0x200000,0x70003000,0x24000000,0x10200000,0x70003800,0x24000000, -0xe00000,0x70005201,0x2802400,0x962460,0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108,0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400, -0x70007300,0x24000000,0x200000,0x70007f0e,0x4000000,0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400,0x962460,0x70008e00,0x24000000,0x200000,0x70008e00, -0x24000000,0x400000,0x70008e00,0x24000002,0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000,0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100, -0x220400,0x70009519,0x7c00100,0x230400,0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965,0x4000010,0x400000,0x70009965,0x7c00100,0x230400, -0x7000a008,0x7c00100,0x220400,0x7000a008,0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000,0x200000,0x7000b61c,0x2802400,0x1862460,0x7000b61c, -0x6800400,0x1862400,0x7000b61c,0x7c00100,0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0x962460,0x7000cc00,0x4000000,0xe00000,0x7000cd00,0x4000000, -0x200000,0x7000cd00,0x4000000,0xe00000,0x7000cd00,0x4000000,0x10200000,0x7000cd00,0x4000000,0x10e00000,0x7000cd00,0x4000000,0x10e05200,0x7000cd00,0x4000000,0x90e00000, -0x7000cd00,0x4000000,0x928045a0,0x7000cf00,0x4000000,0xe00000,0x7000cf00,0x4000000,0x10e00000,0x7000d202,0x2802100,0x962460,0x7000d202,0x7c00100,0x230400,0x7000d997, 
-0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400,0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86,0xc000010,0x448000,0x7000de9f,0x4000000, -0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2000,0x962460,0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000,0x962460,0x7000e187,0x2802100,0x962460, -0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187,0xc000010,0x448000,0x7000e288,0x7c00100,0x230400,0x7000e300,0x4000000,0x200000,0x7000e489, -0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100,0x962540,0x7000e489,0x6800100,0x962541,0x7000e489,0x6804400,0x962540,0x7000e489,0x7c00100, -0x230400,0x7000e489,0x7c00900,0x230400,0x7000e59d,0x2802100,0x962460,0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d,0x4000010,0x200000, -0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400,0x962540,0x7000e59d,0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100,0x962460,0x7000e691, -0x2802400,0x962460,0x7000e691,0x2806400,0x962460,0x7000e691,0x6800000,0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400,0x7000e700,0x4000400, -0x200000,0x7000e70e,0x7c00100,0x220400,0x7000e719,0x7c00100,0x220400,0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0,0x2802400,0x962460, -0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000,0x500000,0x7000e9a0,0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000,0x200000,0x7000ea79, -0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000,0x7000ea79,0x7c00100,0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000,0x7000eb8c,0x7c00100, -0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3,0x2806400,0x962460,0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3,0x6800100,0x962540, -0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010,0x448000,0x7000ed95,0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010,0x448000,0x7000ee1c, 
-0x2802400,0x1862460,0x7000ee1c,0x6800000,0x1329800,0x7000ee1c,0x7c00100,0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000,0x7000ef8f,0x7c00100, -0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e,0x7c00100,0x230400,0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200,0x4000000,0x200000, -0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000,0x1710000,0x7000f34b,0x2802100,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010,0x400000,0x7000f34b, -0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400,0x7000f34b,0x7c00900,0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000,0x7000f490,0x7c00100, -0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b,0x4000000,0x200000,0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6,0x2802100,0x962460, -0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400,0x962460,0x7000f8a6,0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800,0x200000,0x7000f8a6, -0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541,0x7000f8a6,0x7c00100,0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000,0x7000fa00,0x4000000, -0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e,0x2802400,0x962460,0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e,0x6800000,0x1329800, -0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100,0x962541,0x7000fb9e,0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000,0x1329800,0x7000fc92, -0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400,0x7000fc92,0x7c00100,0x250400,0x700acd00,0x4000000,0x30e00000,0x700acd00,0x4000000,0xb28045a0,0x700ace00,0x4000000, -0x30e00000,0x700acf00,0x4000000,0x30e00000,0x700acf00,0x4000000,0xb0e00000,0x7040dfbd,0x4000000,0x200000,0x7040f7c1,0x80000,0x918820,0x7080af79,0x2802400,0x962460, -0x7080dfbd,0x2802400,0x962460,0x70c0e4bf,0x2802400,0x962460,0x70c0e4bf,0x6800100,0x962540,0x8000120f,0x7c00100,0x230400,0x80001524,0x7c00100,0x230400,0x8000171a, 
-0x7c00100,0x230400,0x80002006,0x7c00100,0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00,0x4000000,0x200000,0x80005208,0x2802400, -0x962460,0x80005c00,0x4000000,0x200000,0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100,0x230400,0x80009519,0x7c00100,0x250400, -0x80009865,0x7c00100,0x230400,0x8000a008,0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400,0x8000cd00,0x4000000,0xe00000,0x8000d202, -0x2802500,0x962460,0x8000d202,0x7c00100,0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802400,0x962460,0x8000d997,0x4000000,0x200000,0x8000d997,0x4000000, -0x400000,0x8000d997,0x4000000,0x500000,0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100,0x962460,0x8000e489,0x7c00100,0x230400, -0x8000e719,0x7c00100,0x220400,0x8000f8a6,0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000,0x8000fda1,0x2802100,0x1862460,0x8000fda1, -0x2806400,0x1862460,0x8000fda1,0x4000000,0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862540,0x8000fda1,0x7c00100,0x1830000,0x8000fda1,0xc000010, -0x448000,0x8000fe9c,0x7c00100,0x230400,0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06,0x7c00100,0x220400,0x80010165,0x7c00100,0x230400, -0x800102a2,0x4000000,0x200000,0x800102a2,0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010,0x448000,0x8001044c,0x4000000,0x200000,0x8001044c, -0x7c00100,0x220400,0x8001044c,0x7c00100,0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000,0x80010670,0x4000010,0x400000,0x80010670,0xc000010, -0x448000,0x800a4711,0x7c40300,0xe30000,0x800acd00,0x4000000,0x30e00000,0x800acd00,0x4000000,0x72904de0,0x800ace00,0x4000000,0x30e00000,0x800acf00,0x4000000,0x30e00000, -0x800b0011,0x7c40300,0xe30000,0x800b0500,0x4000000,0x30e00000,0x800b0500,0x4000000,0xb28045a0,0x90001615,0x7c00100,0x230400,0x9000171a,0x4000000,0x200000,0x9000171a, 
-0x7c00100,0x230400,0x90003000,0x24000000,0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802000,0x962460,0x90008e00,0x24000000,0x400000,0x90009519,0x7c00100, -0x250400,0x9000a16f,0x2802100,0x962460,0x9000d200,0,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202,0x2802100,0x962460,0x9000d202,0x7c00100,0x230400, -0x9000e59d,0x2802100,0x962460,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460,0x900107a7,0x2802c00,0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7, -0x6800000,0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8,0x2802100,0x962460,0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000, -0x200000,0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000,0x1329800,0x900108a8,0x6800100,0x962540,0x900108a8,0x7c00100,0x230400, -0x900108a8,0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460,0x90010ca9,0x2802100,0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9, -0x4000010,0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b,0x4000000,0x500000,0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400, -0x962460,0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000,0x400000,0x90010eaa,0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800, -0x90010eaa,0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000,0x90010fab,0x7c00100,0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300, -0x4000000,0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0x30e00000,0x900acd00,0x4000000,0xb28045a0,0x900acf00,0x4000000,0x30e00000,0x900b0500,0x4000000, -0xe00000,0x900b0500,0x4000000,0x30e00000,0x900b0500,0x4000000,0xb28045a0,0x900b0b9a,0x7c00900,0x1230400,0x900b109a,0x7c00300,0xe30000,0x900b119a,0x7c00300,0xe30000, -0x90408e06,0x24000000,0x400000}; +0x6f67,0,0,0x7067,0,0,0x7367,0x20000000,0,0x7367,0x20000000,0x200000,0x7567,0,0,0x7667, +0,0,0x7767,0,0,0x7867,0,0,0x7a67,0,0,0x7b67,0,0,0x7c67,0, 
+0,0x7e67,0,0,0x7f67,0,0,0x8167,0,0,0x8267,0,0,0x8367,0,0, +0x8367,0,0x962460,0x8467,0,0,0x8567,0,0,0x8667,0,0,0x8767,0,0,0x8867, +0,0,0x8967,0,0,0x8b67,0,0,0x8c67,0,0,0x8e67,0x20000000,0,0x8e67,0x20000000, +0x400000,0x8f67,0,0,0x9067,0,0,0x9167,0,0,0x9267,0,0,0x9367,0,0, +0x9567,0,0,0x9667,0,0,0x9767,0,0,0x9867,0,0,0x9967,0,0,0x9a67, +0,0,0x9c67,0,0,0x9f67,0,0,0xa167,0,0,0xa367,0,0,0xa467,0, +0,0xa567,0,0,0xa667,0,0,0xa767,0,0,0xa867,0,0,0xa967,0,0, +0xaa67,0,0xe00000,0xab67,0,0xe00000,0xac67,0,0,0xad67,0,0,0xae67,0,0,0xaf67, +0,0,0xaf67,0,0x962540,0xb167,0,0,0xb267,0,0,0xb367,0,0,0xb467,0, +0,0xb567,0,0,0xb767,0,0,0xb867,0,0,0xb967,0,0,0xba67,0,0, +0xbc67,0,0,0xbd67,0,0,0xbe67,0,0,0xbf67,0,0,0xc067,0,0,0xc167, +0,0,0xc267,0,0,0xc367,0,0xe00000,0xc467,0,0xe00000,0xc667,0,0,0xc767,0, +0,0xc867,0,0,0xc967,0,0,0xca67,0,0,0xcb67,0,0xe30000,0xcc67,0,0xe00000, +0xcf67,0,0xe00000,0xcf67,0,0x30e00000,0xd067,0,0xe00000,0xd267,0,0,0xd367,0,0,0xd467, +0,0,0xd567,0,0,0xd667,0,0,0xd867,0,0,0xd967,0,0,0xda67,0, +0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0,0xdf67,0,0, +0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0xe00000,0xe467,0,0,0xe567, +0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0,0,0xea67,0, +0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0,0xef67,0,0, +0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767,0,0,0xf867, +0,0,0xf967,0,0,0xfa67,0,0xe00000,0xfb67,0,0,0xfc67,0,0,0xfd67,0, +0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0,0x10467,0,0, +0x10567,0,0x200000,0x10567,0,0xe00000,0x10567,0,0x30e00000,0x10567,0,0xb28045a0,0x10667,0,0,0x10767, +0,0,0x10867,0,0,0x10967,0,0,0x10a67,0,0,0x10b67,0,0,0x10b67,0, +0x1230400,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0,0x10f67,0,0,0x11067,0,0, +0x11167,0,0,0xa0067,0,0xe00000,0xa0067,0,0xe30000,0xa4667,0,0xe00000,0xa4767,0,0xe00000,0xa4767, +0,0xe30000,0xa4f67,0,0xe00000,0xa5e67,0,0xe00000,0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0, 
+0xe00000,0xb0067,0,0xe00000,0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020,0x11000100,0x40000001,0xa5a040, +0x11000100,0x40000001,0x116a8a0,0x11000200,0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200,0x24000000,0x10200000,0x11000200, +0x24000008,0x1710000,0x11000200,0x40000001,0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100,0x220401,0x11000319,0x7c00100, +0x220402,0x11000319,0x7c00100,0x250400,0x11000319,0x7c00100,0x250401,0x11000419,0x7c00100,0x220400,0x11000419,0x7c00100,0x220401,0x11000419,0x7c00100,0x220402, +0x11000419,0x7c00100,0x230400,0x11000419,0x7c00100,0x250400,0x11000419,0x7c00100,0x250401,0x11000419,0x7c00100,0x250402,0x11000519,0x7c00100,0x220400,0x11000519, +0x7c00100,0x230400,0x11000600,0x4000400,0x200000,0x11000600,0x4000400,0x200002,0x11000600,0x4000400,0x200400,0x11000600,0x7c00500,0x220400,0x11000600,0x7c00500, +0x230400,0x11000600,0x7c00500,0x530400,0x11000600,0x7c00d00,0x230400,0x11000619,0x7c00500,0x22040f,0x11000800,0x4000010,0x1001401,0x11000800,0x4000400,0x200001, +0x11000800,0x6800010,0x201001,0x11000800,0x7c00500,0x230401,0x11000807,0x7c00100,0x220400,0x11000807,0x7c00100,0x250400,0x1100080e,0x4000400,0x200000,0x1100080e, +0x4000400,0x200002,0x1100080e,0x7000500,0x220402,0x1100080e,0x7c00100,0x220400,0x1100080e,0x7c00100,0x220401,0x1100080e,0x7c00100,0x220402,0x1100080e,0x7c00100, +0x250400,0x1100080e,0x7c00100,0x250401,0x1100080e,0x7c00120,0x220402,0x1100080e,0x7c00120,0x250402,0x11000908,0x4000000,0x200000,0x11000908,0x7c00100,0x220400, +0x11000908,0x7c00100,0x220401,0x11000908,0x7c00100,0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200000,0x11000a03,0x4000000,0x270000,0x11000a03, +0x7c00100,0x220400,0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000b13,0x2802500,0x962460,0x11000b13,0x4000000, 
+0x200000,0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000,0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100,0x2633800, +0x11000c00,0,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,0x4000000,0x200000,0x11000c02,0x4000000,0x1329400,0x11000c02, +0x4000000,0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100,0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02,0x7c00100, +0x230402,0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000c02,0xc000010,0xb48000,0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400,0x962460, +0x11000f0a,0x2806400,0x962460,0x11000f0a,0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a,0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401,0x11001004, +0x2802100,0x962460,0x11001004,0x2802400,0x962460,0x11001004,0x2806400,0x962460,0x11001004,0x4000000,0x200000,0x11001004,0x4000000,0x1500000,0x11001004,0x6800100, +0x962540,0x11001004,0x6800100,0x962541,0x11001004,0x7c00100,0x230400,0x11001004,0x7c00100,0x230401,0x11001110,0x2802100,0x962460,0x11001110,0x2802400,0x962460, +0x11001110,0x2806400,0x962460,0x11001110,0x6800100,0x962540,0x11001110,0x7c00100,0x230400,0x11001110,0x7c00100,0x230401,0x1100120f,0x2802100,0x962460,0x1100120f, +0x2802400,0x962460,0x1100120f,0x2806400,0x962460,0x1100120f,0x6800100,0x962540,0x1100120f,0x7c00100,0x230400,0x1100131f,0x2802100,0x962460,0x1100131f,0x2802400, +0x962460,0x1100131f,0x2806400,0x962460,0x1100131f,0x4000000,0x200000,0x1100131f,0x6800000,0x1329800,0x1100131f,0x6800100,0x962540,0x1100131f,0x6800100,0x962541, +0x1100131f,0x7c00100,0x230400,0x1100131f,0x7c00100,0x230401,0x11001423,0x2802100,0x962460,0x11001423,0x2806400,0x962460,0x11001423,0x6800100,0x962540,0x11001423, +0x6800100,0x962541,0x11001423,0x7c00100,0x230400,0x11001423,0x7c00100,0x230401,0x11001524,0x2802100,0x962460,0x11001524,0x2802100,0x962461,0x11001524,0x2806400, 
+0x962460,0x11001524,0x6800000,0x1329800,0x11001524,0x6800100,0x962540,0x11001524,0x7c00100,0x230400,0x11001615,0x2802100,0x962460,0x11001615,0x2806400,0x962460, +0x11001615,0x6800000,0x1329800,0x11001615,0x6800100,0x962540,0x11001615,0x6800100,0x962541,0x11001615,0x7c00100,0x230400,0x1100171a,0x2802100,0x962460,0x1100171a, +0x2806400,0x962460,0x1100171a,0x6800000,0x1329800,0x1100171a,0x6800100,0x962540,0x1100171a,0x6800100,0x962541,0x1100171a,0x7c00100,0x230400,0x11001900,0x4000000, +0x1600000,0x11001926,0x2802100,0x1862460,0x11001926,0x2802400,0x1862460,0x11001926,0x2806100,0x1862460,0x11001926,0x4000000,0x200000,0x11001926,0x4000010,0x400000, +0x11001926,0x6800000,0x1329800,0x11001926,0x7800100,0x1830142,0x11001926,0x7c00100,0x1830000,0x11001926,0x7c00900,0x1830000,0x11001926,0x7e00100,0x1830000,0x11001a18, +0x2802100,0x1862460,0x11001a18,0x2802400,0x1862460,0x11001a18,0x6800000,0x1329800,0x11001a18,0x7800100,0x1830142,0x11001a18,0x7c00100,0x1830000,0x11001a18,0x7c00100, +0x1830002,0x11001a18,0x7c00900,0x1830000,0x11001a18,0x7e00100,0x1830000,0x11001d0c,0x7c00100,0x230400,0x11001d0c,0x7c00100,0x250400,0x11001e12,0x7c00100,0x2230500, +0x11001e12,0x7c00100,0x2330520,0x11001e12,0x7c80100,0x2330520,0x11002619,0x7c00100,0x220401,0x11002619,0x7c00100,0x220402,0x11002619,0x7c00100,0x250401,0x1100270e, +0x4000400,0x200001,0x1100270e,0x4000400,0x200002,0x1100270e,0x4000400,0x500001,0x1100270e,0x7c00100,0x220401,0x1100270e,0x7c00100,0x250401,0x11002800,0x80000, +0x918820,0x11002800,0x80000,0x1c18020,0x11002800,0x180000,0x918820,0x11002800,0x4000001,0x440001,0x11002800,0x4000001,0x440002,0x11002800,0x4000001,0xc4000b, +0x11002800,0x6800000,0x201c00,0x11002800,0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800,0x24000000,0x810000,0x11002800, +0x24000000,0x1410000,0x11002800,0x24000000,0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006,0xc0000b,0x11002800,0x24000008, 
+0x1410000,0x11002800,0x24000008,0x1710000,0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000,0x11002800,0x2c000010,0x11248002, +0x11002800,0x40000001,0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x80000,0x2a65620,0x11002801,0x82000,0x962460,0x11002900,0x4000000,0x20000e,0x11002900, +0x4000000,0x20000f,0x11002900,0x4000020,0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020,0x81000f,0x11002900,0x4000020, +0x141000e,0x11002900,0x4000020,0x141000f,0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000,0x11002a00,0x4000000,0x1600000, +0x11002a00,0x4000000,0x1600002,0x11002b01,0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00,0x4000000,0x200002,0x11002c00, +0x4000000,0x20000f,0x11002c00,0x4000020,0x200000,0x11002c00,0x7c00000,0x200000,0x11002c00,0x7c00020,0x200000,0x11002c00,0x7c00120,0x220405,0x11002c00,0x7c00120, +0x230402,0x11002c00,0x7c00120,0x250402,0x11002c00,0x7c00120,0x250405,0x11002c19,0x7c00100,0x250400,0x11002c19,0x7c00100,0x250401,0x11002d00,0x4000000,0x100006, +0x11002d00,0x4000000,0x200006,0x11002d19,0x7c00100,0x220402,0x11002d19,0x7c00100,0x230400,0x11002d19,0x7c00100,0x250402,0x11002e00,0x24000000,0x200000,0x11002e00, +0x24000020,0x200000,0x11002e00,0x24000020,0x200001,0x11002e00,0x24000020,0x10200000,0x11002f00,0x24000020,0x200000,0x11002f00,0x24000020,0x200001,0x11002f00,0x24000020, +0x200002,0x11002f00,0x24000020,0xf00000,0x11002f00,0x24000020,0x1600000,0x11002f00,0x24000022,0x1600000,0x11003000,0x24000000,0x200000,0x11003000,0x24000000,0x10200000, +0x11003000,0x24000020,0x200000,0x11003000,0x24000020,0x810000,0x11003000,0x24000020,0x1410000,0x11003100,0x24000000,0x200000,0x11003200,0x24000000,0x200000,0x11003300, +0x4000000,0x100003,0x11003400,0x24000000,0x100000,0x11003400,0x24000000,0x200000,0x11003500,0x24000000,0x200000,0x11003600,0x24000000,0x200000,0x11003600,0x24000000, 
+0x10200000,0x11003600,0x24000020,0x200000,0x11003700,0x24000000,0x200000,0x11003700,0x24000000,0xe00000,0x11003700,0x24000000,0x10200000,0x11003700,0x24000000,0x10e00000, +0x11003700,0x24000000,0x928045a0,0x11003700,0x24000020,0x200000,0x11003800,0x4000000,0x100000,0x11003800,0x24000000,0x200000,0x11003800,0x24000000,0xb00000,0x11003800, +0x24000000,0xe00000,0x11003800,0x24000000,0x1710000,0x11003800,0x24000000,0x10200000,0x11003800,0x24000000,0x10b00000,0x11003800,0x24000000,0x10e00000,0x11003800,0x24000000, +0x10e05200,0x11003800,0x24000000,0x928045a0,0x11005003,0x7c00100,0x220402,0x11005013,0x2802500,0x962460,0x11005013,0x4000020,0x200005,0x11005013,0x7c00100,0x2633801, +0x11005013,0x7c00100,0x2633802,0x11005013,0x7c00100,0x2633805,0x11005019,0x7c00100,0x220402,0x11005100,0x24000000,0x810000,0x11005100,0x24000000,0x1410000,0x11005102, +0x7000100,0x230408,0x11005102,0x7c00100,0x230404,0x11005102,0x7c00100,0x230407,0x11005102,0x7c00100,0x230408,0x11005102,0x7c00100,0x230409,0x11005201,0x2802400, +0x962460,0x11005500,0x80000,0x1e18820,0x11005502,0x7000100,0x230408,0x11005502,0x7c00100,0x230404,0x11005502,0x7c00100,0x230407,0x11005502,0x7c00100,0x230408, +0x11005502,0x7c00100,0x230409,0x11005667,0x1000,0,0x11020200,0x80004,0x418820,0x11020200,0x4000000,0x100006,0x11020200,0x4000000,0x10000f,0x11020200, +0x4000400,0x100002,0x11020200,0x4000400,0x500002,0x11020200,0x6800c00,0x101000,0x11020200,0x24000000,0x100000,0x11020200,0x24000000,0x1400000,0x11020200,0x24000000, +0x1500000,0x11020200,0x24000000,0x1600000,0x11020200,0x24000000,0x10200000,0x11020200,0x24000020,0x100000,0x11020200,0x24000020,0x1600000,0x11020219,0x7c00100,0x12040f, +0x11020219,0x7c00100,0x220400,0x11020219,0x7c00100,0x220401,0x11020219,0x7c00100,0x250400,0x11020319,0x7c00100,0x220400,0x11020319,0x7c00100,0x220401,0x11020319, +0x7c00100,0x220402,0x11020319,0x7c00100,0x250400,0x11020319,0x7c00100,0x250402,0x11020319,0x7d00100,0x220402,0x11020419,0x7c00100,0x220401,0x11020519,0x7c00100, 
+0x220400,0x11020600,0x4000400,0x100002,0x11020600,0x4000400,0x200400,0x11020600,0x7c00500,0x130400,0x11020600,0x7c00d00,0x130400,0x11020701,0x2802400,0x962460, +0x11020701,0x2802400,0x962461,0x11020701,0x2802400,0xc62460,0x1102080e,0x7c00100,0x220400,0x1102080e,0x7c00100,0x250400,0x11020908,0x7c00100,0x220400,0x11020908, +0x7c00100,0x220401,0x11020908,0x7c00100,0x250400,0x11020908,0x7c00100,0x250401,0x11022800,0x24000000,0x100000,0x11022800,0x24000000,0x200000,0x11022800,0x24000000, +0x200002,0x11022800,0x24000000,0x401000,0x11022800,0x24000000,0xf00002,0x11022800,0x24000000,0xf0ac02,0x11022800,0x24000000,0x1500000,0x11022800,0x24000002,0x100000, +0x11022800,0x24000002,0x370000,0x11022800,0x24000002,0x470000,0x11022800,0x24000006,0x400000,0x11022800,0x24000008,0x1710000,0x11022800,0x24000008,0x1712c00,0x11022800, +0x24000020,0x100000,0x11022800,0x24000020,0x1500000,0x11022800,0x24000020,0x1500002,0x11022900,0x4000000,0x10000e,0x11022900,0x4000000,0x10000f,0x11022919,0x7c00100, +0x12040f,0x11022c00,0x4000000,0x100002,0x11022c00,0x4000000,0x1500002,0x11022c00,0x4000000,0x1600002,0x11022c00,0x4000000,0x1010000f,0x11022c00,0x7c00120,0x120405, +0x11022c0e,0x7c00100,0x250401,0x11022c19,0x7c00100,0x150401,0x11022d00,0x4000000,0x100006,0x11022d00,0x4000000,0x200006,0x11022d19,0x7c00100,0x120402,0x11022d19, +0x7c00100,0x150402,0x11022e00,0x24000000,0x200000,0x11022e00,0x24000020,0x100000,0x11022e00,0x24000020,0x10100000,0x11022f00,0x24000020,0x100000,0x11022f00,0x24000020, +0x100001,0x11022f00,0x24000020,0x100002,0x11023000,0x24000000,0x100000,0x11023300,0x4000000,0x100002,0x11023300,0x4000000,0x100003,0x11023300,0x4000100,0x120403, +0x11023300,0x4000100,0x150403,0x11023300,0x4000100,0x10150403,0x11023400,0x24000000,0x100000,0x11023500,0x24000000,0x100000,0x11023600,0x24000000,0x100000,0x11023600, +0x24000020,0x100000,0x11023600,0x24000020,0x10100000,0x11023700,0x24000000,0x100000,0x11023700,0x24000000,0xe00000,0x11023700,0x24000000,0x10100000,0x11023700,0x24000000, 
+0x10e00000,0x11023700,0x24000020,0x100000,0x11023700,0x24000020,0x10100000,0x11023700,0x24000020,0x10105200,0x11023800,0x4000000,0x100000,0x11023800,0x24000000,0x200000, +0x11024e67,0,0,0x11025600,0x4000000,0x100000,0x11042a00,0x4000000,0x1600000,0x11045700,0x4000000,0x20000a,0x11045700,0x4000020,0x20000a,0x11045712, +0x7c00100,0xe3040a,0x11045712,0x7c80100,0xe3040a,0x11045716,0x7c00100,0xe30c0a,0x11045716,0x7c00100,0x2530c0a,0x11063d00,0x4000001,0x440011,0x11065700,0x4000000, +0x810011,0x11065700,0x4000000,0xe00011,0x11065700,0x4000000,0x1410011,0x11065700,0x4000000,0x1500011,0x11065700,0x4000000,0x1600011,0x11065700,0x4000006,0xe70011, +0x11065700,0x4000008,0xe00011,0x11065700,0x4000008,0xe02c11,0x11065700,0x4000010,0x871411,0x11065700,0x4000010,0x1201411,0x11065700,0x4000010,0x1271011,0x11065700, +0x4000020,0xe00011,0x11065700,0x4000400,0xe00011,0x11065700,0x4000420,0xe00011,0x11065700,0x6800000,0xe01c11,0x11065700,0x6800040,0xe00011,0x11065700,0xc000010, +0x80ac11,0x11065700,0xc000010,0xb48011,0x11065719,0x7c00100,0xe20411,0x11065719,0x7c00100,0xe50411,0x11065719,0x7c00140,0xe20411,0x11065719,0x7c00140,0xe50411, +0x11080100,0x6800000,0x201c00,0x11080100,0x68000c0,0x11329800,0x11080100,0x24000000,0x200000,0x11080100,0x24000000,0x810000,0x11080100,0x24000000,0x1410000,0x11080100, +0x24000000,0x1500000,0x11080100,0x24000000,0x1600000,0x11080100,0x24000000,0x1b00000,0x11080100,0x24000000,0x2410000,0x11080100,0x24000000,0x10200000,0x11080100,0x24000006, +0xd70000,0x11080100,0x24000008,0x1713c00,0x11080100,0x24000008,0x1714000,0x11080100,0x24000010,0x1001400,0x11080100,0x24000010,0x1071000,0x11080100,0x24000010,0x1071400, +0x11080100,0x24000020,0x200000,0x11080100,0x24000020,0x400000,0x11080100,0x24000020,0x1600000,0x11080100,0x24000400,0x200000,0x11080100,0x24000420,0x200000,0x11080100, +0x2c000010,0xb48000,0x11080100,0x2c000010,0x100ac00,0x11080100,0x44000001,0x1a40000,0x11080119,0x7c00100,0x220400,0x11080119,0x7c00100,0x250400,0x11080119,0x7c001c0, 
+0x220400,0x11080119,0x7c001c0,0x250400,0x11080200,0x4000400,0x200002,0x11080200,0x24000000,0x200000,0x11080200,0x24000000,0x1500000,0x11080200,0x24000000,0x1600000, +0x11080200,0x24000020,0x200000,0x110a1e12,0x7c00100,0x2130480,0x110a1e12,0x7c80100,0x2130480,0x110a3000,0x24000000,0x30e00000,0x110a3000,0x24100000,0x810001,0x110a3000, +0x24100000,0x1410001,0x110a3700,0x24000000,0x30200000,0x110a3d00,0x4000000,0xe00000,0x110a3d00,0x4000000,0xe00002,0x110a3d00,0x24000000,0xe00000,0x110a3d11,0x7c00300, +0xe30000,0x110a3d11,0x7c00900,0x1230400,0x110a3d12,0x2802400,0x962460,0x110a3e14,0x7c00100,0xe30000,0x110a3e14,0x7c00100,0xe30001,0x110a3e14,0x7c00100,0x2530000, +0x110a3e14,0x7c00900,0x1230000,0x110a3e14,0x7c00900,0x1230001,0x110a3f16,0x7c00100,0xe30c00,0x110a3f16,0x7c00100,0xe30c01,0x110a3f16,0x7c00100,0x2530c00,0x110a3f16, +0x7c00900,0x1230c00,0x110a3f16,0x7c00900,0x1230c01,0x110a4005,0x7c00100,0xe30400,0x110a4112,0x7c00100,0xe30402,0x110a4112,0x7c80100,0xe30402,0x110a4400,0x4000000, +0xe00000,0x110a4412,0x4000000,0xe00002,0x110a4412,0x4000000,0xe00003,0x110a4416,0x4000000,0xe00c03,0x110a4500,0x4000000,0xe0000d,0x110a4516,0x4000000,0xe00c0d, +0x110a4711,0x7c40300,0xe30000,0x110a4f11,0x7c00300,0xe30001,0x110a4f11,0x7c40300,0xe30000,0x110a5300,0x4000000,0x810010,0x110a5300,0x4000000,0xe00002,0x110a5300, +0x4000000,0xe00010,0x110a5300,0x4000000,0x1410010,0x110a5300,0x4000002,0xe70010,0x110a5300,0x4000008,0x810010,0x110a5300,0x4000008,0x1410010,0x110a5300,0x6800000, +0xe01c02,0x110a5300,0x6800000,0xe01c10,0x110a5400,0x4000000,0x81000c,0x110a5400,0x4000000,0xe0000c,0x110a5400,0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c, +0x110a5400,0x4000000,0x160000c,0x110a5400,0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400,0x4000010,0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400, +0x4000010,0x127100c,0x110a5400,0x4000020,0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010,0x80ac0c,0x110a5400,0xc000010,0xb4800c,0x11400a0c,0xc000010, 
+0x1049400,0x11400c0e,0x4000010,0xb00000,0x11400c0e,0x4000010,0x1071400,0x11400c0e,0xc000010,0xb48000,0x11400c11,0x7c00900,0x230400,0x11400f34,0xc000010,0x448000, +0x11400f44,0xc000010,0x448000,0x11401d70,0x4000000,0x200000,0x11403d92,0x4000000,0xe00000,0x11445787,0x4000004,0x120000a,0x11445787,0x4000008,0x81000a,0x11445787, +0x4000008,0x141000a,0x11445787,0x4000010,0x87000a,0x11445787,0xc000010,0x84800a,0x11445790,0x3802500,0x126246a,0x11445790,0x7c00d00,0x2530c0a,0x114a3d87,0x24000000, +0x810000,0x114a3d87,0x24000000,0x1410000,0x114a3d87,0x24000008,0x810000,0x114a3d87,0x24000008,0x1410000,0x114a3d87,0x24000010,0x870000,0x114a3d87,0x2c000010,0x848000, +0x114a3d8d,0x4000000,0xe00000,0x114a3d8d,0x24000000,0xe00000,0x114a3d8d,0x24000002,0x1200000,0x114a3d8d,0x24000002,0x10e00000,0x114a3d8d,0x24000008,0x810000,0x114a3d8d, +0x24000008,0x1410000,0x114a3d90,0x7c00900,0x930c00,0x114a3d90,0x7c00900,0xe30c00,0x114a3d92,0x7c00300,0xe30000,0x114a3e90,0x7000400,0x1200c02,0x114a3f87,0x4000004, +0x1200000,0x114a3f90,0x7c00d00,0x2530c00,0x114a4292,0x4000000,0xe00000,0x114a4292,0x4000000,0xe0000f,0x114a4492,0x4000000,0xe00002,0x114a4492,0x4000000,0xe00003, +0x114a4492,0x4000000,0x10e00003,0x114a4592,0x4000000,0xe00002,0x114a4592,0x4000000,0xe0000d,0x1180090a,0x2802400,0x962460,0x11800c17,0x2802100,0x962460,0x11800c17, +0x2802500,0x962460,0x11800f1d,0x2802400,0x962460,0x11800f29,0x2802400,0x962460,0x11820700,0x2802400,0x962460,0x11820700,0x2802500,0x962460,0x118a3d93,0x2802400, +0x962460,0x118a3e90,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c1b,0x6800000,0x1329800,0x11c00f58,0x6800000,0x1329800, +0x11c0105d,0x6800000,0x1329800,0x11c01161,0x6800000,0x1329800,0x11c01265,0x6800000,0x1329800,0x11c01469,0x4000000,0x200000,0x11c01469,0x6800000,0x1329800,0x11c01469, +0x7c00100,0x230400,0x11c0511b,0x7c00100,0x230408,0x20000067,0x1000,0,0x20000b13,0x2802400,0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100, 
+0x962460,0x20001b27,0x2802100,0x962461,0x20001b27,0x2802400,0x962460,0x20001b27,0x2806400,0x962460,0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000, +0x20001b27,0x4000000,0x400000,0x20001b27,0x4000000,0x500000,0x20001b27,0x4000000,0x810000,0x20001b27,0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27, +0x4000000,0x1410000,0x20001b27,0x4000010,0xb00000,0x20001b27,0x4000010,0xc00000,0x20001b27,0x6800000,0x1329800,0x20001b27,0x6800100,0x462540,0x20001b27,0x6800400, +0x962540,0x20001b27,0x7c00100,0x230400,0x20001b27,0x7c00100,0x230401,0x20002619,0x7c00100,0x220401,0x20002a00,0x4000000,0x1600000,0x20004b67,0,0x1900020, +0x20004c67,0,0x1900020,0x20004d67,0,0x1900020,0x20006d67,0x1000,0,0x20006e67,0x1000,0,0x20026d67,0,0,0x20026e67, +0,0,0x200a4a12,0x7c00100,0x1f304c1,0x200a4a12,0x7c00100,0x20304e1,0x21005600,0x4000000,0x700000,0x21022a00,0x4000000,0x1600000,0x30000419,0x7c00100, +0x220400,0x30000419,0x7c00100,0x220401,0x30000419,0x7c00100,0x250400,0x30000419,0x7c00100,0x250401,0x30000519,0x7c00100,0x220400,0x30000600,0x4000400,0x200400, +0x30000600,0x7c00500,0x230400,0x30000605,0x4000400,0x200000,0x3000080e,0x7c00100,0x220400,0x30000908,0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908, +0x7c00100,0x220401,0x30000908,0x7c00100,0x250400,0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006,0x400000,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100, +0x230400,0x30000d22,0,0x218960,0x30000d22,0x2802100,0x962460,0x30000d22,0x2802400,0x962460,0x30000d22,0x2802500,0x962460,0x30000d22,0x4000000,0x200000, +0x30000d22,0x4000010,0x200000,0x30000d22,0x7c00100,0x230400,0x30000d22,0xc000010,0x248000,0x30000e25,0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821, +0x2802100,0x962460,0x30001821,0x2806400,0x962460,0x30001821,0x4000000,0x200000,0x30001821,0x6800100,0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100, 
+0x230400,0x30001b27,0x2802100,0x962460,0x30001b27,0x2802400,0x962460,0x30001b27,0x4000000,0x200000,0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400, +0x30001c1c,0x2802100,0x1862460,0x30001c1c,0x2802400,0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c,0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c, +0x6800100,0x1862540,0x30001c1c,0x7c00100,0x1830000,0x30001c1c,0x7c00100,0x1830001,0x30001c1c,0xc000010,0x448000,0x30001f0b,0x4000000,0x200000,0x30001f0b,0x4000010, +0x200000,0x30001f0b,0x4000010,0x400000,0x30001f0b,0x6800000,0x200000,0x30001f0b,0x7c00100,0x230400,0x30001f0b,0xc000010,0x248000,0x30002006,0x7c00100,0x250400, +0x30002128,0x4000010,0x200000,0x30002128,0x7c00100,0x230400,0x30002128,0xc000010,0x248000,0x3000221d,0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d, +0x4000001,0x440000,0x3000221d,0x7c00100,0x230400,0x30002300,0x4000010,0x400000,0x30002320,0x7c00100,0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400, +0x1862460,0x30002417,0x2806400,0x1862460,0x30002417,0x2882000,0x1862460,0x30002417,0x4000000,0x200000,0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000, +0x30002417,0x4000010,0x400000,0x30002417,0x4000010,0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417, +0x7d00100,0x1830000,0x3000251b,0x80000,0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006, +0x500000,0x3000251b,0x4000010,0x400000,0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400, +0x3000251b,0x7c00900,0x230400,0x3000251b,0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,0x4000001,0xc41c0b,0x30002800,0x24000000,0x200000,0x30002800, +0x2c000010,0x1248002,0x30002800,0x2c000010,0x11248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460,0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100, 
+0x10220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000,0x30003100,0x24000000,0x200000,0x30003600,0x24000000,0x200000, +0x30003700,0x24000000,0x200000,0x3000392e,0x24000000,0x200000,0x30005013,0x7c00100,0x2633801,0x30005600,0,0x918820,0x30020600,0x4000400,0x500400,0x30020701, +0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000,0xe00000,0x300a3a11,0x4020000,0xe00002,0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000, +0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002,0x300a4305,0x7c00100,0xe30400,0x300a4611,0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400, +0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x30402576,0x4000010,0x400000,0x30402576,0x4000010,0xb70000,0x30402576,0xc000010,0xb48000,0x304a3d92, +0x4000000,0xe00000,0x30800c17,0x2802100,0x962460,0x30c01c6e,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000, +0,0x3100581e,0x4000000,0x200000,0x3100581e,0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400, +0x31005b00,0x4000000,0x200000,0x31005c00,0x80000,0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00, +0x4000000,0x200001,0x31005c00,0x6800000,0x962540,0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020, +0x1329805,0x31005d00,0x7c00120,0x220405,0x31005d00,0x7c00120,0x250405,0x31006000,0x82000,0x962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000, +0x310a5f11,0x7c00300,0xe30001,0x32000419,0x7c00100,0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908, +0x7c00100,0x220400,0x32000908,0x7c00100,0x250400,0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000, 
+0x1e18820,0x32002800,0x80020,0x218820,0x32002800,0x4000001,0x440002,0x32002800,0x24000000,0x200000,0x32002800,0x24000000,0x200002,0x32002800,0x24000020,0x200000, +0x32002800,0x2c000010,0x1248002,0x32002919,0x7c00100,0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01, +0x2802020,0x962460,0x32002c00,0x4000000,0x200000,0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120, +0x250405,0x32002e00,0x24000020,0x200000,0x32002f00,0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000, +0x32003600,0x24000020,0x200000,0x32003600,0x24000020,0x10200000,0x32003700,0x24000000,0x100000,0x32003700,0x24000000,0x200000,0x32003700,0x24000000,0x10200000,0x32003800, +0x24000000,0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100, +0x250400,0x3200622a,0x2802100,0x962460,0x3200622a,0x2806000,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b,0x2806000,0x962460, +0x3200632b,0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100,0x230400,0x32006600, +0x24000020,0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000,0x32006800,0x24000020,0x10200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020, +0x810000,0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460, +0x32023300,0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020,0x810000,0x32086600, +0x24000020,0x1410000,0x32086900,0x24000020,0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x30200000,0x320a3d11,0x7c00100,0x1230400,0x320a3e14,0x7c00100, 
+0xe30010,0x320a3e14,0x7c00100,0x2530000,0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11,0x7c00300,0xe30001, +0x320a6b16,0x7c00100,0x2530c00,0x32406372,0xc000010,0x448000,0x324a3d95,0x4000000,0x10e00000,0x324a3d95,0x7c00100,0x1230400,0x324a3f90,0x4000002,0x1200c00,0x324a538d, +0x24000000,0xe00000,0x32820701,0x2802000,0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400,0x4000080e,0x7c00100, +0x220400,0x4000080e,0x7c00100,0x250400,0x4000080e,0x7c00100,0x250402,0x40000c02,0,0x218960,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460, +0x40000c02,0x2802500,0x962460,0x40000c02,0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000d22,0x7c00100,0x230400,0x40000f0a, +0x7c00100,0x230400,0x40001004,0x7c00100,0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460,0x4000120f,0x4000000, +0x1600000,0x4000120f,0x7c00100,0x230400,0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615,0x2802400,0x962460, +0x40001615,0x7c00100,0x230400,0x40002417,0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002,0x200000,0x40002c00, +0x4000000,0x200002,0x40003000,0x24000000,0x10200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40003700,0x24000000,0x10200000,0x40005a09,0x7c00100, +0x220400,0x40005a09,0x7c00100,0x250400,0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,0x2802400,0x962460,0x40006f30,0x4000000,0x200000, +0x40006f30,0x6800000,0x1329800,0x40006f30,0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117, +0x4000000,0x200000,0x40007208,0x7c00100,0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100, 
+0x220400,0x40007219,0x7c00500,0x220400,0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,0x24000000,0x200000,0x40007300,0x24000000,0x10200000, +0x40007400,0x4000000,0x200000,0x40007531,0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010,0x400000,0x40007835,0x7c00100,0x230400,0x40007933, +0x7c00100,0x230400,0x40007a32,0x6800000,0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400,0x40007c00,0x4000000,0x200000,0x40020701,0x2802400, +0x962460,0x40020701,0x2802400,0xc62460,0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700,0x24000000,0x30200000,0x400a3700,0x24000000,0x30e00000, +0x400a4400,0x4000000,0xe0000d,0x400a4412,0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000,0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300, +0x4000000,0x1410010,0x404077b8,0x4000000,0x200000,0x404077bb,0x4000000,0x200000,0x404077bb,0x4000000,0x400000,0x40c0511b,0x4000000,0x200000,0x41000419,0x7c00100, +0x220400,0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e,0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100,0x250400, +0x41000b13,0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000,0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0xb00000,0x41000c02, +0x4000000,0x1500000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400,0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c,0x7c00100, +0x230400,0x41001d0c,0x7c00100,0x23040f,0x41001f0b,0x2802100,0x962460,0x41001f0b,0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000,0x200000, +0x41002800,0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000,0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000,0x41002c00, +0x7c00120,0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x200000,0x41003700,0x24000000,0x10200000,0x41003700,0x24000000,0x10205200,0x41003700,0x24000000, 
+0x10e00000,0x41005d00,0x7c00120,0x220405,0x41006600,0x24000020,0x200000,0x41006600,0x24000020,0x810000,0x41006600,0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f, +0x41007219,0x7c00100,0x220400,0x41007300,0x24000000,0x200000,0x41007e0e,0x2802000,0x962460,0x41007e0e,0x4000000,0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e, +0x7c00100,0x230400,0x41008002,0x7c00100,0x230400,0x41008137,0x2802100,0x962460,0x41008137,0x4000000,0x200000,0x41008137,0x6800100,0x962540,0x41008137,0x7c00100, +0x230400,0x41008301,0x2802000,0x962460,0x41008407,0x4000000,0x200000,0x41008407,0x4000000,0x400000,0x41008407,0x4000000,0xb00000,0x41008407,0x7c00100,0x220400, +0x41008407,0x7c00100,0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b,0x4000000,0x200000,0x4100860b,0x7c00100,0x230400,0x4100870c,0x7c00100,0x220400,0x41008838, +0x7c00100,0x220400,0x41008838,0x7c00100,0x250400,0x41008939,0x2802000,0x962460,0x41008939,0x2802100,0x962460,0x41008939,0x2806000,0x962460,0x41008939,0x4000000, +0x200000,0x41008939,0x4000000,0x400000,0x41008939,0x7c00100,0x230400,0x41008939,0xc000010,0x448000,0x41008a00,0x4000000,0x200000,0x41008b3b,0x4000000,0x1800000, +0x41008b3b,0x6800000,0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b,0x7e00100,0x1830000,0x41008c3d,0x4000010,0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e, +0x7c00100,0x22040f,0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100,0x22040f,0x41008e00,0x24000000,0x200000,0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000, +0x1710000,0x41008e00,0x24000006,0x400000,0x41008f3a,0x2802000,0x962460,0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a,0x4000000,0x200000, +0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c,0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000,0x962460,0x41020701, +0x2802000,0xc62460,0x410a3700,0x24000000,0x30200000,0x410a3700,0x24000000,0x30e00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000,0x410a4f11,0x7c00300, 
+0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010,0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100,0x4000000,0xf00010, +0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100,0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x414a8292,0x4000000,0xe00000,0x41808300, +0x2802000,0x962460,0x41c01469,0x6800000,0x1329800,0x50000419,0x7c00100,0x220400,0x50000419,0x7c00100,0x250400,0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100, +0x220400,0x50000908,0x7c00100,0x250400,0x50000b13,0x2802500,0x962460,0x50000f0a,0x7c00100,0x230400,0x50001615,0x2802100,0x962460,0x50001615,0x7c00100,0x230400, +0x50002b01,0x2802020,0x962460,0x50002c00,0x4000000,0x200000,0x50002c19,0x7c00100,0x220400,0x50002d19,0x7c00100,0x220400,0x50003000,0x24000000,0x200000,0x50003000, +0x24000020,0x200000,0x50003700,0x24000000,0x200000,0x50005d00,0x7c00120,0x220405,0x50005d00,0x7c00120,0x250405,0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100, +0x250400,0x50006600,0x24000020,0x200000,0x50007300,0x24000000,0x200000,0x50008301,0x2802400,0x962460,0x50008a00,0x7c00500,0x230400,0x50009257,0x2802400,0x962460, +0x50009257,0x4000000,0x200000,0x50009257,0x4000010,0x1071400,0x50009257,0x6800000,0x1329800,0x50009257,0x7c00100,0x230400,0x50009257,0x7c00500,0x230400,0x50009257, +0x7c00900,0x230400,0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100,0x962460,0x5000933e,0x2802400,0x962460,0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000, +0x400000,0x5000933e,0x4000010,0x400000,0x5000933e,0x6800000,0x1329800,0x5000933e,0x6800100,0x962540,0x5000933e,0x6800100,0x962541,0x5000933e,0x6804400,0x962540, +0x5000933e,0x7c00100,0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e,0xc000010,0x448000,0x50009419,0x7c00100,0x220400,0x50009419,0x7c00100,0x250400,0x50009500, +0x4000400,0x200400,0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100,0x230400,0x5000965a,0xc000010,0xb48000,0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010, 
+0x400000,0x5000975b,0x7c00100,0x230400,0x50009865,0x7c00100,0x230400,0x50009965,0x4000010,0x400000,0x50009965,0x7c00100,0x230400,0x50409a92,0x4000000,0x200000, +0x5100080e,0x7c00100,0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02,0x2802100,0x962460,0x51000c02,0x4000000,0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02, +0x7c00100,0x230400,0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500,0x230400,0x51001110,0x2802100,0x962460,0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100, +0x230400,0x51001524,0x2802100,0x962460,0x51001524,0x4000000,0x200000,0x51001524,0x7c00100,0x230400,0x5100171a,0x2802100,0x962460,0x5100171a,0x4000000,0x200000, +0x5100171a,0x4000000,0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27,0x4000000,0x200000,0x51001b27,0x4000000,0x400000,0x51001b27,0x4000000,0x500000,0x51001b27, +0x7c00100,0x230400,0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802400,0x1862460,0x51001c1c,0x2806400,0x1862460,0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000, +0x1329800,0x51001c1c,0x6800000,0x1862400,0x51001c1c,0x6800100,0x1862400,0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800400,0x1862400,0x51001c1c,0x7c00100,0x1830000, +0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619,0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000,0x200000,0x51002d19, +0x7c00100,0x230400,0x51003700,0x24000000,0x200000,0x51003700,0x24000000,0xe00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000,0x51006108,0x7c00100, +0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000,0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300,0x24000000,0x200000, +0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301,0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008a00,0x7c00500,0x230400,0x51008e00, +0x24000000,0x200000,0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000,0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,0x51008e00,0x24000000, 
+0x1710000,0x51008e00,0x24000002,0x200000,0x51008e00,0x24000500,0x230400,0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,0x7c00100,0x22040e, +0x51009419,0x7c00100,0x22040f,0x51009419,0x7c00100,0x250400,0x51009500,0x4000000,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,0x220400,0x51009519, +0x7c00100,0x22040f,0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100,0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,0x51009b71,0x6800100, +0x962540,0x51009b71,0x6804400,0x962540,0x51009b71,0x7c00100,0x230400,0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,0x2802c00,0x962460, +0x51009c52,0x4000010,0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52,0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,0x448000,0x51009d6d, +0x6800000,0x1329800,0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500,0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,0x51009e08,0x2802100, +0x962460,0x51009f63,0x4000010,0x400000,0x51009f63,0x6800000,0x1329800,0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,0xc000010,0x448000, +0x51009f63,0xc000010,0xb48000,0x5100a008,0x2000,0x962460,0x5100a008,0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,0x220400,0x5100a008, +0x7c00100,0x230400,0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500,0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,0x5100a16f,0x6800100, +0x962540,0x5100a16f,0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000,0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,0x6800000,0x1329800, +0x5100a24f,0x7c00100,0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e,0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,0x962540,0x5100a36e, +0x6804400,0x962540,0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100,0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800,0x5100a442,0x6800100, 
+0x962540,0x5100a442,0x7c00100,0x230400,0x5100a442,0xc000010,0x448000,0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,0x2802000,0x962460, +0x5100a76b,0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c,0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,0xe00000,0x5100ab00, +0x4000000,0xe00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020,0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,0x510a7300,0x24000000, +0x30200000,0x510aaa00,0x4000000,0x30e00000,0x5140a2b3,0x4000400,0x400000,0x514a8292,0x4000000,0xe00000,0x51802b84,0x2802000,0x962460,0x51c00908,0x2802400,0x962460, +0x51c0a008,0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a,0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,0x1600000,0x52001b00, +0x4000000,0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100,0x1862400,0x52001c1c,0x6800400,0x1862400,0x52001e12,0x7c00100,0x2230500,0x52001e12,0x7c00100, +0x2330520,0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400,0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,0x4000000,0x200006, +0x52003000,0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108,0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,0x962460,0x52008407, +0x7c00100,0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000,0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,0x52009419,0x7c00100, +0x250400,0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460,0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,0x4000010,0x200000, +0x5200ac7e,0x7c00100,0x230400,0x5200ad28,0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400,0x962460,0x5200ae6a,0x2802400,0x1862460,0x5200ae6a, +0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400,0x5200ae6a,0x6800100,0x1862540,0x5200ae6a,0x7c00100, 
+0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083,0x7c00100,0x230400,0x5200b083,0xc000010,0x448000, +0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100,0x230400,0x5200b182,0xc000010,0x448000,0x5200b30a, +0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460,0x5200b54e,0x2802400,0x962460,0x5200b54e,0x4000000, +0x200000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0x1329800,0x5200b54e,0x6800100,0x962540,0x5200b54e,0x6804400,0x962540,0x5200b54e,0x7c00100,0x230400, +0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c,0x6800400,0x1862400,0x5200b61c,0x7c00100,0x1830000,0x5200b61c,0x7c00900,0x1830000,0x5200b77f, +0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000,0x5200b77f,0x7c00100,0x1830000,0x5200b77f,0x7c00500, +0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873,0x2806400,0x962460,0x5200b873,0x6800000,0x1329800, +0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010,0x448000,0x5200b912,0x7c00100,0x2230500,0x5200b912, +0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400,0x5200bb85,0x4000000,0x200000,0x5200bb85,0x7c00100, +0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d,0x4000000,0x200000,0x5200bd7d,0x7c00100,0x230400, +0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000,0x200000,0x5200c178,0,0x218960,0x5200c178, +0x2802000,0x962460,0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000,0x5200c178,0x6800100, 
+0x962540,0x5200c178,0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000,0x5200c247,0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400, +0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00,0x4000000,0x100006,0x52023700,0x24000000,0x100000,0x52023700,0x24000000,0xe00000,0x52023700, +0x24000000,0x10100000,0x52023700,0x24000000,0x10e00000,0x52023700,0x24000000,0x928045a0,0x52024400,0x4000000,0x100000,0x52027300,0x24000000,0x100000,0x5202c300,0x4000000, +0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003,0x5202c300,0x4000000,0x10000d,0x5202c300,0x4000100,0x150400,0x5202c300,0x4000100,0x15040d, +0x5202c300,0x4000100,0x10150400,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0x30e00000,0x520a3800,0x24000000,0x30100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11, +0x7c00300,0xe30001,0x520a7300,0x24000000,0x30100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000,0xe00002,0x520ac400,0x4000000,0xe0000d,0x520ac400,0x4000000, +0x30e0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af78,0x6800400,0x962540,0x5240af78,0x7c00100,0x230400,0x5240af79,0x4000400,0x200000, +0x5240af79,0x6800100,0x962540,0x5240b298,0x4000000,0x200000,0x5240b2a2,0x4000000,0x200000,0x5240b2a2,0x4000000,0x1500000,0x5240b5b6,0x7c00900,0x230400,0x524a4492, +0x4000000,0xe00003,0x5280af78,0x2802400,0x962460,0x5280af79,0x2802400,0x962460,0x5280af7b,0x2802400,0x962460,0x5280af7d,0x2802400,0x962460,0x52c0b3ad,0x2802400, +0x962460,0x52c0b3b1,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460,0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,0x962540, +0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a,0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,0x60001b27, +0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802000,0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,0x24000000, 
+0x10200000,0x60003000,0x24000000,0x10e00000,0x60003700,0x24000000,0x200000,0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,0x220400, +0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301,0x2802000,0x962460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,0x60009519, +0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100,0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x32703580,0x6000c654,0x2802000, +0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400,0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,0x200000, +0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540,0x6000c73f,0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,0x6000c941, +0x2806000,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941,0x4000010,0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941,0x7c00100, +0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400,0x6000cc00,0x4000000,0xe00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000,0x100000, +0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300,0x4000100,0x15040d,0x6002c300,0x4000100,0x10150400,0x600a3000,0x24000000,0x30200000,0x600a3000, +0x24000000,0x30e00000,0x600a3700,0x24000000,0x30200000,0x600a3800,0x24000000,0x30200000,0x600a3800,0x24000000,0xb28045a0,0x600a4305,0x7c00100,0xe30400,0x600ac300,0x4000000, +0x30100000,0x600ac400,0x4000000,0x10e0000d,0x600ac400,0x4000000,0x30e0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000,0x30e00000, +0x600acd00,0x4000000,0x30200000,0x600acd00,0x4000000,0x30e00000,0x600acd00,0x4000000,0x30e05200,0x600acd00,0x4000000,0xb28045a0,0x600acd00,0x4000000,0xb28049c0,0x600ace00, 
+0x4000000,0x30e00000,0x600ace00,0x4000000,0xb28045a0,0x600acf00,0x4000000,0x30e00000,0x600acf00,0x4000000,0x30e05200,0x600acf00,0x4000000,0xb28045a0,0x600ad111,0x7c40300, +0xe30000,0x604ac492,0x4000000,0x30e00003,0x61000a03,0x4000000,0x1600000,0x61000c02,0,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100,0x1830000, +0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400,0x61006600,0x24000020,0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400,0x6100870c, +0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000,0x400000,0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519,0x7c00100, +0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f,0x61009b71,0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100,0x230400, +0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00,0x4000000,0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460,0x6100d202, +0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120,0x230405,0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476,0x2806400, +0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800,0x6100d476,0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010,0x448000, +0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573,0x6800100,0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400,0x6100d573, +0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100,0x230400,0x6100d85c,0x2802400,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c,0x7c00100, +0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460,0x6100d997,0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000,0x1329800, +0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997,0x7c00100,0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000,0x6100da98, 
+0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000,0x200000,0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99,0x6800000, +0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540,0x6100dc99,0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300,0xe30001, +0x610ace00,0x4000000,0x30e00000,0x6140af78,0x7c00100,0x230400,0x6140af79,0x6800100,0x962540,0x6140af82,0x7c00100,0x230400,0x6180af79,0x2802400,0x962460,0x62002a00, +0x4000000,0x1600000,0x63000c00,0x80000,0x918820,0x63002800,0x80000,0x918820,0x7000080e,0x7c00100,0x250400,0x70000a03,0x4000000,0x200000,0x70000c00,0, +0x218960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100,0x962460,0x70001524,0x7c00100,0x230400,0x70001615,0x2802100,0x962460, +0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400,0x70002a00,0x4000000,0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000, +0x24000000,0x200000,0x70003000,0x24000000,0x10200000,0x70003800,0x24000000,0xe00000,0x70005201,0x2802400,0x962460,0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100, +0x220400,0x70006108,0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e,0x4000000,0x200000,0x70008301,0x2802100,0x962460, +0x70008301,0x2802400,0x962460,0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002,0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00, +0x24000010,0x400000,0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400,0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100, +0x230400,0x70009965,0x4000010,0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008,0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f, +0x7000a50e,0x4000000,0x200000,0x7000b61c,0x2802400,0x1862460,0x7000b61c,0x6800400,0x1862400,0x7000b61c,0x7c00100,0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941, 
+0x2806000,0x962460,0x7000cc00,0x4000000,0xe00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0xe00000,0x7000cd00,0x4000000,0x10200000,0x7000cd00,0x4000000, +0x10e00000,0x7000cd00,0x4000000,0x10e05200,0x7000cd00,0x4000000,0x928045a0,0x7000cf00,0x4000000,0xe00000,0x7000cf00,0x4000000,0x10e00000,0x7000d202,0x2802100,0x962460, +0x7000d202,0x7c00100,0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400,0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86, +0xc000010,0x448000,0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2000,0x962460,0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000, +0x962460,0x7000e187,0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187,0xc000010,0x448000,0x7000e288,0x7c00100,0x230400, +0x7000e300,0x4000000,0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100,0x962540,0x7000e489,0x6800100,0x962541,0x7000e489, +0x6804400,0x962540,0x7000e489,0x7c00100,0x230400,0x7000e489,0x7c00900,0x230400,0x7000e59d,0x2802100,0x962460,0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000, +0x200000,0x7000e59d,0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400,0x962540,0x7000e59d,0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000, +0x7000e691,0x2802100,0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400,0x962460,0x7000e691,0x6800000,0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691, +0x7c00100,0x230400,0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400,0x7000e719,0x7c00100,0x220400,0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100, +0x230400,0x7000e9a0,0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000,0x500000,0x7000e9a0,0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460, +0x7000ea79,0x4000000,0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000,0x7000ea79,0x7c00100,0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c, 
+0x4000000,0x200000,0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3,0x2806400,0x962460,0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000, +0x1329800,0x7000eca3,0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010,0x448000,0x7000ed95,0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400, +0x7000ed95,0xc000010,0x448000,0x7000ee1c,0x2802400,0x1862460,0x7000ee1c,0x6800000,0x1329800,0x7000ee1c,0x7c00100,0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f, +0x4000000,0x200000,0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e,0x7c00100,0x230400,0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100, +0x230400,0x7000f200,0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000,0x1710000,0x7000f34b,0x2802100,0x962460,0x7000f34b,0x4000000,0x200000, +0x7000f34b,0x4000010,0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400,0x7000f34b,0x7c00900,0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490, +0x4000000,0x200000,0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b,0x4000000,0x200000,0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100, +0x230400,0x7000f8a6,0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400,0x962460,0x7000f8a6,0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000, +0x7000f8a6,0x4000800,0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541,0x7000f8a6,0x7c00100,0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921, +0x4000000,0x200000,0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e,0x2802400,0x962460,0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000, +0x200000,0x7000fb9e,0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100,0x962541,0x7000fb9e,0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000, +0x7000fc92,0x6800000,0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400,0x7000fc92,0x7c00100,0x250400,0x700acd00,0x4000000,0x30e00000,0x700acd00, 
+0x4000000,0xb28045a0,0x700ace00,0x4000000,0x30e00000,0x700acf00,0x4000000,0x30e00000,0x700acf00,0x4000000,0xb28045a0,0x7040dfbd,0x4000000,0x200000,0x7040f7c1,0x80000, +0x918820,0x7080af79,0x2802400,0x962460,0x7080dfbd,0x2802400,0x962460,0x70c0e4bf,0x2802400,0x962460,0x70c0e4bf,0x6800100,0x962540,0x8000120f,0x7c00100,0x230400, +0x80001524,0x7c00100,0x230400,0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100,0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00, +0x4000000,0x200000,0x80005208,0x2802400,0x962460,0x80005c00,0x4000000,0x200000,0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100, +0x230400,0x80009519,0x7c00100,0x250400,0x80009865,0x7c00100,0x230400,0x8000a008,0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400, +0x8000cd00,0x4000000,0xe00000,0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100,0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802400,0x962460,0x8000d997, +0x4000000,0x200000,0x8000d997,0x4000000,0x400000,0x8000d997,0x4000000,0x500000,0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100, +0x962460,0x8000e489,0x7c00100,0x230400,0x8000e719,0x7c00100,0x220400,0x8000f8a6,0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000, +0x8000fda1,0x2802100,0x1862460,0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000,0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862540,0x8000fda1, +0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400,0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06,0x7c00100, +0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2,0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010,0x448000, +0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100,0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000,0x80010670, 
+0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000,0x800acd00,0x4000000,0x30e00000,0x800acd00,0x4000000,0x72904de0,0x800ace00,0x4000000, +0x30e00000,0x800acf00,0x4000000,0x30e00000,0x800b0011,0x7c40300,0xe30000,0x800b0500,0x4000000,0x30e00000,0x800b0500,0x4000000,0xb28045a0,0x90001615,0x7c00100,0x230400, +0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000,0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802000,0x962460,0x90008e00, +0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460,0x9000d200,0,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202,0x2802100, +0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460,0x900107a7,0x2802c00,0x962460, +0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000,0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8,0x2802100,0x962460,0x900108a8, +0x2806400,0x962460,0x900108a8,0x4000000,0x200000,0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000,0x1329800,0x900108a8,0x6800100, +0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8,0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460,0x90010ca9,0x2802100,0x962460, +0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010,0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b,0x4000000,0x500000,0x90010eaa, +0x2802100,0x962460,0x90010eaa,0x2802400,0x962460,0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000,0x400000,0x90010eaa,0x4000010, +0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa,0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000,0x90010fab,0x7c00100,0x220400, +0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000,0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0x30e00000,0x900acd00,0x4000000,0xb28045a0,0x900acf00, 
+0x4000000,0x30e00000,0x900b0500,0x4000000,0xe00000,0x900b0500,0x4000000,0x30e00000,0x900b0500,0x4000000,0xb28045a0,0x900b0b9a,0x7c00900,0x1230400,0x900b109a,0x7c00300, +0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000}; -static const int32_t countPropsVectors=6195; +static const int32_t countPropsVectors=6279; static const int32_t propsVectorsColumns=3; static const uint16_t scriptExtensions[194]={ 0x800e,0x8019,8,0x8059,8,2,8,0x8038,8,6,8,0x8019,3,0x800c,2,0x22, @@ -3539,6 +3581,6 @@ static const uint16_t scriptExtensions[194]={ 0x8023,0xa,0xaf,0x19,0x1c,0x804f,0x37,0x804e,0x2f,0x31,0x8053,0x2f,0x8031,2,0x8007,0x89, 0x67,0x8087}; -static const int32_t indexes[UPROPS_INDEX_COUNT]={0x28aa,0x28aa,0x28aa,0x28aa,0x606c,3,0x789f,0x7900,0x7900,0x7900,0xb11ae,0x2a75631,0,0,0,0}; +static const int32_t indexes[UPROPS_INDEX_COUNT]={0x28aa,0x28aa,0x28aa,0x28aa,0x6196,3,0x7a1d,0x7a7e,0x7a7e,0x7a7e,0xb11ae,0x2a75631,0,0,0,0}; #endif // INCLUDED_FROM_UCHAR_C diff --git a/deps/icu-small/source/common/ucharstrie.cpp b/deps/icu-small/source/common/ucharstrie.cpp index d04d315c79..e0b33af519 100644 --- a/deps/icu-small/source/common/ucharstrie.cpp +++ b/deps/icu-small/source/common/ucharstrie.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucharstrie.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -175,7 +175,8 @@ UCharsTrie::next(int32_t uchar) { } UStringTrieResult -UCharsTrie::next(const UChar *s, int32_t sLength) { +UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) { + const UChar *s=ptr; if(sLength<0 ? 
*s==0 : sLength==0) { // Empty input. return current(); diff --git a/deps/icu-small/source/common/ucharstriebuilder.cpp b/deps/icu-small/source/common/ucharstriebuilder.cpp index 412a58a45d..694648d0c8 100644 --- a/deps/icu-small/source/common/ucharstriebuilder.cpp +++ b/deps/icu-small/source/common/ucharstriebuilder.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucharstriebuilder.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucharstrieiterator.cpp b/deps/icu-small/source/common/ucharstrieiterator.cpp index 68ba8c2177..b3132241fe 100644 --- a/deps/icu-small/source/common/ucharstrieiterator.cpp +++ b/deps/icu-small/source/common/ucharstrieiterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: ucharstrieiterator.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -21,7 +21,7 @@ U_NAMESPACE_BEGIN -UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, +UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode) : uchars_(trieUChars), pos_(uchars_), initialPos_(uchars_), diff --git a/deps/icu-small/source/common/uchriter.cpp b/deps/icu-small/source/common/uchriter.cpp index fd0a407c6b..822168f5c8 100644 --- a/deps/icu-small/source/common/uchriter.cpp +++ b/deps/icu-small/source/common/uchriter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -25,14 +25,14 @@ UCharCharacterIterator::UCharCharacterIterator() // never default construct! } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length) : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0), text(textPtr) { } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t position) : CharacterIterator(textPtr != 0 ? (length>=0 ? 
length : u_strlen(textPtr)) : 0, position), @@ -40,7 +40,7 @@ UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, { } -UCharCharacterIterator::UCharCharacterIterator(const UChar* textPtr, +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t textBegin, int32_t textEnd, @@ -349,7 +349,7 @@ UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) return pos; } -void UCharCharacterIterator::setText(const UChar* newText, +void UCharCharacterIterator::setText(ConstChar16Ptr newText, int32_t newTextLength) { text = newText; if(newText == 0 || newTextLength < 0) { diff --git a/deps/icu-small/source/common/ucln.h b/deps/icu-small/source/common/ucln.h index 3c8c66ad2f..fe6666efed 100644 --- a/deps/icu-small/source/common/ucln.h +++ b/deps/icu-small/source/common/ucln.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucln.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucln_cmn.cpp b/deps/icu-small/source/common/ucln_cmn.cpp index 657ec337d7..7e541a1a5f 100644 --- a/deps/icu-small/source/common/ucln_cmn.cpp +++ b/deps/icu-small/source/common/ucln_cmn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: ucln_cmn.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucln_cmn.h b/deps/icu-small/source/common/ucln_cmn.h index 56fa73053f..a6ecfd54bb 100644 --- a/deps/icu-small/source/common/ucln_cmn.h +++ b/deps/icu-small/source/common/ucln_cmn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: ucln_cmn.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucln_imp.h b/deps/icu-small/source/common/ucln_imp.h index 5aa5b0d19f..2e98566979 100644 --- a/deps/icu-small/source/common/ucln_imp.h +++ b/deps/icu-small/source/common/ucln_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucln_imp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -121,7 +121,9 @@ U_CAPI void U_EXPORT2 UCLN_FINI () /* READ READ READ READ! Are you getting compilation errors from windows.h? Any source file which includes this (ucln_imp.h) header MUST be defined with language extensions ON. 
*/ +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE diff --git a/deps/icu-small/source/common/ucmndata.c b/deps/icu-small/source/common/ucmndata.c deleted file mode 100644 index 6b7d78d104..0000000000 --- a/deps/icu-small/source/common/ucmndata.c +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*------------------------------------------------------------------------------ - * - * UCommonData An abstract interface for dealing with ICU Common Data Files. - * ICU Common Data Files are a grouping of a number of individual - * data items (resources, converters, tables, anything) into a - * single file or dll. The combined format includes a table of - * contents for locating the individual items by name. - * - * Two formats for the table of contents are supported, which is - * why there is an abstract inteface involved. 
- * - */ - -#include "unicode/utypes.h" -#include "unicode/udata.h" -#include "cstring.h" -#include "ucmndata.h" -#include "udatamem.h" - -#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) -# include -#endif - -U_CFUNC uint16_t -udata_getHeaderSize(const DataHeader *udh) { - if(udh==NULL) { - return 0; - } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { - /* same endianness */ - return udh->dataHeader.headerSize; - } else { - /* opposite endianness */ - uint16_t x=udh->dataHeader.headerSize; - return (uint16_t)((x<<8)|(x>>8)); - } -} - -U_CFUNC uint16_t -udata_getInfoSize(const UDataInfo *info) { - if(info==NULL) { - return 0; - } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { - /* same endianness */ - return info->size; - } else { - /* opposite endianness */ - uint16_t x=info->size; - return (uint16_t)((x<<8)|(x>>8)); - } -} - -/*-----------------------------------------------------------------------------* - * * - * Pointer TOCs. TODO: This form of table-of-contents should be removed * - * because DLLs must be relocated on loading to correct the * - * pointer values and this operation makes shared memory * - * mapping of the data much less likely to work. * - * * - *-----------------------------------------------------------------------------*/ -typedef struct { - const char *entryName; - const DataHeader *pHeader; -} PointerTOCEntry; - - -typedef struct { - uint32_t count; - uint32_t reserved; - PointerTOCEntry entry[2]; /* Actual size is from count. */ -} PointerTOC; - - -/* definition of OffsetTOC struct types moved to ucmndata.h */ - -/*-----------------------------------------------------------------------------* - * * - * entry point lookup implementations * - * * - *-----------------------------------------------------------------------------*/ - -#ifndef MIN -#define MIN(a,b) (((a)<(b)) ? 
(a) : (b)) -#endif - -/** - * Compare strings where we know the shared prefix length, - * and advance the prefix length as we find that the strings share even more characters. - */ -static int32_t -strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) { - int32_t pl=*pPrefixLength; - int32_t cmp=0; - s1+=pl; - s2+=pl; - for(;;) { - int32_t c1=(uint8_t)*s1++; - int32_t c2=(uint8_t)*s2++; - cmp=c1-c2; - if(cmp!=0 || c1==0) { /* different or done */ - break; - } - ++pl; /* increment shared same-prefix length */ - } - *pPrefixLength=pl; - return cmp; -} - -static int32_t -offsetTOCPrefixBinarySearch(const char *s, const char *names, - const UDataOffsetTOCEntry *toc, int32_t count) { - int32_t start=0; - int32_t limit=count; - /* - * Remember the shared prefix between s, start and limit, - * and don't compare that shared prefix again. - * The shared prefix should get longer as we narrow the [start, limit[ range. - */ - int32_t startPrefixLength=0; - int32_t limitPrefixLength=0; - if(count==0) { - return -1; - } - /* - * Prime the prefix lengths so that we don't keep prefixLength at 0 until - * both the start and limit indexes have moved. - * At the same time, we find if s is one of the start and (limit-1) names, - * and if not, exclude them from the actual binary search. 
- */ - if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) { - return 0; - } - ++start; - --limit; - if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) { - return limit; - } - while(starttoc; - if (toc != NULL) { - retVal = toc->count; - } - return retVal; -} - -static const DataHeader * -offsetTOCLookupFn(const UDataMemory *pData, - const char *tocEntryName, - int32_t *pLength, - UErrorCode *pErrorCode) { - const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; - if(toc!=NULL) { - const char *base=(const char *)toc; - int32_t number, count=(int32_t)toc->count; - - /* perform a binary search for the data in the common data's table of contents */ -#if defined (UDATA_DEBUG_DUMP) - /* list the contents of the TOC each time .. not recommended */ - for(number=0; numberentry[number].nameOffset]); - } -#endif - number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count); - if(number>=0) { - /* found it */ - const UDataOffsetTOCEntry *entry=toc->entry+number; -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Found.\n", tocEntryName); -#endif - if((number+1) < count) { - *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); - } else { - *pLength = -1; - } - return (const DataHeader *)(base+entry->dataOffset); - } else { -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Not found.\n", tocEntryName); -#endif - return NULL; - } - } else { -#ifdef UDATA_DEBUG - fprintf(stderr, "returning header\n"); -#endif - - return pData->pHeader; - } -} - - -static uint32_t pointerTOCEntryCount(const UDataMemory *pData) { - const PointerTOC *toc = (PointerTOC *)pData->toc; - return (uint32_t)((toc != NULL) ? 
(toc->count) : 0); -} - - -static const DataHeader *pointerTOCLookupFn(const UDataMemory *pData, - const char *name, - int32_t *pLength, - UErrorCode *pErrorCode) { - if(pData->toc!=NULL) { - const PointerTOC *toc = (PointerTOC *)pData->toc; - int32_t number, count=(int32_t)toc->count; - -#if defined (UDATA_DEBUG_DUMP) - /* list the contents of the TOC each time .. not recommended */ - for(number=0; numberentry[number].entryName); - } -#endif - number=pointerTOCPrefixBinarySearch(name, toc->entry, count); - if(number>=0) { - /* found it */ -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); -#endif - *pLength=-1; - return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); - } else { -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Not found.\n", name); -#endif - return NULL; - } - } else { - return pData->pHeader; - } -} - -static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; -static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; - - - -/*----------------------------------------------------------------------* - * * - * checkCommonData Validate the format of a common data file. * - * Fill in the virtual function ptr based on TOC type * - * If the data is invalid, close the UDataMemory * - * and set the appropriate error code. 
* - * * - *----------------------------------------------------------------------*/ -U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { - if (U_FAILURE(*err)) { - return; - } - - if(udm==NULL || udm->pHeader==NULL) { - *err=U_INVALID_FORMAT_ERROR; - } else if(!(udm->pHeader->dataHeader.magic1==0xda && - udm->pHeader->dataHeader.magic2==0x27 && - udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && - udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) - ) { - /* header not valid */ - *err=U_INVALID_FORMAT_ERROR; - } - else if (udm->pHeader->info.dataFormat[0]==0x43 && - udm->pHeader->info.dataFormat[1]==0x6d && - udm->pHeader->info.dataFormat[2]==0x6e && - udm->pHeader->info.dataFormat[3]==0x44 && - udm->pHeader->info.formatVersion[0]==1 - ) { - /* dataFormat="CmnD" */ - udm->vFuncs = &CmnDFuncs; - udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); - } - else if(udm->pHeader->info.dataFormat[0]==0x54 && - udm->pHeader->info.dataFormat[1]==0x6f && - udm->pHeader->info.dataFormat[2]==0x43 && - udm->pHeader->info.dataFormat[3]==0x50 && - udm->pHeader->info.formatVersion[0]==1 - ) { - /* dataFormat="ToCP" */ - udm->vFuncs = &ToCPFuncs; - udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); - } - else { - /* dataFormat not recognized */ - *err=U_INVALID_FORMAT_ERROR; - } - - if (U_FAILURE(*err)) { - /* If the data is no good and we memory-mapped it ourselves, - * close the memory mapping so it doesn't leak. Note that this has - * no effect on non-memory mapped data, other than clearing fields in udm. - */ - udata_close(udm); - } -} - -/* - * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package - * header but not its sub-items. - * This function will be needed for automatic runtime swapping. - * Sub-items should not be swapped to limit the swapping to the parts of the - * package that are actually used. 
- * - * Since lengths of items are implicit in the order and offsets of their - * ToC entries, and since offsets are relative to the start of the ToC, - * a swapped version may need to generate a different data structure - * with pointers to the original data items and with their lengths - * (-1 for the last one if it is not known), and maybe even pointers to the - * swapped versions of the items. - * These pointers to swapped versions would establish a cache; - * instead, each open data item could simply own the storage for its swapped - * data. This fits better with the current design. - * - * markus 2003sep18 Jitterbug 2235 - */ diff --git a/deps/icu-small/source/common/ucmndata.cpp b/deps/icu-small/source/common/ucmndata.cpp new file mode 100644 index 0000000000..251c7ba182 --- /dev/null +++ b/deps/icu-small/source/common/ucmndata.cpp @@ -0,0 +1,389 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*------------------------------------------------------------------------------ + * + * UCommonData An abstract interface for dealing with ICU Common Data Files. + * ICU Common Data Files are a grouping of a number of individual + * data items (resources, converters, tables, anything) into a + * single file or dll. The combined format includes a table of + * contents for locating the individual items by name. + * + * Two formats for the table of contents are supported, which is + * why there is an abstract inteface involved. 
+ * + */ + +#include "unicode/utypes.h" +#include "unicode/udata.h" +#include "cstring.h" +#include "ucmndata.h" +#include "udatamem.h" + +#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) +# include +#endif + +U_CFUNC uint16_t +udata_getHeaderSize(const DataHeader *udh) { + if(udh==NULL) { + return 0; + } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { + /* same endianness */ + return udh->dataHeader.headerSize; + } else { + /* opposite endianness */ + uint16_t x=udh->dataHeader.headerSize; + return (uint16_t)((x<<8)|(x>>8)); + } +} + +U_CFUNC uint16_t +udata_getInfoSize(const UDataInfo *info) { + if(info==NULL) { + return 0; + } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { + /* same endianness */ + return info->size; + } else { + /* opposite endianness */ + uint16_t x=info->size; + return (uint16_t)((x<<8)|(x>>8)); + } +} + +/*-----------------------------------------------------------------------------* + * * + * Pointer TOCs. TODO: This form of table-of-contents should be removed * + * because DLLs must be relocated on loading to correct the * + * pointer values and this operation makes shared memory * + * mapping of the data much less likely to work. * + * * + *-----------------------------------------------------------------------------*/ +typedef struct { + const char *entryName; + const DataHeader *pHeader; +} PointerTOCEntry; + + +typedef struct { + uint32_t count; + uint32_t reserved; + PointerTOCEntry entry[2]; /* Actual size is from count. */ +} PointerTOC; + + +/* definition of OffsetTOC struct types moved to ucmndata.h */ + +/*-----------------------------------------------------------------------------* + * * + * entry point lookup implementations * + * * + *-----------------------------------------------------------------------------*/ + +#ifndef MIN +#define MIN(a,b) (((a)<(b)) ? 
(a) : (b)) +#endif + +/** + * Compare strings where we know the shared prefix length, + * and advance the prefix length as we find that the strings share even more characters. + */ +static int32_t +strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) { + int32_t pl=*pPrefixLength; + int32_t cmp=0; + s1+=pl; + s2+=pl; + for(;;) { + int32_t c1=(uint8_t)*s1++; + int32_t c2=(uint8_t)*s2++; + cmp=c1-c2; + if(cmp!=0 || c1==0) { /* different or done */ + break; + } + ++pl; /* increment shared same-prefix length */ + } + *pPrefixLength=pl; + return cmp; +} + +static int32_t +offsetTOCPrefixBinarySearch(const char *s, const char *names, + const UDataOffsetTOCEntry *toc, int32_t count) { + int32_t start=0; + int32_t limit=count; + /* + * Remember the shared prefix between s, start and limit, + * and don't compare that shared prefix again. + * The shared prefix should get longer as we narrow the [start, limit[ range. + */ + int32_t startPrefixLength=0; + int32_t limitPrefixLength=0; + if(count==0) { + return -1; + } + /* + * Prime the prefix lengths so that we don't keep prefixLength at 0 until + * both the start and limit indexes have moved. + * At the same time, we find if s is one of the start and (limit-1) names, + * and if not, exclude them from the actual binary search. 
+ */ + if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) { + return 0; + } + ++start; + --limit; + if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) { + return limit; + } + while(starttoc; + if (toc != NULL) { + retVal = toc->count; + } + return retVal; +} + +static const DataHeader * U_CALLCONV +offsetTOCLookupFn(const UDataMemory *pData, + const char *tocEntryName, + int32_t *pLength, + UErrorCode *pErrorCode) { + (void)pErrorCode; + const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; + if(toc!=NULL) { + const char *base=(const char *)toc; + int32_t number, count=(int32_t)toc->count; + + /* perform a binary search for the data in the common data's table of contents */ +#if defined (UDATA_DEBUG_DUMP) + /* list the contents of the TOC each time .. not recommended */ + for(number=0; numberentry[number].nameOffset]); + } +#endif + number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count); + if(number>=0) { + /* found it */ + const UDataOffsetTOCEntry *entry=toc->entry+number; +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Found.\n", tocEntryName); +#endif + if((number+1) < count) { + *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); + } else { + *pLength = -1; + } + return (const DataHeader *)(base+entry->dataOffset); + } else { +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Not found.\n", tocEntryName); +#endif + return NULL; + } + } else { +#ifdef UDATA_DEBUG + fprintf(stderr, "returning header\n"); +#endif + + return pData->pHeader; + } +} + + +static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) { + const PointerTOC *toc = (PointerTOC *)pData->toc; + return (uint32_t)((toc != NULL) ? 
(toc->count) : 0); +} + +static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData, + const char *name, + int32_t *pLength, + UErrorCode *pErrorCode) { + (void)pErrorCode; + if(pData->toc!=NULL) { + const PointerTOC *toc = (PointerTOC *)pData->toc; + int32_t number, count=(int32_t)toc->count; + +#if defined (UDATA_DEBUG_DUMP) + /* list the contents of the TOC each time .. not recommended */ + for(number=0; numberentry[number].entryName); + } +#endif + number=pointerTOCPrefixBinarySearch(name, toc->entry, count); + if(number>=0) { + /* found it */ +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); +#endif + *pLength=-1; + return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); + } else { +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Not found.\n", name); +#endif + return NULL; + } + } else { + return pData->pHeader; + } +} +U_CDECL_END + + +static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; +static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; + + + +/*----------------------------------------------------------------------* + * * + * checkCommonData Validate the format of a common data file. * + * Fill in the virtual function ptr based on TOC type * + * If the data is invalid, close the UDataMemory * + * and set the appropriate error code. 
* + * * + *----------------------------------------------------------------------*/ +U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { + if (U_FAILURE(*err)) { + return; + } + + if(udm==NULL || udm->pHeader==NULL) { + *err=U_INVALID_FORMAT_ERROR; + } else if(!(udm->pHeader->dataHeader.magic1==0xda && + udm->pHeader->dataHeader.magic2==0x27 && + udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && + udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) + ) { + /* header not valid */ + *err=U_INVALID_FORMAT_ERROR; + } + else if (udm->pHeader->info.dataFormat[0]==0x43 && + udm->pHeader->info.dataFormat[1]==0x6d && + udm->pHeader->info.dataFormat[2]==0x6e && + udm->pHeader->info.dataFormat[3]==0x44 && + udm->pHeader->info.formatVersion[0]==1 + ) { + /* dataFormat="CmnD" */ + udm->vFuncs = &CmnDFuncs; + udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); + } + else if(udm->pHeader->info.dataFormat[0]==0x54 && + udm->pHeader->info.dataFormat[1]==0x6f && + udm->pHeader->info.dataFormat[2]==0x43 && + udm->pHeader->info.dataFormat[3]==0x50 && + udm->pHeader->info.formatVersion[0]==1 + ) { + /* dataFormat="ToCP" */ + udm->vFuncs = &ToCPFuncs; + udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); + } + else { + /* dataFormat not recognized */ + *err=U_INVALID_FORMAT_ERROR; + } + + if (U_FAILURE(*err)) { + /* If the data is no good and we memory-mapped it ourselves, + * close the memory mapping so it doesn't leak. Note that this has + * no effect on non-memory mapped data, other than clearing fields in udm. + */ + udata_close(udm); + } +} + +/* + * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package + * header but not its sub-items. + * This function will be needed for automatic runtime swapping. + * Sub-items should not be swapped to limit the swapping to the parts of the + * package that are actually used. 
+ * + * Since lengths of items are implicit in the order and offsets of their + * ToC entries, and since offsets are relative to the start of the ToC, + * a swapped version may need to generate a different data structure + * with pointers to the original data items and with their lengths + * (-1 for the last one if it is not known), and maybe even pointers to the + * swapped versions of the items. + * These pointers to swapped versions would establish a cache; + * instead, each open data item could simply own the storage for its swapped + * data. This fits better with the current design. + * + * markus 2003sep18 Jitterbug 2235 + */ diff --git a/deps/icu-small/source/common/ucmndata.h b/deps/icu-small/source/common/ucmndata.h index 4ff37cc20b..8c36897f16 100644 --- a/deps/icu-small/source/common/ucmndata.h +++ b/deps/icu-small/source/common/ucmndata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/ucnv.c b/deps/icu-small/source/common/ucnv.c deleted file mode 100644 index 7ce05644c7..0000000000 --- a/deps/icu-small/source/common/ucnv.c +++ /dev/null @@ -1,2918 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* ucnv.c: -* Implements APIs for the ICU's codeset conversion library; -* mostly calls through internal functions; -* created by Bertrand A. 
Damiba -* -* Modification History: -* -* Date Name Description -* 04/04/99 helena Fixed internal header inclusion. -* 05/09/00 helena Added implementation to handle fallback mappings. -* 06/20/2000 helena OS/400 port changes; mostly typecast. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ustring.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" -#include "unicode/uset.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "putilimp.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" -#include "utracimp.h" -#include "ustr_imp.h" -#include "ucnv_imp.h" -#include "ucnv_cnv.h" -#include "ucnv_bld.h" - -/* size of intermediate and preflighting buffers in ucnv_convert() */ -#define CHUNK_SIZE 1024 - -typedef struct UAmbiguousConverter { - const char *name; - const UChar variant5c; -} UAmbiguousConverter; - -static const UAmbiguousConverter ambiguousConverters[]={ - { "ibm-897_P100-1995", 0xa5 }, - { "ibm-942_P120-1999", 0xa5 }, - { "ibm-943_P130-1999", 0xa5 }, - { "ibm-946_P100-1995", 0xa5 }, - { "ibm-33722_P120-1999", 0xa5 }, - { "ibm-1041_P100-1995", 0xa5 }, - /*{ "ibm-54191_P100-2006", 0xa5 },*/ - /*{ "ibm-62383_P100-2007", 0xa5 },*/ - /*{ "ibm-891_P100-1995", 0x20a9 },*/ - { "ibm-944_P100-1995", 0x20a9 }, - { "ibm-949_P110-1999", 0x20a9 }, - { "ibm-1363_P110-1997", 0x20a9 }, - { "ISO_2022,locale=ko,version=0", 0x20a9 }, - { "ibm-1088_P100-1995", 0x20a9 } -}; - -/*Calls through createConverter */ -U_CAPI UConverter* U_EXPORT2 -ucnv_open (const char *name, - UErrorCode * err) -{ - UConverter *r; - - if (err == NULL || U_FAILURE (*err)) { - return NULL; - } - - r = ucnv_createConverter(NULL, name, err); - return r; -} - -U_CAPI UConverter* U_EXPORT2 -ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) -{ - return ucnv_createConverterFromPackage(packageName, converterName, err); -} - -/*Extracts the UChar* to a char* and calls through createConverter */ 
-U_CAPI UConverter* U_EXPORT2 -ucnv_openU (const UChar * name, - UErrorCode * err) -{ - char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - - if (err == NULL || U_FAILURE(*err)) - return NULL; - if (name == NULL) - return ucnv_open (NULL, err); - if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - return ucnv_open(u_austrcpy(asciiName, name), err); -} - -/* Copy the string that is represented by the UConverterPlatform enum - * @param platformString An output buffer - * @param platform An enum representing a platform - * @return the length of the copied string. - */ -static int32_t -ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) -{ - switch (pltfrm) - { - case UCNV_IBM: - uprv_strcpy(platformString, "ibm-"); - return 4; - case UCNV_UNKNOWN: - break; - } - - /* default to empty string */ - *platformString = 0; - return 0; -} - -/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls - *through createConverter*/ -U_CAPI UConverter* U_EXPORT2 -ucnv_openCCSID (int32_t codepage, - UConverterPlatform platform, - UErrorCode * err) -{ - char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - int32_t myNameLen; - - if (err == NULL || U_FAILURE (*err)) - return NULL; - - /* ucnv_copyPlatformString could return "ibm-" or "cp" */ - myNameLen = ucnv_copyPlatformString(myName, platform); - T_CString_integerToString(myName + myNameLen, codepage, 10); - - return ucnv_createConverter(NULL, myName, err); -} - -/* Creating a temporary stack-based object that can be used in one thread, -and created from a converter that is shared across threads. 
-*/ - -U_CAPI UConverter* U_EXPORT2 -ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) -{ - UConverter *localConverter, *allocatedConverter; - int32_t stackBufferSize; - int32_t bufferSizeNeeded; - char *stackBufferChars = (char *)stackBuffer; - UErrorCode cbErr; - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - - UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); - - if (status == NULL || U_FAILURE(*status)){ - UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); - return NULL; - } - - if (cnv == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - UTRACE_EXIT_STATUS(*status); - return NULL; - } - - UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", - ucnv_getName(cnv, status), cnv, stackBuffer); - - if (cnv->sharedData->impl->safeClone != NULL) { - /* call the custom safeClone function for sizing */ - bufferSizeNeeded = 0; - cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); - if (U_FAILURE(*status)) { - UTRACE_EXIT_STATUS(*status); - return NULL; - } - } - else - { - /* inherent sizing */ - bufferSizeNeeded = sizeof(UConverter); - } - - if (pBufferSize == NULL) { - stackBufferSize = 1; - pBufferSize = &stackBufferSize; - } else { - stackBufferSize = *pBufferSize; - if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - UTRACE_EXIT_VALUE(bufferSizeNeeded); - return NULL; - } - } - - - /* Pointers on 64-bit platforms need to be aligned - * on a 64-bit boundary in memory. 
- */ - if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { - int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); - if(stackBufferSize > offsetUp) { - stackBufferSize -= offsetUp; - stackBufferChars += offsetUp; - } else { - /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ - stackBufferSize = 1; - } - } - - stackBuffer = (void *)stackBufferChars; - - /* Now, see if we must allocate any memory */ - if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) - { - /* allocate one here...*/ - localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); - - if(localConverter == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - UTRACE_EXIT_STATUS(*status); - return NULL; - } - *status = U_SAFECLONE_ALLOCATED_WARNING; - - /* record the fact that memory was allocated */ - *pBufferSize = bufferSizeNeeded; - } else { - /* just use the stack buffer */ - localConverter = (UConverter*) stackBuffer; - allocatedConverter = NULL; - } - - uprv_memset(localConverter, 0, bufferSizeNeeded); - - /* Copy initial state */ - uprv_memcpy(localConverter, cnv, sizeof(UConverter)); - localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; - - /* copy the substitution string */ - if (cnv->subChars == (uint8_t *)cnv->subUChars) { - localConverter->subChars = (uint8_t *)localConverter->subUChars; - } else { - localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - if (localConverter->subChars == NULL) { - uprv_free(allocatedConverter); - UTRACE_EXIT_STATUS(*status); - return NULL; - } - uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - } - - /* now either call the safeclone fcn or not */ - if (cnv->sharedData->impl->safeClone != NULL) { - /* call the custom safeClone function */ - localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); - } - - if(localConverter==NULL || 
U_FAILURE(*status)) { - if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { - uprv_free(allocatedConverter->subChars); - } - uprv_free(allocatedConverter); - UTRACE_EXIT_STATUS(*status); - return NULL; - } - - /* increment refcount of shared data if needed */ - if (cnv->sharedData->isReferenceCounted) { - ucnv_incrementRefCount(cnv->sharedData); - } - - if(localConverter == (UConverter*)stackBuffer) { - /* we're using user provided data - set to not destroy */ - localConverter->isCopyLocal = TRUE; - } - - /* allow callback functions to handle any memory allocation */ - toUArgs.converter = fromUArgs.converter = localConverter; - cbErr = U_ZERO_ERROR; - cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); - cbErr = U_ZERO_ERROR; - cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); - - UTRACE_EXIT_PTR_STATUS(localConverter, *status); - return localConverter; -} - - - -/*Decreases the reference counter in the shared immutable section of the object - *and frees the mutable part*/ - -U_CAPI void U_EXPORT2 -ucnv_close (UConverter * converter) -{ - UErrorCode errorCode = U_ZERO_ERROR; - - UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); - - if (converter == NULL) - { - UTRACE_EXIT(); - return; - } - - UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", - ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); - - /* In order to speed up the close, only call the callbacks when they have been changed. - This performance check will only work when the callbacks are set within a shared library - or from user code that statically links this code. 
*/ - /* first, notify the callback functions that the converter is closed */ - if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - - toUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); - } - if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - fromUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); - } - - if (converter->sharedData->impl->close != NULL) { - converter->sharedData->impl->close(converter); - } - - if (converter->subChars != (uint8_t *)converter->subUChars) { - uprv_free(converter->subChars); - } - - if (converter->sharedData->isReferenceCounted) { - ucnv_unloadSharedDataIfReady(converter->sharedData); - } - - if(!converter->isCopyLocal){ - uprv_free(converter); - } - - UTRACE_EXIT(); -} - -/*returns a single Name from the list, will return NULL if out of bounds - */ -U_CAPI const char* U_EXPORT2 -ucnv_getAvailableName (int32_t n) -{ - if (0 <= n && n <= 0xffff) { - UErrorCode err = U_ZERO_ERROR; - const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); - if (U_SUCCESS(err)) { - return name; - } - } - return NULL; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_countAvailable () -{ - UErrorCode err = U_ZERO_ERROR; - return ucnv_bld_countAvailableConverters(&err); -} - -U_CAPI void U_EXPORT2 -ucnv_getSubstChars (const UConverter * converter, - char *mySubChar, - int8_t * len, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - - if (converter->subCharLen <= 0) { - /* Unicode string or empty string from 
ucnv_setSubstString(). */ - *len = 0; - return; - } - - if (*len < converter->subCharLen) /*not enough space in subChars */ - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ - *len = converter->subCharLen; /*store # of bytes copied to buffer */ -} - -U_CAPI void U_EXPORT2 -ucnv_setSubstChars (UConverter * converter, - const char *mySubChar, - int8_t len, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - - /*Makes sure that the subChar is within the codepages char length boundaries */ - if ((len > converter->sharedData->staticData->maxBytesPerChar) - || (len < converter->sharedData->staticData->minBytesPerChar)) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ - converter->subCharLen = len; /*sets the new len */ - - /* - * There is currently (2001Feb) no separate API to set/get subChar1. - * In order to always have subChar written after it is explicitly set, - * we set subChar1 to 0. - */ - converter->subChar1 = 0; - - return; -} - -U_CAPI void U_EXPORT2 -ucnv_setSubstString(UConverter *cnv, - const UChar *s, - int32_t length, - UErrorCode *err) { - UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; - char chars[UCNV_ERROR_BUFFER_LENGTH]; - - UConverter *clone; - uint8_t *subChars; - int32_t cloneSize, length8; - - /* Let the following functions check all arguments. 
*/ - cloneSize = sizeof(cloneBuffer); - clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); - ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); - length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); - ucnv_close(clone); - if (U_FAILURE(*err)) { - return; - } - - if (cnv->sharedData->impl->writeSub == NULL -#if !UCONFIG_NO_LEGACY_CONVERSION - || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && - ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) -#endif - ) { - /* The converter is not stateful. Store the charset bytes as a fixed string. */ - subChars = (uint8_t *)chars; - } else { - /* - * The converter has a non-default writeSub() function, indicating - * that it is stateful. - * Store the Unicode string for on-the-fly conversion for correct - * state handling. - */ - if (length > UCNV_ERROR_BUFFER_LENGTH) { - /* - * Should not occur. The converter should output at least one byte - * per UChar, which means that ucnv_fromUChars() should catch all - * overflows. - */ - *err = U_BUFFER_OVERFLOW_ERROR; - return; - } - subChars = (uint8_t *)s; - if (length < 0) { - length = u_strlen(s); - } - length8 = length * U_SIZEOF_UCHAR; - } - - /* - * For storing the substitution string, select either the small buffer inside - * UConverter or allocate a subChars buffer. - */ - if (length8 > UCNV_MAX_SUBCHAR_LEN) { - /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ - if (cnv->subChars == (uint8_t *)cnv->subUChars) { - /* Allocate a new buffer for the string. */ - cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - if (cnv->subChars == NULL) { - cnv->subChars = (uint8_t *)cnv->subUChars; - *err = U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - } - } - - /* Copy the substitution string into the UConverter or its subChars buffer. 
*/ - if (length8 == 0) { - cnv->subCharLen = 0; - } else { - uprv_memcpy(cnv->subChars, subChars, length8); - if (subChars == (uint8_t *)chars) { - cnv->subCharLen = (int8_t)length8; - } else /* subChars == s */ { - cnv->subCharLen = (int8_t)-length; - } - } - - /* See comment in ucnv_setSubstChars(). */ - cnv->subChar1 = 0; -} - -/*resets the internal states of a converter - *goal : have the same behaviour than a freshly created converter - */ -static void _reset(UConverter *converter, UConverterResetChoice choice, - UBool callCallback) { - if(converter == NULL) { - return; - } - - if(callCallback) { - /* first, notify the callback functions that the converter is reset */ - UErrorCode errorCode; - - if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - toUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); - } - if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - fromUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); - } - } - - /* now reset the converter itself */ - if(choice<=UCNV_RESET_TO_UNICODE) { - converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; - converter->mode = 0; - converter->toULength = 0; - converter->invalidCharLength = converter->UCharErrorBufferLength = 0; - converter->preToULength = 0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - converter->fromUnicodeStatus = 0; - converter->fromUChar32 = 0; - converter->invalidUCharLength = 
converter->charErrorBufferLength = 0; - converter->preFromUFirstCP = U_SENTINEL; - converter->preFromULength = 0; - } - - if (converter->sharedData->impl->reset != NULL) { - /* call the custom reset function */ - converter->sharedData->impl->reset(converter, choice); - } -} - -U_CAPI void U_EXPORT2 -ucnv_reset(UConverter *converter) -{ - _reset(converter, UCNV_RESET_BOTH, TRUE); -} - -U_CAPI void U_EXPORT2 -ucnv_resetToUnicode(UConverter *converter) -{ - _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); -} - -U_CAPI void U_EXPORT2 -ucnv_resetFromUnicode(UConverter *converter) -{ - _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); -} - -U_CAPI int8_t U_EXPORT2 -ucnv_getMaxCharSize (const UConverter * converter) -{ - return converter->maxBytesPerUChar; -} - - -U_CAPI int8_t U_EXPORT2 -ucnv_getMinCharSize (const UConverter * converter) -{ - return converter->sharedData->staticData->minBytesPerChar; -} - -U_CAPI const char* U_EXPORT2 -ucnv_getName (const UConverter * converter, UErrorCode * err) - -{ - if (U_FAILURE (*err)) - return NULL; - if(converter->sharedData->impl->getName){ - const char* temp= converter->sharedData->impl->getName(converter); - if(temp) - return temp; - } - return converter->sharedData->staticData->name; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_getCCSID(const UConverter * converter, - UErrorCode * err) -{ - int32_t ccsid; - if (U_FAILURE (*err)) - return -1; - - ccsid = converter->sharedData->staticData->codepage; - if (ccsid == 0) { - /* Rare case. This is for cases like gb18030, - which doesn't have an IBM canonical name, but does have an IBM alias. 
*/ - const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); - if (U_SUCCESS(*err) && standardName) { - const char *ccsidStr = uprv_strchr(standardName, '-'); - if (ccsidStr) { - ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ - } - } - } - return ccsid; -} - - -U_CAPI UConverterPlatform U_EXPORT2 -ucnv_getPlatform (const UConverter * converter, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return UCNV_UNKNOWN; - - return (UConverterPlatform)converter->sharedData->staticData->platform; -} - -U_CAPI void U_EXPORT2 - ucnv_getToUCallBack (const UConverter * converter, - UConverterToUCallback *action, - const void **context) -{ - *action = converter->fromCharErrorBehaviour; - *context = converter->toUContext; -} - -U_CAPI void U_EXPORT2 - ucnv_getFromUCallBack (const UConverter * converter, - UConverterFromUCallback *action, - const void **context) -{ - *action = converter->fromUCharErrorBehaviour; - *context = converter->fromUContext; -} - -U_CAPI void U_EXPORT2 -ucnv_setToUCallBack (UConverter * converter, - UConverterToUCallback newAction, - const void* newContext, - UConverterToUCallback *oldAction, - const void** oldContext, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - if (oldAction) *oldAction = converter->fromCharErrorBehaviour; - converter->fromCharErrorBehaviour = newAction; - if (oldContext) *oldContext = converter->toUContext; - converter->toUContext = newContext; -} - -U_CAPI void U_EXPORT2 -ucnv_setFromUCallBack (UConverter * converter, - UConverterFromUCallback newAction, - const void* newContext, - UConverterFromUCallback *oldAction, - const void** oldContext, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; - converter->fromUCharErrorBehaviour = newAction; - if (oldContext) *oldContext = converter->fromUContext; - converter->fromUContext = newContext; -} - -static void -_updateOffsets(int32_t *offsets, int32_t length, - 
int32_t sourceIndex, int32_t errorInputLength) { - int32_t *limit; - int32_t delta, offset; - - if(sourceIndex>=0) { - /* - * adjust each offset by adding the previous sourceIndex - * minus the length of the input sequence that caused an - * error, if any - */ - delta=sourceIndex-errorInputLength; - } else { - /* - * set each offset to -1 because this conversion function - * does not handle offsets - */ - delta=-1; - } - - limit=offsets+length; - if(delta==0) { - /* most common case, nothing to do */ - } else if(delta>0) { - /* add the delta to each offset (but not if the offset is <0) */ - while(offsets=0) { - *offsets=offset+delta; - } - ++offsets; - } - } else /* delta<0 */ { - /* - * set each offset to -1 because this conversion function - * does not handle offsets - * or the error input sequence started in a previous buffer - */ - while(offsetsconverter; - s=pArgs->source; - t=pArgs->target; - offsets=pArgs->offsets; - - /* get the converter implementation function */ - sourceIndex=0; - if(offsets==NULL) { - fromUnicode=cnv->sharedData->impl->fromUnicode; - } else { - fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; - if(fromUnicode==NULL) { - /* there is no WithOffsets implementation */ - fromUnicode=cnv->sharedData->impl->fromUnicode; - /* we will write -1 for each offset */ - sourceIndex=-1; - } - } - - if(cnv->preFromULength>=0) { - /* normal mode */ - realSource=NULL; - - /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ - realSourceLimit=NULL; - realFlush=FALSE; - realSourceIndex=0; - } else { - /* - * Previous m:n conversion stored source units from a partial match - * and failed to consume all of them. - * We need to "replay" them from a temporary buffer and convert them first. 
- */ - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preFromULength; - pArgs->flush=FALSE; - sourceIndex=-1; - - cnv->preFromULength=0; - } - - /* - * loop for conversion and error handling - * - * loop { - * convert - * loop { - * update offsets - * handle end of input - * handle errors/call callback - * } - * } - */ - for(;;) { - if(U_SUCCESS(*err)) { - /* convert */ - fromUnicode(pArgs, err); - - /* - * set a flag for whether the converter - * successfully processed the end of the input - * - * need not check cnv->preFromULength==0 because a replay (<0) will cause - * sflush && pArgs->source==pArgs->sourceLimit && - cnv->fromUChar32==0); - } else { - /* handle error from ucnv_convertEx() */ - converterSawEndOfInput=FALSE; - } - - /* no callback called yet for this iteration */ - calledCallback=FALSE; - - /* no sourceIndex adjustment for conversion, only for callback output */ - errorInputLength=0; - - /* - * loop for offsets and error handling - * - * iterates at most 3 times: - * 1. to clean up after the conversion function - * 2. after the callback - * 3. 
after the callback again if there was truncated input - */ - for(;;) { - /* update offsets if we write any */ - if(offsets!=NULL) { - int32_t length=(int32_t)(pArgs->target-t); - if(length>0) { - _updateOffsets(offsets, length, sourceIndex, errorInputLength); - - /* - * if a converter handles offsets and updates the offsets - * pointer at the end, then pArgs->offset should not change - * here; - * however, some converters do not handle offsets at all - * (sourceIndex<0) or may not update the offsets pointer - */ - pArgs->offsets=offsets+=length; - } - - if(sourceIndex>=0) { - sourceIndex+=(int32_t)(pArgs->source-s); - } - } - - if(cnv->preFromULength<0) { - /* - * switch the source to new replay units (cannot occur while replaying) - * after offset handling and before end-of-input and callback handling - */ - if(realSource==NULL) { - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preFromULength; - pArgs->flush=FALSE; - if((sourceIndex+=cnv->preFromULength)<0) { - sourceIndex=-1; - } - - cnv->preFromULength=0; - } else { - /* see implementation note before _fromUnicodeWithCallback() */ - U_ASSERT(realSource==NULL); - *err=U_INTERNAL_PROGRAM_ERROR; - } - } - - /* update pointers */ - s=pArgs->source; - t=pArgs->target; - - if(U_SUCCESS(*err)) { - if(ssourceLimit) { - /* - * continue with the conversion loop while there is still input left - * (continue converting by breaking out of only the inner loop) - */ - break; - } else if(realSource!=NULL) { - /* switch back from replaying to the real source and continue */ - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - sourceIndex=realSourceIndex; - - realSource=NULL; - break; - } else if(pArgs->flush && cnv->fromUChar32!=0) { - /* - * the entire input stream is consumed - * 
and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *err=U_TRUNCATED_CHAR_FOUND; - calledCallback=FALSE; /* new error condition */ - } else { - /* input consumed */ - if(pArgs->flush) { - /* - * return to the conversion loop once more if the flush - * flag is set and the conversion function has not - * successfully processed the end of the input yet - * - * (continue converting by breaking out of only the inner loop) - */ - if(!converterSawEndOfInput) { - break; - } - - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); - } - - /* done successfully */ - return; - } - } - - /* U_FAILURE(*err) */ - { - UErrorCode e; - - if( calledCallback || - (e=*err)==U_BUFFER_OVERFLOW_ERROR || - (e!=U_INVALID_CHAR_FOUND && - e!=U_ILLEGAL_CHAR_FOUND && - e!=U_TRUNCATED_CHAR_FOUND) - ) { - /* - * the callback did not or cannot resolve the error: - * set output pointers and return - * - * the check for buffer overflow is redundant but it is - * a high-runner case and hopefully documents the intent - * well - * - * if we were replaying, then the replay buffer must be - * copied back into the UConverter - * and the real arguments must be restored - */ - if(realSource!=NULL) { - int32_t length; - - U_ASSERT(cnv->preFromULength==0); - - length=(int32_t)(pArgs->sourceLimit-pArgs->source); - if(length>0) { - u_memcpy(cnv->preFromU, pArgs->source, length); - cnv->preFromULength=(int8_t)-length; - } - - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - } - - return; - } - } - - /* callback handling */ - { - UChar32 codePoint; - - /* get and write the code point */ - codePoint=cnv->fromUChar32; - errorInputLength=0; - U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); - cnv->invalidUCharLength=(int8_t)errorInputLength; - - /* set the converter state to deal with the next character */ - 
cnv->fromUChar32=0; - - /* call the callback function */ - cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, - cnv->invalidUCharBuffer, errorInputLength, codePoint, - *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, - err); - } - - /* - * loop back to the offset handling - * - * this flag will indicate after offset handling - * that a callback was called; - * if the callback did not resolve the error, then we return - */ - calledCallback=TRUE; - } - } -} - -/* - * Output the fromUnicode overflow buffer. - * Call this function if(cnv->charErrorBufferLength>0). - * @return TRUE if overflow - */ -static UBool -ucnv_outputOverflowFromUnicode(UConverter *cnv, - char **target, const char *targetLimit, - int32_t **pOffsets, - UErrorCode *err) { - int32_t *offsets; - char *overflow, *t; - int32_t i, length; - - t=*target; - if(pOffsets!=NULL) { - offsets=*pOffsets; - } else { - offsets=NULL; - } - - overflow=(char *)cnv->charErrorBuffer; - length=cnv->charErrorBufferLength; - i=0; - while(icharErrorBufferLength=(int8_t)j; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - *err=U_BUFFER_OVERFLOW_ERROR; - return TRUE; - } - - /* copy the overflow contents to the target */ - *t++=overflow[i++]; - if(offsets!=NULL) { - *offsets++=-1; /* no source index available for old output */ - } - } - - /* the overflow buffer is completely copied to the target */ - cnv->charErrorBufferLength=0; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - return FALSE; -} - -U_CAPI void U_EXPORT2 -ucnv_fromUnicode(UConverter *cnv, - char **target, const char *targetLimit, - const UChar **source, const UChar *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err) { - UConverterFromUnicodeArgs args; - const UChar *s; - char *t; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return; - } - - if(cnv==NULL || target==NULL || source==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - - if ((const void 
*)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { - /* - Prevent code from going into an infinite loop in case we do hit this - limit. The limit pointer is expected to be on a UChar * boundary. - This also prevents the next argument check from failing. - */ - sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); - } - - /* - * All these conditions should never happen. - * - * 1) Make sure that the limits are >= to the address source or target - * - * 2) Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be targetLimit=t+0x7fffffff; for example. - * - * 3) Make sure that the user didn't incorrectly cast a UChar * pointer - * to a char * pointer and provide an incomplete UChar code unit. 
- */ - if (sourceLimit(size_t)0x3fffffff && sourceLimit>s) || - ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || - (((const char *)sourceLimit-(const char *)s) & 1) != 0) - { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* output the target overflow buffer */ - if( cnv->charErrorBufferLength>0 && - ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) - ) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target may have moved, therefore stop using t */ - - if(!flush && s==sourceLimit && cnv->preFromULength>=0) { - /* the overflow buffer is emptied and there is no new input: we are done */ - return; - } - - /* - * Do not simply return with a buffer overflow error if - * !flush && t==targetLimit - * because it is possible that the source will not generate any output. - * For example, the skip callback may be called; - * it does not output anything. - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=flush; - args.offsets=offsets; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=*target; - args.targetLimit=targetLimit; - args.size=sizeof(args); - - _fromUnicodeWithCallback(&args, err); - - *source=args.source; - *target=args.target; -} - -/* ucnv_toUnicode() --------------------------------------------------------- */ - -static void -_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - UConverterToUnicode toUnicode; - UConverter *cnv; - const char *s; - UChar *t; - int32_t *offsets; - int32_t sourceIndex; - int32_t errorInputLength; - UBool converterSawEndOfInput, calledCallback; - - /* variables for m:n conversion */ - char replay[UCNV_EXT_MAX_BYTES]; - const char *realSource, *realSourceLimit; - int32_t realSourceIndex; - UBool realFlush; - - cnv=pArgs->converter; - s=pArgs->source; - t=pArgs->target; - offsets=pArgs->offsets; - - /* get the converter implementation function */ - sourceIndex=0; - if(offsets==NULL) { - 
toUnicode=cnv->sharedData->impl->toUnicode; - } else { - toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; - if(toUnicode==NULL) { - /* there is no WithOffsets implementation */ - toUnicode=cnv->sharedData->impl->toUnicode; - /* we will write -1 for each offset */ - sourceIndex=-1; - } - } - - if(cnv->preToULength>=0) { - /* normal mode */ - realSource=NULL; - - /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ - realSourceLimit=NULL; - realFlush=FALSE; - realSourceIndex=0; - } else { - /* - * Previous m:n conversion stored source units from a partial match - * and failed to consume all of them. - * We need to "replay" them from a temporary buffer and convert them first. - */ - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preToULength; - pArgs->flush=FALSE; - sourceIndex=-1; - - cnv->preToULength=0; - } - - /* - * loop for conversion and error handling - * - * loop { - * convert - * loop { - * update offsets - * handle end of input - * handle errors/call callback - * } - * } - */ - for(;;) { - if(U_SUCCESS(*err)) { - /* convert */ - toUnicode(pArgs, err); - - /* - * set a flag for whether the converter - * successfully processed the end of the input - * - * need not check cnv->preToULength==0 because a replay (<0) will cause - * sflush && pArgs->source==pArgs->sourceLimit && - cnv->toULength==0); - } else { - /* handle error from getNextUChar() or ucnv_convertEx() */ - converterSawEndOfInput=FALSE; - } - - /* no callback called yet for this iteration */ - calledCallback=FALSE; - - /* no sourceIndex adjustment for conversion, only for callback output */ - errorInputLength=0; - - /* - * loop for offsets and error handling - * - * iterates at most 3 times: - * 1. to clean up after the conversion function - * 2. after the callback - * 3. 
after the callback again if there was truncated input - */ - for(;;) { - /* update offsets if we write any */ - if(offsets!=NULL) { - int32_t length=(int32_t)(pArgs->target-t); - if(length>0) { - _updateOffsets(offsets, length, sourceIndex, errorInputLength); - - /* - * if a converter handles offsets and updates the offsets - * pointer at the end, then pArgs->offset should not change - * here; - * however, some converters do not handle offsets at all - * (sourceIndex<0) or may not update the offsets pointer - */ - pArgs->offsets=offsets+=length; - } - - if(sourceIndex>=0) { - sourceIndex+=(int32_t)(pArgs->source-s); - } - } - - if(cnv->preToULength<0) { - /* - * switch the source to new replay units (cannot occur while replaying) - * after offset handling and before end-of-input and callback handling - */ - if(realSource==NULL) { - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preToULength; - pArgs->flush=FALSE; - if((sourceIndex+=cnv->preToULength)<0) { - sourceIndex=-1; - } - - cnv->preToULength=0; - } else { - /* see implementation note before _fromUnicodeWithCallback() */ - U_ASSERT(realSource==NULL); - *err=U_INTERNAL_PROGRAM_ERROR; - } - } - - /* update pointers */ - s=pArgs->source; - t=pArgs->target; - - if(U_SUCCESS(*err)) { - if(ssourceLimit) { - /* - * continue with the conversion loop while there is still input left - * (continue converting by breaking out of only the inner loop) - */ - break; - } else if(realSource!=NULL) { - /* switch back from replaying to the real source and continue */ - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - sourceIndex=realSourceIndex; - - realSource=NULL; - break; - } else if(pArgs->flush && cnv->toULength>0) { - /* - * the entire input stream is consumed - * and there is a partial, 
truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *err=U_TRUNCATED_CHAR_FOUND; - calledCallback=FALSE; /* new error condition */ - } else { - /* input consumed */ - if(pArgs->flush) { - /* - * return to the conversion loop once more if the flush - * flag is set and the conversion function has not - * successfully processed the end of the input yet - * - * (continue converting by breaking out of only the inner loop) - */ - if(!converterSawEndOfInput) { - break; - } - - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); - } - - /* done successfully */ - return; - } - } - - /* U_FAILURE(*err) */ - { - UErrorCode e; - - if( calledCallback || - (e=*err)==U_BUFFER_OVERFLOW_ERROR || - (e!=U_INVALID_CHAR_FOUND && - e!=U_ILLEGAL_CHAR_FOUND && - e!=U_TRUNCATED_CHAR_FOUND && - e!=U_ILLEGAL_ESCAPE_SEQUENCE && - e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) - ) { - /* - * the callback did not or cannot resolve the error: - * set output pointers and return - * - * the check for buffer overflow is redundant but it is - * a high-runner case and hopefully documents the intent - * well - * - * if we were replaying, then the replay buffer must be - * copied back into the UConverter - * and the real arguments must be restored - */ - if(realSource!=NULL) { - int32_t length; - - U_ASSERT(cnv->preToULength==0); - - length=(int32_t)(pArgs->sourceLimit-pArgs->source); - if(length>0) { - uprv_memcpy(cnv->preToU, pArgs->source, length); - cnv->preToULength=(int8_t)-length; - } - - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - } - - return; - } - } - - /* copy toUBytes[] to invalidCharBuffer[] */ - errorInputLength=cnv->invalidCharLength=cnv->toULength; - if(errorInputLength>0) { - uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); - } - - /* set the converter state to deal with the next character */ - cnv->toULength=0; - - /* call the 
callback function */ - if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { - cnv->toUCallbackReason = UCNV_UNASSIGNED; - } - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, - cnv->invalidCharBuffer, errorInputLength, - cnv->toUCallbackReason, - err); - cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ - - /* - * loop back to the offset handling - * - * this flag will indicate after offset handling - * that a callback was called; - * if the callback did not resolve the error, then we return - */ - calledCallback=TRUE; - } - } -} - -/* - * Output the toUnicode overflow buffer. - * Call this function if(cnv->UCharErrorBufferLength>0). - * @return TRUE if overflow - */ -static UBool -ucnv_outputOverflowToUnicode(UConverter *cnv, - UChar **target, const UChar *targetLimit, - int32_t **pOffsets, - UErrorCode *err) { - int32_t *offsets; - UChar *overflow, *t; - int32_t i, length; - - t=*target; - if(pOffsets!=NULL) { - offsets=*pOffsets; - } else { - offsets=NULL; - } - - overflow=cnv->UCharErrorBuffer; - length=cnv->UCharErrorBufferLength; - i=0; - while(iUCharErrorBufferLength=(int8_t)j; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - *err=U_BUFFER_OVERFLOW_ERROR; - return TRUE; - } - - /* copy the overflow contents to the target */ - *t++=overflow[i++]; - if(offsets!=NULL) { - *offsets++=-1; /* no source index available for old output */ - } - } - - /* the overflow buffer is completely copied to the target */ - cnv->UCharErrorBufferLength=0; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - return FALSE; -} - -U_CAPI void U_EXPORT2 -ucnv_toUnicode(UConverter *cnv, - UChar **target, const UChar *targetLimit, - const char **source, const char *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err) { - UConverterToUnicodeArgs args; - const char *s; - UChar *t; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return; - } - - if(cnv==NULL || target==NULL || source==NULL) { - 
*err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - - if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { - /* - Prevent code from going into an infinite loop in case we do hit this - limit. The limit pointer is expected to be on a UChar * boundary. - This also prevents the next argument check from failing. - */ - targetLimit = (const UChar *)(((const char *)targetLimit) - 1); - } - - /* - * All these conditions should never happen. - * - * 1) Make sure that the limits are >= to the address source or target - * - * 2) Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be sourceLimit=t+0x7fffffff; for example. - * - * 3) Make sure that the user didn't incorrectly cast a UChar * pointer - * to a char * pointer and provide an incomplete UChar code unit. 
- */ - if (sourceLimit(size_t)0x7fffffff && sourceLimit>s) || - ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || - (((const char *)targetLimit-(const char *)t) & 1) != 0 - ) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* output the target overflow buffer */ - if( cnv->UCharErrorBufferLength>0 && - ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) - ) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target may have moved, therefore stop using t */ - - if(!flush && s==sourceLimit && cnv->preToULength>=0) { - /* the overflow buffer is emptied and there is no new input: we are done */ - return; - } - - /* - * Do not simply return with a buffer overflow error if - * !flush && t==targetLimit - * because it is possible that the source will not generate any output. - * For example, the skip callback may be called; - * it does not output anything. - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=flush; - args.offsets=offsets; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=*target; - args.targetLimit=targetLimit; - args.size=sizeof(args); - - _toUnicodeWithCallback(&args, err); - - *source=args.source; - *target=args.target; -} - -/* ucnv_to/fromUChars() ----------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ucnv_fromUChars(UConverter *cnv, - char *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const UChar *srcLimit; - char *originalDest, *destLimit; - int32_t destLength; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || - destCapacity<0 || (destCapacity>0 && dest==NULL) || - srcLength<-1 || (srcLength!=0 && src==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* initialize */ - ucnv_resetFromUnicode(cnv); - originalDest=dest; - if(srcLength==-1) { - srcLength=u_strlen(src); - } - if(srcLength>0) { - 
srcLimit=src+srcLength; - destLimit=dest+destCapacity; - - /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ - if(destLimit0 && dest==NULL) || - srcLength<-1 || (srcLength!=0 && src==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* initialize */ - ucnv_resetToUnicode(cnv); - originalDest=dest; - if(srcLength==-1) { - srcLength=(int32_t)uprv_strlen(src); - } - if(srcLength>0) { - srcLimit=src+srcLength; - destLimit=dest+destCapacity; - - /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ - if(destLimit(size_t)0x7fffffff && sourceLimit>s)) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - - c=U_SENTINEL; - - /* flush the target overflow buffer */ - if(cnv->UCharErrorBufferLength>0) { - UChar *overflow; - - overflow=cnv->UCharErrorBuffer; - i=0; - length=cnv->UCharErrorBufferLength; - U16_NEXT(overflow, i, length, c); - - /* move the remaining overflow contents up to the beginning */ - if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { - uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, - cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); - } - - if(!U16_IS_LEAD(c) || itoULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { - c=cnv->sharedData->impl->getNextUChar(&args, err); - *source=s=args.source; - if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); - return 0xffff; /* no output */ - } else if(U_SUCCESS(*err) && c>=0) { - return c; - /* - * else fall through to use _toUnicode() because - * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all - * U_FAILURE: call _toUnicode() for callback handling (do not output c) - */ - } - } - - /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ - _toUnicodeWithCallback(&args, err); - - if(*err==U_BUFFER_OVERFLOW_ERROR) { - *err=U_ZERO_ERROR; - } - - i=0; - 
length=(int32_t)(args.target-buffer); - } else { - /* write the lead surrogate from the overflow buffer */ - buffer[0]=(UChar)c; - args.target=buffer+1; - i=0; - length=1; - } - - /* buffer contents starts at i and ends before length */ - - if(U_FAILURE(*err)) { - c=0xffff; /* no output */ - } else if(length==0) { - /* no input or only state changes */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - /* no need to reset explicitly because _toUnicodeWithCallback() did it */ - c=0xffff; /* no output */ - } else { - c=buffer[0]; - i=1; - if(!U16_IS_LEAD(c)) { - /* consume c=buffer[0], done */ - } else { - /* got a lead surrogate, see if a trail surrogate follows */ - UChar c2; - - if(cnv->UCharErrorBufferLength>0) { - /* got overflow output from the conversion */ - if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { - /* got a trail surrogate, too */ - c=U16_GET_SUPPLEMENTARY(c, c2); - - /* move the remaining overflow contents up to the beginning */ - if((--cnv->UCharErrorBufferLength)>0) { - uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, - cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); - } - } else { - /* c is an unpaired lead surrogate, just return it */ - } - } else if(args.sourceUCharErrorBufferLength)>0) { - uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, - length*U_SIZEOF_UCHAR); - } - cnv->UCharErrorBufferLength=(int8_t)(length+delta); - - cnv->UCharErrorBuffer[0]=buffer[i++]; - if(delta>1) { - cnv->UCharErrorBuffer[1]=buffer[i]; - } - } - - *source=args.source; - return c; -} - -/* ucnv_convert() and siblings ---------------------------------------------- */ - -U_CAPI void U_EXPORT2 -ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, - char **target, const char *targetLimit, - const char **source, const char *sourceLimit, - UChar *pivotStart, UChar **pivotSource, - UChar **pivotTarget, const UChar *pivotLimit, - UBool reset, UBool flush, - UErrorCode *pErrorCode) { - UChar pivotBuffer[CHUNK_SIZE]; - const UChar *myPivotSource; - UChar 
*myPivotTarget; - const char *s; - char *t; - - UConverterToUnicodeArgs toUArgs; - UConverterFromUnicodeArgs fromUArgs; - UConverterConvert convert; - - /* error checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if( targetCnv==NULL || sourceCnv==NULL || - source==NULL || *source==NULL || - target==NULL || *target==NULL || targetLimit==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - if((sourceLimit!=NULL && sourceLimit(size_t)0x7fffffff && sourceLimit>s)) || - ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(pivotStart==NULL) { - if(!flush) { - /* streaming conversion requires an explicit pivot buffer */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* use the stack pivot buffer */ - myPivotSource=myPivotTarget=pivotStart=pivotBuffer; - pivotSource=(UChar **)&myPivotSource; - pivotTarget=&myPivotTarget; - pivotLimit=pivotBuffer+CHUNK_SIZE; - } else if( pivotStart>=pivotLimit || - pivotSource==NULL || *pivotSource==NULL || - pivotTarget==NULL || *pivotTarget==NULL || - pivotLimit==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(sourceLimit==NULL) { - /* get limit of single-byte-NUL-terminated source string */ - sourceLimit=uprv_strchr(*source, 0); - } - - if(reset) { - ucnv_resetToUnicode(sourceCnv); - ucnv_resetFromUnicode(targetCnv); - *pivotSource=*pivotTarget=pivotStart; - } else if(targetCnv->charErrorBufferLength>0) { - /* output the targetCnv overflow buffer */ - if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target has moved, therefore stop using t */ - - if( !flush && - targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && - sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit - ) { - /* the fromUnicode overflow buffer is emptied and 
there is no new input: we are done */ - return; - } - } - - /* Is direct-UTF-8 conversion available? */ - if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && - targetCnv->sharedData->impl->fromUTF8!=NULL - ) { - convert=targetCnv->sharedData->impl->fromUTF8; - } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && - sourceCnv->sharedData->impl->toUTF8!=NULL - ) { - convert=sourceCnv->sharedData->impl->toUTF8; - } else { - convert=NULL; - } - - /* - * If direct-UTF-8 conversion is available, then we use a smaller - * pivot buffer for error handling and partial matches - * so that we quickly return to direct conversion. - * - * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. - * - * We could reduce the pivot buffer size further, at the cost of - * buffer overflows from callbacks. - * The pivot buffer should not be smaller than the maximum number of - * fromUnicode extension table input UChars - * (for m:n conversion, see - * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) - * or 2 for surrogate pairs. - * - * Too small a buffer can cause thrashing between pivoting and direct - * conversion, with function call overhead outweighing the benefits - * of direct conversion. - */ - if(convert!=NULL && (pivotLimit-pivotStart)>32) { - pivotLimit=pivotStart+32; - } - - /* prepare the converter arguments */ - fromUArgs.converter=targetCnv; - fromUArgs.flush=FALSE; - fromUArgs.offsets=NULL; - fromUArgs.target=*target; - fromUArgs.targetLimit=targetLimit; - fromUArgs.size=sizeof(fromUArgs); - - toUArgs.converter=sourceCnv; - toUArgs.flush=flush; - toUArgs.offsets=NULL; - toUArgs.source=s; - toUArgs.sourceLimit=sourceLimit; - toUArgs.targetLimit=pivotLimit; - toUArgs.size=sizeof(toUArgs); - - /* - * TODO: Consider separating this function into two functions, - * extracting exactly the conversion loop, - * for readability and to reduce the set of visible variables. 
- * - * Otherwise stop using s and t from here on. - */ - s=t=NULL; - - /* - * conversion loop - * - * The sequence of steps in the loop may appear backward, - * but the principle is simple: - * In the chain of - * source - sourceCnv overflow - pivot - targetCnv overflow - target - * empty out later buffers before refilling them from earlier ones. - * - * The targetCnv overflow buffer is flushed out only once before the loop. - */ - for(;;) { - /* - * if(pivot not empty or error or replay or flush fromUnicode) { - * fromUnicode(pivot -> target); - * } - * - * For pivoting conversion; and for direct conversion for - * error callback handling and flushing the replay buffer. - */ - if( *pivotSource<*pivotTarget || - U_FAILURE(*pErrorCode) || - targetCnv->preFromULength<0 || - fromUArgs.flush - ) { - fromUArgs.source=*pivotSource; - fromUArgs.sourceLimit=*pivotTarget; - _fromUnicodeWithCallback(&fromUArgs, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - /* target overflow, or conversion error */ - *pivotSource=(UChar *)fromUArgs.source; - break; - } - - /* - * _fromUnicodeWithCallback() must have consumed the pivot contents - * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() - */ - } - - /* The pivot buffer is empty; reset it so we start at pivotStart. */ - *pivotSource=*pivotTarget=pivotStart; - - /* - * if(sourceCnv overflow buffer not empty) { - * move(sourceCnv overflow buffer -> pivot); - * continue; - * } - */ - /* output the sourceCnv overflow buffer */ - if(sourceCnv->UCharErrorBufferLength>0) { - if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { - /* U_BUFFER_OVERFLOW_ERROR */ - *pErrorCode=U_ZERO_ERROR; - } - continue; - } - - /* - * check for end of input and break if done - * - * Checking both flush and fromUArgs.flush ensures that the converters - * have been called with the flush flag set if the ucnv_convertEx() - * caller set it. 
- */ - if( toUArgs.source==sourceLimit && - sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && - (!flush || fromUArgs.flush) - ) { - /* done successfully */ - break; - } - - /* - * use direct conversion if available - * but not if continuing a partial match - * or flushing the toUnicode replay buffer - */ - if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { - if(*pErrorCode==U_USING_DEFAULT_WARNING) { - /* remove a warning that may be set by this function */ - *pErrorCode=U_ZERO_ERROR; - } - convert(&fromUArgs, &toUArgs, pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - break; - } else if(U_FAILURE(*pErrorCode)) { - if(sourceCnv->toULength>0) { - /* - * Fall through to calling _toUnicodeWithCallback() - * for callback handling. - * - * The pivot buffer will be reset with - * *pivotSource=*pivotTarget=pivotStart; - * which indicates a toUnicode error to the caller - * (*pivotSource==pivotStart shows no pivot UChars consumed). - */ - } else { - /* - * Indicate a fromUnicode error to the caller - * (*pivotSource>pivotStart shows some pivot UChars consumed). - */ - *pivotSource=*pivotTarget=pivotStart+1; - /* - * Loop around to calling _fromUnicodeWithCallbacks() - * for callback handling. - */ - continue; - } - } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { - /* - * No error, but the implementation requested to temporarily - * fall back to pivoting. - */ - *pErrorCode=U_ZERO_ERROR; - /* - * The following else branches are almost identical to the end-of-input - * handling in _toUnicodeWithCallback(). - * Avoid calling it just for the end of input. 
- */ - } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ - /* - * the entire input stream is consumed - * and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } else { - /* input consumed */ - if(flush) { - /* reset the converters without calling the callback functions */ - _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); - _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); - } - - /* done successfully */ - break; - } - } - - /* - * toUnicode(source -> pivot); - * - * For pivoting conversion; and for direct conversion for - * error callback handling, continuing partial matches - * and flushing the replay buffer. - * - * The pivot buffer is empty and reset. - */ - toUArgs.target=pivotStart; /* ==*pivotTarget */ - /* toUArgs.targetLimit=pivotLimit; already set before the loop */ - _toUnicodeWithCallback(&toUArgs, pErrorCode); - *pivotTarget=toUArgs.target; - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - /* pivot overflow: continue with the conversion loop */ - *pErrorCode=U_ZERO_ERROR; - } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { - /* conversion error, or there was nothing left to convert */ - break; - } - /* - * else: - * _toUnicodeWithCallback() wrote into the pivot buffer, - * continue with fromUnicode conversion. - * - * Set the fromUnicode flush flag if we flush and if toUnicode has - * processed the end of the input. 
- */ - if( flush && toUArgs.source==sourceLimit && - sourceCnv->preToULength>=0 && - sourceCnv->UCharErrorBufferLength==0 - ) { - fromUArgs.flush=TRUE; - } - } - - /* - * The conversion loop is exited when one of the following is true: - * - the entire source text has been converted successfully to the target buffer - * - a target buffer overflow occurred - * - a conversion error occurred - */ - - *source=toUArgs.source; - *target=fromUArgs.target; - - /* terminate the target buffer if possible */ - if(flush && U_SUCCESS(*pErrorCode)) { - if(*target!=targetLimit) { - **target=0; - if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { - *pErrorCode=U_ZERO_ERROR; - } - } else { - *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; - } - } -} - -/* internal implementation of ucnv_convert() etc. with preflighting */ -static int32_t -ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UChar pivotBuffer[CHUNK_SIZE]; - UChar *pivot, *pivot2; - - char *myTarget; - const char *sourceLimit; - const char *targetLimit; - int32_t targetLength=0; - - /* set up */ - if(sourceLength<0) { - sourceLimit=uprv_strchr(source, 0); - } else { - sourceLimit=source+sourceLength; - } - - /* if there is no input data, we're done */ - if(source==sourceLimit) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - pivot=pivot2=pivotBuffer; - myTarget=target; - targetLength=0; - - if(targetCapacity>0) { - /* perform real conversion */ - targetLimit=target+targetCapacity; - ucnv_convertEx(outConverter, inConverter, - &myTarget, targetLimit, - &source, sourceLimit, - pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, - FALSE, - TRUE, - pErrorCode); - targetLength=(int32_t)(myTarget-target); - } - - /* - * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing - * to it but continue the conversion in order to store in 
targetCapacity - * the number of bytes that was required. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) - { - char targetBuffer[CHUNK_SIZE]; - - targetLimit=targetBuffer+CHUNK_SIZE; - do { - *pErrorCode=U_ZERO_ERROR; - myTarget=targetBuffer; - ucnv_convertEx(outConverter, inConverter, - &myTarget, targetLimit, - &source, sourceLimit, - pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, - FALSE, - TRUE, - pErrorCode); - targetLength+=(int32_t)(myTarget-targetBuffer); - } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - - /* done with preflighting, set warnings and errors as appropriate */ - return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); - } - - /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ - return targetLength; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_convert(const char *toConverterName, const char *fromConverterName, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UConverter in, out; /* stack-allocated */ - UConverter *inConverter, *outConverter; - int32_t targetLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( source==NULL || sourceLength<-1 || - targetCapacity<0 || (targetCapacity>0 && target==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* if there is no input data, we're done */ - if(sourceLength==0 || (sourceLength<0 && *source==0)) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - /* create the converters */ - inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - ucnv_close(inConverter); - return 0; - } - - targetLength=ucnv_internalConvert(outConverter, inConverter, - target, targetCapacity, - source, sourceLength, - pErrorCode); - - 
ucnv_close(inConverter); - ucnv_close(outConverter); - - return targetLength; -} - -/* @internal */ -static int32_t -ucnv_convertAlgorithmic(UBool convertToAlgorithmic, - UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UConverter algoConverterStatic; /* stack-allocated */ - UConverter *algoConverter, *to, *from; - int32_t targetLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || source==NULL || sourceLength<-1 || - targetCapacity<0 || (targetCapacity>0 && target==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* if there is no input data, we're done */ - if(sourceLength==0 || (sourceLength<0 && *source==0)) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - /* create the algorithmic converter */ - algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, - "", 0, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* reset the other converter */ - if(convertToAlgorithmic) { - /* cnv->Unicode->algo */ - ucnv_resetToUnicode(cnv); - to=algoConverter; - from=cnv; - } else { - /* algo->Unicode->cnv */ - ucnv_resetFromUnicode(cnv); - from=algoConverter; - to=cnv; - } - - targetLength=ucnv_internalConvert(to, from, - target, targetCapacity, - source, sourceLength, - pErrorCode); - - ucnv_close(algoConverter); - - return targetLength; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toAlgorithmic(UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, - target, targetCapacity, - source, sourceLength, - pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucnv_fromAlgorithmic(UConverter *cnv, - UConverterType algorithmicType, - char *target, int32_t targetCapacity, - const char 
*source, int32_t sourceLength, - UErrorCode *pErrorCode) { - return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, - target, targetCapacity, - source, sourceLength, - pErrorCode); -} - -U_CAPI UConverterType U_EXPORT2 -ucnv_getType(const UConverter* converter) -{ - int8_t type = converter->sharedData->staticData->conversionType; -#if !UCONFIG_NO_LEGACY_CONVERSION - if(type == UCNV_MBCS) { - return ucnv_MBCSGetType(converter); - } -#endif - return (UConverterType)type; -} - -U_CAPI void U_EXPORT2 -ucnv_getStarters(const UConverter* converter, - UBool starters[256], - UErrorCode* err) -{ - if (err == NULL || U_FAILURE(*err)) { - return; - } - - if(converter->sharedData->impl->getStarters != NULL) { - converter->sharedData->impl->getStarters(converter, starters, err); - } else { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) -{ - UErrorCode errorCode; - const char *name; - int32_t i; - - if(cnv==NULL) { - return NULL; - } - - errorCode=U_ZERO_ERROR; - name=ucnv_getName(cnv, &errorCode); - if(U_FAILURE(errorCode)) { - return NULL; - } - - for(i=0; ivariant5c; - for(i=0; iuseFallback = usesFallback; -} - -U_CAPI UBool U_EXPORT2 -ucnv_usesFallback(const UConverter *cnv) -{ - return cnv->useFallback; -} - -U_CAPI void U_EXPORT2 -ucnv_getInvalidChars (const UConverter * converter, - char *errBytes, - int8_t * len, - UErrorCode * err) -{ - if (err == NULL || U_FAILURE(*err)) - { - return; - } - if (len == NULL || errBytes == NULL || converter == NULL) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (*len < converter->invalidCharLength) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if ((*len = converter->invalidCharLength) > 0) - { - uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); - } -} - -U_CAPI void U_EXPORT2 -ucnv_getInvalidUChars (const UConverter * converter, - UChar *errChars, - int8_t * len, - UErrorCode * err) -{ - if (err == NULL || U_FAILURE(*err)) - { - 
return; - } - if (len == NULL || errChars == NULL || converter == NULL) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (*len < converter->invalidUCharLength) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if ((*len = converter->invalidUCharLength) > 0) - { - u_memcpy (errChars, converter->invalidUCharBuffer, *len); - } -} - -#define SIG_MAX_LEN 5 - -U_CAPI const char* U_EXPORT2 -ucnv_detectUnicodeSignature( const char* source, - int32_t sourceLength, - int32_t* signatureLength, - UErrorCode* pErrorCode) { - int32_t dummy; - - /* initial 0xa5 bytes: make sure that if we read preFromUFirstCP >= 0){ - return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; - }else if(cnv->preFromULength < 0){ - return -cnv->preFromULength ; - }else if(cnv->fromUChar32 > 0){ - return 1; - } - return 0; - -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return -1; - } - if(cnv == NULL){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } - - if(cnv->preToULength > 0){ - return cnv->preToULength ; - }else if(cnv->preToULength < 0){ - return -cnv->preToULength; - }else if(cnv->toULength > 0){ - return cnv->toULength; - } - return 0; -} - -U_CAPI UBool U_EXPORT2 -ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ - if (U_FAILURE(*status)) { - return FALSE; - } - - if (cnv == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - - switch (ucnv_getType(cnv)) { - case UCNV_SBCS: - case UCNV_DBCS: - case UCNV_UTF32_BigEndian: - case UCNV_UTF32_LittleEndian: - case UCNV_UTF32: - case UCNV_US_ASCII: - return TRUE; - default: - return FALSE; - } -} -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/icu-small/source/common/ucnv.cpp b/deps/icu-small/source/common/ucnv.cpp new file mode 100644 index 0000000000..39ea5dfa66 --- /dev/null +++ 
b/deps/icu-small/source/common/ucnv.cpp @@ -0,0 +1,2918 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* ucnv.c: +* Implements APIs for the ICU's codeset conversion library; +* mostly calls through internal functions; +* created by Bertrand A. Damiba +* +* Modification History: +* +* Date Name Description +* 04/04/99 helena Fixed internal header inclusion. +* 05/09/00 helena Added implementation to handle fallback mappings. +* 06/20/2000 helena OS/400 port changes; mostly typecast. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "unicode/ucnv_err.h" +#include "unicode/uset.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "putilimp.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "utracimp.h" +#include "ustr_imp.h" +#include "ucnv_imp.h" +#include "ucnv_cnv.h" +#include "ucnv_bld.h" + +/* size of intermediate and preflighting buffers in ucnv_convert() */ +#define CHUNK_SIZE 1024 + +typedef struct UAmbiguousConverter { + const char *name; + const UChar variant5c; +} UAmbiguousConverter; + +static const UAmbiguousConverter ambiguousConverters[]={ + { "ibm-897_P100-1995", 0xa5 }, + { "ibm-942_P120-1999", 0xa5 }, + { "ibm-943_P130-1999", 0xa5 }, + { "ibm-946_P100-1995", 0xa5 }, + { "ibm-33722_P120-1999", 0xa5 }, + { "ibm-1041_P100-1995", 0xa5 }, + /*{ "ibm-54191_P100-2006", 0xa5 },*/ + /*{ "ibm-62383_P100-2007", 0xa5 },*/ + /*{ "ibm-891_P100-1995", 0x20a9 },*/ + { "ibm-944_P100-1995", 0x20a9 }, + { "ibm-949_P110-1999", 0x20a9 }, + { "ibm-1363_P110-1997", 0x20a9 }, + { 
"ISO_2022,locale=ko,version=0", 0x20a9 }, + { "ibm-1088_P100-1995", 0x20a9 } +}; + +/*Calls through createConverter */ +U_CAPI UConverter* U_EXPORT2 +ucnv_open (const char *name, + UErrorCode * err) +{ + UConverter *r; + + if (err == NULL || U_FAILURE (*err)) { + return NULL; + } + + r = ucnv_createConverter(NULL, name, err); + return r; +} + +U_CAPI UConverter* U_EXPORT2 +ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) +{ + return ucnv_createConverterFromPackage(packageName, converterName, err); +} + +/*Extracts the UChar* to a char* and calls through createConverter */ +U_CAPI UConverter* U_EXPORT2 +ucnv_openU (const UChar * name, + UErrorCode * err) +{ + char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + if (err == NULL || U_FAILURE(*err)) + return NULL; + if (name == NULL) + return ucnv_open (NULL, err); + if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + return ucnv_open(u_austrcpy(asciiName, name), err); +} + +/* Copy the string that is represented by the UConverterPlatform enum + * @param platformString An output buffer + * @param platform An enum representing a platform + * @return the length of the copied string. 
+ */ +static int32_t +ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) +{ + switch (pltfrm) + { + case UCNV_IBM: + uprv_strcpy(platformString, "ibm-"); + return 4; + case UCNV_UNKNOWN: + break; + } + + /* default to empty string */ + *platformString = 0; + return 0; +} + +/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls + *through createConverter*/ +U_CAPI UConverter* U_EXPORT2 +ucnv_openCCSID (int32_t codepage, + UConverterPlatform platform, + UErrorCode * err) +{ + char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + int32_t myNameLen; + + if (err == NULL || U_FAILURE (*err)) + return NULL; + + /* ucnv_copyPlatformString could return "ibm-" or "cp" */ + myNameLen = ucnv_copyPlatformString(myName, platform); + T_CString_integerToString(myName + myNameLen, codepage, 10); + + return ucnv_createConverter(NULL, myName, err); +} + +/* Creating a temporary stack-based object that can be used in one thread, +and created from a converter that is shared across threads. +*/ + +U_CAPI UConverter* U_EXPORT2 +ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) +{ + UConverter *localConverter, *allocatedConverter; + int32_t stackBufferSize; + int32_t bufferSizeNeeded; + char *stackBufferChars = (char *)stackBuffer; + UErrorCode cbErr; + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); + + if (status == NULL || U_FAILURE(*status)){ + UTRACE_EXIT_STATUS(status? 
*status: U_ILLEGAL_ARGUMENT_ERROR); + return NULL; + } + + if (cnv == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(*status); + return NULL; + } + + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", + ucnv_getName(cnv, status), cnv, stackBuffer); + + if (cnv->sharedData->impl->safeClone != NULL) { + /* call the custom safeClone function for sizing */ + bufferSizeNeeded = 0; + cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); + if (U_FAILURE(*status)) { + UTRACE_EXIT_STATUS(*status); + return NULL; + } + } + else + { + /* inherent sizing */ + bufferSizeNeeded = sizeof(UConverter); + } + + if (pBufferSize == NULL) { + stackBufferSize = 1; + pBufferSize = &stackBufferSize; + } else { + stackBufferSize = *pBufferSize; + if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + UTRACE_EXIT_VALUE(bufferSizeNeeded); + return NULL; + } + } + + + /* Pointers on 64-bit platforms need to be aligned + * on a 64-bit boundary in memory. 
+ */ + if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { + int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); + if(stackBufferSize > offsetUp) { + stackBufferSize -= offsetUp; + stackBufferChars += offsetUp; + } else { + /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ + stackBufferSize = 1; + } + } + + stackBuffer = (void *)stackBufferChars; + + /* Now, see if we must allocate any memory */ + if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) + { + /* allocate one here...*/ + localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); + + if(localConverter == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + UTRACE_EXIT_STATUS(*status); + return NULL; + } + *status = U_SAFECLONE_ALLOCATED_WARNING; + + /* record the fact that memory was allocated */ + *pBufferSize = bufferSizeNeeded; + } else { + /* just use the stack buffer */ + localConverter = (UConverter*) stackBuffer; + allocatedConverter = NULL; + } + + uprv_memset(localConverter, 0, bufferSizeNeeded); + + /* Copy initial state */ + uprv_memcpy(localConverter, cnv, sizeof(UConverter)); + localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; + + /* copy the substitution string */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + localConverter->subChars = (uint8_t *)localConverter->subUChars; + } else { + localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (localConverter->subChars == NULL) { + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + + /* now either call the safeclone fcn or not */ + if (cnv->sharedData->impl->safeClone != NULL) { + /* call the custom safeClone function */ + localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); + } + + if(localConverter==NULL || 
U_FAILURE(*status)) { + if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { + uprv_free(allocatedConverter->subChars); + } + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + + /* increment refcount of shared data if needed */ + if (cnv->sharedData->isReferenceCounted) { + ucnv_incrementRefCount(cnv->sharedData); + } + + if(localConverter == (UConverter*)stackBuffer) { + /* we're using user provided data - set to not destroy */ + localConverter->isCopyLocal = TRUE; + } + + /* allow callback functions to handle any memory allocation */ + toUArgs.converter = fromUArgs.converter = localConverter; + cbErr = U_ZERO_ERROR; + cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); + cbErr = U_ZERO_ERROR; + cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); + + UTRACE_EXIT_PTR_STATUS(localConverter, *status); + return localConverter; +} + + + +/*Decreases the reference counter in the shared immutable section of the object + *and frees the mutable part*/ + +U_CAPI void U_EXPORT2 +ucnv_close (UConverter * converter) +{ + UErrorCode errorCode = U_ZERO_ERROR; + + UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); + + if (converter == NULL) + { + UTRACE_EXIT(); + return; + } + + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", + ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); + + /* In order to speed up the close, only call the callbacks when they have been changed. + This performance check will only work when the callbacks are set within a shared library + or from user code that statically links this code. 
*/ + /* first, notify the callback functions that the converter is closed */ + if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); + } + if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); + } + + if (converter->sharedData->impl->close != NULL) { + converter->sharedData->impl->close(converter); + } + + if (converter->subChars != (uint8_t *)converter->subUChars) { + uprv_free(converter->subChars); + } + + if (converter->sharedData->isReferenceCounted) { + ucnv_unloadSharedDataIfReady(converter->sharedData); + } + + if(!converter->isCopyLocal){ + uprv_free(converter); + } + + UTRACE_EXIT(); +} + +/*returns a single Name from the list, will return NULL if out of bounds + */ +U_CAPI const char* U_EXPORT2 +ucnv_getAvailableName (int32_t n) +{ + if (0 <= n && n <= 0xffff) { + UErrorCode err = U_ZERO_ERROR; + const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); + if (U_SUCCESS(err)) { + return name; + } + } + return NULL; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_countAvailable () +{ + UErrorCode err = U_ZERO_ERROR; + return ucnv_bld_countAvailableConverters(&err); +} + +U_CAPI void U_EXPORT2 +ucnv_getSubstChars (const UConverter * converter, + char *mySubChar, + int8_t * len, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + + if (converter->subCharLen <= 0) { + /* Unicode string or empty string from 
ucnv_setSubstString(). */ + *len = 0; + return; + } + + if (*len < converter->subCharLen) /*not enough space in subChars */ + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ + *len = converter->subCharLen; /*store # of bytes copied to buffer */ +} + +U_CAPI void U_EXPORT2 +ucnv_setSubstChars (UConverter * converter, + const char *mySubChar, + int8_t len, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + + /*Makes sure that the subChar is within the codepages char length boundaries */ + if ((len > converter->sharedData->staticData->maxBytesPerChar) + || (len < converter->sharedData->staticData->minBytesPerChar)) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ + converter->subCharLen = len; /*sets the new len */ + + /* + * There is currently (2001Feb) no separate API to set/get subChar1. + * In order to always have subChar written after it is explicitly set, + * we set subChar1 to 0. + */ + converter->subChar1 = 0; + + return; +} + +U_CAPI void U_EXPORT2 +ucnv_setSubstString(UConverter *cnv, + const UChar *s, + int32_t length, + UErrorCode *err) { + UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; + char chars[UCNV_ERROR_BUFFER_LENGTH]; + + UConverter *clone; + uint8_t *subChars; + int32_t cloneSize, length8; + + /* Let the following functions check all arguments. 
*/ + cloneSize = sizeof(cloneBuffer); + clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); + ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); + length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); + ucnv_close(clone); + if (U_FAILURE(*err)) { + return; + } + + if (cnv->sharedData->impl->writeSub == NULL +#if !UCONFIG_NO_LEGACY_CONVERSION + || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && + ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) +#endif + ) { + /* The converter is not stateful. Store the charset bytes as a fixed string. */ + subChars = (uint8_t *)chars; + } else { + /* + * The converter has a non-default writeSub() function, indicating + * that it is stateful. + * Store the Unicode string for on-the-fly conversion for correct + * state handling. + */ + if (length > UCNV_ERROR_BUFFER_LENGTH) { + /* + * Should not occur. The converter should output at least one byte + * per UChar, which means that ucnv_fromUChars() should catch all + * overflows. + */ + *err = U_BUFFER_OVERFLOW_ERROR; + return; + } + subChars = (uint8_t *)s; + if (length < 0) { + length = u_strlen(s); + } + length8 = length * U_SIZEOF_UCHAR; + } + + /* + * For storing the substitution string, select either the small buffer inside + * UConverter or allocate a subChars buffer. + */ + if (length8 > UCNV_MAX_SUBCHAR_LEN) { + /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + /* Allocate a new buffer for the string. */ + cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (cnv->subChars == NULL) { + cnv->subChars = (uint8_t *)cnv->subUChars; + *err = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + } + + /* Copy the substitution string into the UConverter or its subChars buffer. 
*/ + if (length8 == 0) { + cnv->subCharLen = 0; + } else { + uprv_memcpy(cnv->subChars, subChars, length8); + if (subChars == (uint8_t *)chars) { + cnv->subCharLen = (int8_t)length8; + } else /* subChars == s */ { + cnv->subCharLen = (int8_t)-length; + } + } + + /* See comment in ucnv_setSubstChars(). */ + cnv->subChar1 = 0; +} + +/*resets the internal states of a converter + *goal : have the same behaviour than a freshly created converter + */ +static void _reset(UConverter *converter, UConverterResetChoice choice, + UBool callCallback) { + if(converter == NULL) { + return; + } + + if(callCallback) { + /* first, notify the callback functions that the converter is reset */ + UErrorCode errorCode; + + if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); + } + if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); + } + } + + /* now reset the converter itself */ + if(choice<=UCNV_RESET_TO_UNICODE) { + converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; + converter->mode = 0; + converter->toULength = 0; + converter->invalidCharLength = converter->UCharErrorBufferLength = 0; + converter->preToULength = 0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + converter->fromUnicodeStatus = 0; + converter->fromUChar32 = 0; + converter->invalidUCharLength = 
converter->charErrorBufferLength = 0; + converter->preFromUFirstCP = U_SENTINEL; + converter->preFromULength = 0; + } + + if (converter->sharedData->impl->reset != NULL) { + /* call the custom reset function */ + converter->sharedData->impl->reset(converter, choice); + } +} + +U_CAPI void U_EXPORT2 +ucnv_reset(UConverter *converter) +{ + _reset(converter, UCNV_RESET_BOTH, TRUE); +} + +U_CAPI void U_EXPORT2 +ucnv_resetToUnicode(UConverter *converter) +{ + _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); +} + +U_CAPI void U_EXPORT2 +ucnv_resetFromUnicode(UConverter *converter) +{ + _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); +} + +U_CAPI int8_t U_EXPORT2 +ucnv_getMaxCharSize (const UConverter * converter) +{ + return converter->maxBytesPerUChar; +} + + +U_CAPI int8_t U_EXPORT2 +ucnv_getMinCharSize (const UConverter * converter) +{ + return converter->sharedData->staticData->minBytesPerChar; +} + +U_CAPI const char* U_EXPORT2 +ucnv_getName (const UConverter * converter, UErrorCode * err) + +{ + if (U_FAILURE (*err)) + return NULL; + if(converter->sharedData->impl->getName){ + const char* temp= converter->sharedData->impl->getName(converter); + if(temp) + return temp; + } + return converter->sharedData->staticData->name; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter * converter, + UErrorCode * err) +{ + int32_t ccsid; + if (U_FAILURE (*err)) + return -1; + + ccsid = converter->sharedData->staticData->codepage; + if (ccsid == 0) { + /* Rare case. This is for cases like gb18030, + which doesn't have an IBM canonical name, but does have an IBM alias. 
*/ + const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); + if (U_SUCCESS(*err) && standardName) { + const char *ccsidStr = uprv_strchr(standardName, '-'); + if (ccsidStr) { + ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ + } + } + } + return ccsid; +} + + +U_CAPI UConverterPlatform U_EXPORT2 +ucnv_getPlatform (const UConverter * converter, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return UCNV_UNKNOWN; + + return (UConverterPlatform)converter->sharedData->staticData->platform; +} + +U_CAPI void U_EXPORT2 + ucnv_getToUCallBack (const UConverter * converter, + UConverterToUCallback *action, + const void **context) +{ + *action = converter->fromCharErrorBehaviour; + *context = converter->toUContext; +} + +U_CAPI void U_EXPORT2 + ucnv_getFromUCallBack (const UConverter * converter, + UConverterFromUCallback *action, + const void **context) +{ + *action = converter->fromUCharErrorBehaviour; + *context = converter->fromUContext; +} + +U_CAPI void U_EXPORT2 +ucnv_setToUCallBack (UConverter * converter, + UConverterToUCallback newAction, + const void* newContext, + UConverterToUCallback *oldAction, + const void** oldContext, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + if (oldAction) *oldAction = converter->fromCharErrorBehaviour; + converter->fromCharErrorBehaviour = newAction; + if (oldContext) *oldContext = converter->toUContext; + converter->toUContext = newContext; +} + +U_CAPI void U_EXPORT2 +ucnv_setFromUCallBack (UConverter * converter, + UConverterFromUCallback newAction, + const void* newContext, + UConverterFromUCallback *oldAction, + const void** oldContext, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; + converter->fromUCharErrorBehaviour = newAction; + if (oldContext) *oldContext = converter->fromUContext; + converter->fromUContext = newContext; +} + +static void +_updateOffsets(int32_t *offsets, int32_t length, + 
int32_t sourceIndex, int32_t errorInputLength) { + int32_t *limit; + int32_t delta, offset; + + if(sourceIndex>=0) { + /* + * adjust each offset by adding the previous sourceIndex + * minus the length of the input sequence that caused an + * error, if any + */ + delta=sourceIndex-errorInputLength; + } else { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + */ + delta=-1; + } + + limit=offsets+length; + if(delta==0) { + /* most common case, nothing to do */ + } else if(delta>0) { + /* add the delta to each offset (but not if the offset is <0) */ + while(offsets=0) { + *offsets=offset+delta; + } + ++offsets; + } + } else /* delta<0 */ { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + * or the error input sequence started in a previous buffer + */ + while(offsetsconverter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + if(offsets==NULL) { + fromUnicode=cnv->sharedData->impl->fromUnicode; + } else { + fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; + if(fromUnicode==NULL) { + /* there is no WithOffsets implementation */ + fromUnicode=cnv->sharedData->impl->fromUnicode; + /* we will write -1 for each offset */ + sourceIndex=-1; + } + } + + if(cnv->preFromULength>=0) { + /* normal mode */ + realSource=NULL; + + /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ + realSourceLimit=NULL; + realFlush=FALSE; + realSourceIndex=0; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. 
+ */ + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preFromULength; + pArgs->flush=FALSE; + sourceIndex=-1; + + cnv->preFromULength=0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + if(U_SUCCESS(*err)) { + /* convert */ + fromUnicode(pArgs, err); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preFromULength==0 because a replay (<0) will cause + * sflush && pArgs->source==pArgs->sourceLimit && + cnv->fromUChar32==0); + } else { + /* handle error from ucnv_convertEx() */ + converterSawEndOfInput=FALSE; + } + + /* no callback called yet for this iteration */ + calledCallback=FALSE; + + /* no sourceIndex adjustment for conversion, only for callback output */ + errorInputLength=0; + + /* + * loop for offsets and error handling + * + * iterates at most 3 times: + * 1. to clean up after the conversion function + * 2. after the callback + * 3. 
after the callback again if there was truncated input + */ + for(;;) { + /* update offsets if we write any */ + if(offsets!=NULL) { + int32_t length=(int32_t)(pArgs->target-t); + if(length>0) { + _updateOffsets(offsets, length, sourceIndex, errorInputLength); + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + pArgs->offsets=offsets+=length; + } + + if(sourceIndex>=0) { + sourceIndex+=(int32_t)(pArgs->source-s); + } + } + + if(cnv->preFromULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==NULL) { + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preFromULength; + pArgs->flush=FALSE; + if((sourceIndex+=cnv->preFromULength)<0) { + sourceIndex=-1; + } + + cnv->preFromULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + U_ASSERT(realSource==NULL); + *err=U_INTERNAL_PROGRAM_ERROR; + } + } + + /* update pointers */ + s=pArgs->source; + t=pArgs->target; + + if(U_SUCCESS(*err)) { + if(ssourceLimit) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if(realSource!=NULL) { + /* switch back from replaying to the real source and continue */ + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + sourceIndex=realSourceIndex; + + realSource=NULL; + break; + } else if(pArgs->flush && cnv->fromUChar32!=0) { + /* + * the entire input stream is consumed + * 
and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *err=U_TRUNCATED_CHAR_FOUND; + calledCallback=FALSE; /* new error condition */ + } else { + /* input consumed */ + if(pArgs->flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); + } + + /* done successfully */ + return; + } + } + + /* U_FAILURE(*err) */ + { + UErrorCode e; + + if( calledCallback || + (e=*err)==U_BUFFER_OVERFLOW_ERROR || + (e!=U_INVALID_CHAR_FOUND && + e!=U_ILLEGAL_CHAR_FOUND && + e!=U_TRUNCATED_CHAR_FOUND) + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=NULL) { + int32_t length; + + U_ASSERT(cnv->preFromULength==0); + + length=(int32_t)(pArgs->sourceLimit-pArgs->source); + if(length>0) { + u_memcpy(cnv->preFromU, pArgs->source, length); + cnv->preFromULength=(int8_t)-length; + } + + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + } + + return; + } + } + + /* callback handling */ + { + UChar32 codePoint; + + /* get and write the code point */ + codePoint=cnv->fromUChar32; + errorInputLength=0; + U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); + cnv->invalidUCharLength=(int8_t)errorInputLength; + + /* set the converter state to deal with the next character */ + 
cnv->fromUChar32=0; + + /* call the callback function */ + cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, + cnv->invalidUCharBuffer, errorInputLength, codePoint, + *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, + err); + } + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +/* + * Output the fromUnicode overflow buffer. + * Call this function if(cnv->charErrorBufferLength>0). + * @return TRUE if overflow + */ +static UBool +ucnv_outputOverflowFromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + int32_t **pOffsets, + UErrorCode *err) { + int32_t *offsets; + char *overflow, *t; + int32_t i, length; + + t=*target; + if(pOffsets!=NULL) { + offsets=*pOffsets; + } else { + offsets=NULL; + } + + overflow=(char *)cnv->charErrorBuffer; + length=cnv->charErrorBufferLength; + i=0; + while(icharErrorBufferLength=(int8_t)j; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + *err=U_BUFFER_OVERFLOW_ERROR; + return TRUE; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } + + /* the overflow buffer is completely copied to the target */ + cnv->charErrorBufferLength=0; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + return FALSE; +} + +U_CAPI void U_EXPORT2 +ucnv_fromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + const UChar **source, const UChar *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterFromUnicodeArgs args; + const UChar *s; + char *t; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return; + } + + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + + if ((const void 
*)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { + /* + Prevent code from going into an infinite loop in case we do hit this + limit. The limit pointer is expected to be on a UChar * boundary. + This also prevents the next argument check from failing. + */ + sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); + } + + /* + * All these conditions should never happen. + * + * 1) Make sure that the limits are >= to the address source or target + * + * 2) Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be targetLimit=t+0x7fffffff; for example. + * + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer + * to a char * pointer and provide an incomplete UChar code unit. 
+ */ + if (sourceLimit(size_t)0x3fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || + (((const char *)sourceLimit-(const char *)s) & 1) != 0) + { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* output the target overflow buffer */ + if( cnv->charErrorBufferLength>0 && + ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) + ) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target may have moved, therefore stop using t */ + + if(!flush && s==sourceLimit && cnv->preFromULength>=0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=*target; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _fromUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_toUnicode() --------------------------------------------------------- */ + +static void +_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + UConverterToUnicode toUnicode; + UConverter *cnv; + const char *s; + UChar *t; + int32_t *offsets; + int32_t sourceIndex; + int32_t errorInputLength; + UBool converterSawEndOfInput, calledCallback; + + /* variables for m:n conversion */ + char replay[UCNV_EXT_MAX_BYTES]; + const char *realSource, *realSourceLimit; + int32_t realSourceIndex; + UBool realFlush; + + cnv=pArgs->converter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + if(offsets==NULL) { + 
toUnicode=cnv->sharedData->impl->toUnicode; + } else { + toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; + if(toUnicode==NULL) { + /* there is no WithOffsets implementation */ + toUnicode=cnv->sharedData->impl->toUnicode; + /* we will write -1 for each offset */ + sourceIndex=-1; + } + } + + if(cnv->preToULength>=0) { + /* normal mode */ + realSource=NULL; + + /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ + realSourceLimit=NULL; + realFlush=FALSE; + realSourceIndex=0; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. + */ + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preToULength; + pArgs->flush=FALSE; + sourceIndex=-1; + + cnv->preToULength=0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + if(U_SUCCESS(*err)) { + /* convert */ + toUnicode(pArgs, err); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preToULength==0 because a replay (<0) will cause + * sflush && pArgs->source==pArgs->sourceLimit && + cnv->toULength==0); + } else { + /* handle error from getNextUChar() or ucnv_convertEx() */ + converterSawEndOfInput=FALSE; + } + + /* no callback called yet for this iteration */ + calledCallback=FALSE; + + /* no sourceIndex adjustment for conversion, only for callback output */ + errorInputLength=0; + + /* + * loop for offsets and error handling + * + * iterates at most 3 times: + * 1. to clean up after the conversion function + * 2. after the callback + * 3. 
after the callback again if there was truncated input + */ + for(;;) { + /* update offsets if we write any */ + if(offsets!=NULL) { + int32_t length=(int32_t)(pArgs->target-t); + if(length>0) { + _updateOffsets(offsets, length, sourceIndex, errorInputLength); + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + pArgs->offsets=offsets+=length; + } + + if(sourceIndex>=0) { + sourceIndex+=(int32_t)(pArgs->source-s); + } + } + + if(cnv->preToULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==NULL) { + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preToULength; + pArgs->flush=FALSE; + if((sourceIndex+=cnv->preToULength)<0) { + sourceIndex=-1; + } + + cnv->preToULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + U_ASSERT(realSource==NULL); + *err=U_INTERNAL_PROGRAM_ERROR; + } + } + + /* update pointers */ + s=pArgs->source; + t=pArgs->target; + + if(U_SUCCESS(*err)) { + if(ssourceLimit) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if(realSource!=NULL) { + /* switch back from replaying to the real source and continue */ + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + sourceIndex=realSourceIndex; + + realSource=NULL; + break; + } else if(pArgs->flush && cnv->toULength>0) { + /* + * the entire input stream is consumed + * and there is a partial, 
truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *err=U_TRUNCATED_CHAR_FOUND; + calledCallback=FALSE; /* new error condition */ + } else { + /* input consumed */ + if(pArgs->flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + } + + /* done successfully */ + return; + } + } + + /* U_FAILURE(*err) */ + { + UErrorCode e; + + if( calledCallback || + (e=*err)==U_BUFFER_OVERFLOW_ERROR || + (e!=U_INVALID_CHAR_FOUND && + e!=U_ILLEGAL_CHAR_FOUND && + e!=U_TRUNCATED_CHAR_FOUND && + e!=U_ILLEGAL_ESCAPE_SEQUENCE && + e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=NULL) { + int32_t length; + + U_ASSERT(cnv->preToULength==0); + + length=(int32_t)(pArgs->sourceLimit-pArgs->source); + if(length>0) { + uprv_memcpy(cnv->preToU, pArgs->source, length); + cnv->preToULength=(int8_t)-length; + } + + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + } + + return; + } + } + + /* copy toUBytes[] to invalidCharBuffer[] */ + errorInputLength=cnv->invalidCharLength=cnv->toULength; + if(errorInputLength>0) { + uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); + } + + /* set the converter state to deal with the next character */ + cnv->toULength=0; + + /* call the 
callback function */ + if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { + cnv->toUCallbackReason = UCNV_UNASSIGNED; + } + cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, + cnv->invalidCharBuffer, errorInputLength, + cnv->toUCallbackReason, + err); + cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +/* + * Output the toUnicode overflow buffer. + * Call this function if(cnv->UCharErrorBufferLength>0). + * @return TRUE if overflow + */ +static UBool +ucnv_outputOverflowToUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + int32_t **pOffsets, + UErrorCode *err) { + int32_t *offsets; + UChar *overflow, *t; + int32_t i, length; + + t=*target; + if(pOffsets!=NULL) { + offsets=*pOffsets; + } else { + offsets=NULL; + } + + overflow=cnv->UCharErrorBuffer; + length=cnv->UCharErrorBufferLength; + i=0; + while(iUCharErrorBufferLength=(int8_t)j; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + *err=U_BUFFER_OVERFLOW_ERROR; + return TRUE; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } + + /* the overflow buffer is completely copied to the target */ + cnv->UCharErrorBufferLength=0; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + return FALSE; +} + +U_CAPI void U_EXPORT2 +ucnv_toUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + const char **source, const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterToUnicodeArgs args; + const char *s; + UChar *t; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return; + } + + if(cnv==NULL || target==NULL || source==NULL) { + 
*err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + + if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { + /* + Prevent code from going into an infinite loop in case we do hit this + limit. The limit pointer is expected to be on a UChar * boundary. + This also prevents the next argument check from failing. + */ + targetLimit = (const UChar *)(((const char *)targetLimit) - 1); + } + + /* + * All these conditions should never happen. + * + * 1) Make sure that the limits are >= to the address source or target + * + * 2) Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be sourceLimit=t+0x7fffffff; for example. + * + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer + * to a char * pointer and provide an incomplete UChar code unit. 
+ */ + if (sourceLimit(size_t)0x7fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || + (((const char *)targetLimit-(const char *)t) & 1) != 0 + ) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* output the target overflow buffer */ + if( cnv->UCharErrorBufferLength>0 && + ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) + ) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target may have moved, therefore stop using t */ + + if(!flush && s==sourceLimit && cnv->preToULength>=0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=*target; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _toUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_to/fromUChars() ----------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ucnv_fromUChars(UConverter *cnv, + char *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + const UChar *srcLimit; + char *originalDest, *destLimit; + int32_t destLength; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || + destCapacity<0 || (destCapacity>0 && dest==NULL) || + srcLength<-1 || (srcLength!=0 && src==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* initialize */ + ucnv_resetFromUnicode(cnv); + originalDest=dest; + if(srcLength==-1) { + srcLength=u_strlen(src); + } + if(srcLength>0) { + 
srcLimit=src+srcLength; + destLimit=dest+destCapacity; + + /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ + if(destLimit0 && dest==NULL) || + srcLength<-1 || (srcLength!=0 && src==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* initialize */ + ucnv_resetToUnicode(cnv); + originalDest=dest; + if(srcLength==-1) { + srcLength=(int32_t)uprv_strlen(src); + } + if(srcLength>0) { + srcLimit=src+srcLength; + destLimit=dest+destCapacity; + + /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ + if(destLimit(size_t)0x7fffffff && sourceLimit>s)) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + c=U_SENTINEL; + + /* flush the target overflow buffer */ + if(cnv->UCharErrorBufferLength>0) { + UChar *overflow; + + overflow=cnv->UCharErrorBuffer; + i=0; + length=cnv->UCharErrorBufferLength; + U16_NEXT(overflow, i, length, c); + + /* move the remaining overflow contents up to the beginning */ + if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + + if(!U16_IS_LEAD(c) || itoULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { + c=cnv->sharedData->impl->getNextUChar(&args, err); + *source=s=args.source; + if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + return 0xffff; /* no output */ + } else if(U_SUCCESS(*err) && c>=0) { + return c; + /* + * else fall through to use _toUnicode() because + * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all + * U_FAILURE: call _toUnicode() for callback handling (do not output c) + */ + } + } + + /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ + _toUnicodeWithCallback(&args, err); + + if(*err==U_BUFFER_OVERFLOW_ERROR) { + *err=U_ZERO_ERROR; + } + + i=0; + 
length=(int32_t)(args.target-buffer); + } else { + /* write the lead surrogate from the overflow buffer */ + buffer[0]=(UChar)c; + args.target=buffer+1; + i=0; + length=1; + } + + /* buffer contents starts at i and ends before length */ + + if(U_FAILURE(*err)) { + c=0xffff; /* no output */ + } else if(length==0) { + /* no input or only state changes */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + /* no need to reset explicitly because _toUnicodeWithCallback() did it */ + c=0xffff; /* no output */ + } else { + c=buffer[0]; + i=1; + if(!U16_IS_LEAD(c)) { + /* consume c=buffer[0], done */ + } else { + /* got a lead surrogate, see if a trail surrogate follows */ + UChar c2; + + if(cnv->UCharErrorBufferLength>0) { + /* got overflow output from the conversion */ + if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { + /* got a trail surrogate, too */ + c=U16_GET_SUPPLEMENTARY(c, c2); + + /* move the remaining overflow contents up to the beginning */ + if((--cnv->UCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + } else { + /* c is an unpaired lead surrogate, just return it */ + } + } else if(args.sourceUCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, + length*U_SIZEOF_UCHAR); + } + cnv->UCharErrorBufferLength=(int8_t)(length+delta); + + cnv->UCharErrorBuffer[0]=buffer[i++]; + if(delta>1) { + cnv->UCharErrorBuffer[1]=buffer[i]; + } + } + + *source=args.source; + return c; +} + +/* ucnv_convert() and siblings ---------------------------------------------- */ + +U_CAPI void U_EXPORT2 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, + char **target, const char *targetLimit, + const char **source, const char *sourceLimit, + UChar *pivotStart, UChar **pivotSource, + UChar **pivotTarget, const UChar *pivotLimit, + UBool reset, UBool flush, + UErrorCode *pErrorCode) { + UChar pivotBuffer[CHUNK_SIZE]; + const UChar *myPivotSource; + UChar 
*myPivotTarget; + const char *s; + char *t; + + UConverterToUnicodeArgs toUArgs; + UConverterFromUnicodeArgs fromUArgs; + UConverterConvert convert; + + /* error checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if( targetCnv==NULL || sourceCnv==NULL || + source==NULL || *source==NULL || + target==NULL || *target==NULL || targetLimit==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + if((sourceLimit!=NULL && sourceLimit(size_t)0x7fffffff && sourceLimit>s)) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(pivotStart==NULL) { + if(!flush) { + /* streaming conversion requires an explicit pivot buffer */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* use the stack pivot buffer */ + myPivotSource=myPivotTarget=pivotStart=pivotBuffer; + pivotSource=(UChar **)&myPivotSource; + pivotTarget=&myPivotTarget; + pivotLimit=pivotBuffer+CHUNK_SIZE; + } else if( pivotStart>=pivotLimit || + pivotSource==NULL || *pivotSource==NULL || + pivotTarget==NULL || *pivotTarget==NULL || + pivotLimit==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(sourceLimit==NULL) { + /* get limit of single-byte-NUL-terminated source string */ + sourceLimit=uprv_strchr(*source, 0); + } + + if(reset) { + ucnv_resetToUnicode(sourceCnv); + ucnv_resetFromUnicode(targetCnv); + *pivotSource=*pivotTarget=pivotStart; + } else if(targetCnv->charErrorBufferLength>0) { + /* output the targetCnv overflow buffer */ + if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target has moved, therefore stop using t */ + + if( !flush && + targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && + sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit + ) { + /* the fromUnicode overflow buffer is emptied and 
there is no new input: we are done */ + return; + } + } + + /* Is direct-UTF-8 conversion available? */ + if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && + targetCnv->sharedData->impl->fromUTF8!=NULL + ) { + convert=targetCnv->sharedData->impl->fromUTF8; + } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && + sourceCnv->sharedData->impl->toUTF8!=NULL + ) { + convert=sourceCnv->sharedData->impl->toUTF8; + } else { + convert=NULL; + } + + /* + * If direct-UTF-8 conversion is available, then we use a smaller + * pivot buffer for error handling and partial matches + * so that we quickly return to direct conversion. + * + * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. + * + * We could reduce the pivot buffer size further, at the cost of + * buffer overflows from callbacks. + * The pivot buffer should not be smaller than the maximum number of + * fromUnicode extension table input UChars + * (for m:n conversion, see + * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) + * or 2 for surrogate pairs. + * + * Too small a buffer can cause thrashing between pivoting and direct + * conversion, with function call overhead outweighing the benefits + * of direct conversion. + */ + if(convert!=NULL && (pivotLimit-pivotStart)>32) { + pivotLimit=pivotStart+32; + } + + /* prepare the converter arguments */ + fromUArgs.converter=targetCnv; + fromUArgs.flush=FALSE; + fromUArgs.offsets=NULL; + fromUArgs.target=*target; + fromUArgs.targetLimit=targetLimit; + fromUArgs.size=sizeof(fromUArgs); + + toUArgs.converter=sourceCnv; + toUArgs.flush=flush; + toUArgs.offsets=NULL; + toUArgs.source=s; + toUArgs.sourceLimit=sourceLimit; + toUArgs.targetLimit=pivotLimit; + toUArgs.size=sizeof(toUArgs); + + /* + * TODO: Consider separating this function into two functions, + * extracting exactly the conversion loop, + * for readability and to reduce the set of visible variables. 
+ * + * Otherwise stop using s and t from here on. + */ + s=t=NULL; + + /* + * conversion loop + * + * The sequence of steps in the loop may appear backward, + * but the principle is simple: + * In the chain of + * source - sourceCnv overflow - pivot - targetCnv overflow - target + * empty out later buffers before refilling them from earlier ones. + * + * The targetCnv overflow buffer is flushed out only once before the loop. + */ + for(;;) { + /* + * if(pivot not empty or error or replay or flush fromUnicode) { + * fromUnicode(pivot -> target); + * } + * + * For pivoting conversion; and for direct conversion for + * error callback handling and flushing the replay buffer. + */ + if( *pivotSource<*pivotTarget || + U_FAILURE(*pErrorCode) || + targetCnv->preFromULength<0 || + fromUArgs.flush + ) { + fromUArgs.source=*pivotSource; + fromUArgs.sourceLimit=*pivotTarget; + _fromUnicodeWithCallback(&fromUArgs, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + /* target overflow, or conversion error */ + *pivotSource=(UChar *)fromUArgs.source; + break; + } + + /* + * _fromUnicodeWithCallback() must have consumed the pivot contents + * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() + */ + } + + /* The pivot buffer is empty; reset it so we start at pivotStart. */ + *pivotSource=*pivotTarget=pivotStart; + + /* + * if(sourceCnv overflow buffer not empty) { + * move(sourceCnv overflow buffer -> pivot); + * continue; + * } + */ + /* output the sourceCnv overflow buffer */ + if(sourceCnv->UCharErrorBufferLength>0) { + if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { + /* U_BUFFER_OVERFLOW_ERROR */ + *pErrorCode=U_ZERO_ERROR; + } + continue; + } + + /* + * check for end of input and break if done + * + * Checking both flush and fromUArgs.flush ensures that the converters + * have been called with the flush flag set if the ucnv_convertEx() + * caller set it. 
+ */ + if( toUArgs.source==sourceLimit && + sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && + (!flush || fromUArgs.flush) + ) { + /* done successfully */ + break; + } + + /* + * use direct conversion if available + * but not if continuing a partial match + * or flushing the toUnicode replay buffer + */ + if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { + if(*pErrorCode==U_USING_DEFAULT_WARNING) { + /* remove a warning that may be set by this function */ + *pErrorCode=U_ZERO_ERROR; + } + convert(&fromUArgs, &toUArgs, pErrorCode); + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + break; + } else if(U_FAILURE(*pErrorCode)) { + if(sourceCnv->toULength>0) { + /* + * Fall through to calling _toUnicodeWithCallback() + * for callback handling. + * + * The pivot buffer will be reset with + * *pivotSource=*pivotTarget=pivotStart; + * which indicates a toUnicode error to the caller + * (*pivotSource==pivotStart shows no pivot UChars consumed). + */ + } else { + /* + * Indicate a fromUnicode error to the caller + * (*pivotSource>pivotStart shows some pivot UChars consumed). + */ + *pivotSource=*pivotTarget=pivotStart+1; + /* + * Loop around to calling _fromUnicodeWithCallbacks() + * for callback handling. + */ + continue; + } + } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { + /* + * No error, but the implementation requested to temporarily + * fall back to pivoting. + */ + *pErrorCode=U_ZERO_ERROR; + /* + * The following else branches are almost identical to the end-of-input + * handling in _toUnicodeWithCallback(). + * Avoid calling it just for the end of input. 
+ */ + } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } else { + /* input consumed */ + if(flush) { + /* reset the converters without calling the callback functions */ + _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); + _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); + } + + /* done successfully */ + break; + } + } + + /* + * toUnicode(source -> pivot); + * + * For pivoting conversion; and for direct conversion for + * error callback handling, continuing partial matches + * and flushing the replay buffer. + * + * The pivot buffer is empty and reset. + */ + toUArgs.target=pivotStart; /* ==*pivotTarget */ + /* toUArgs.targetLimit=pivotLimit; already set before the loop */ + _toUnicodeWithCallback(&toUArgs, pErrorCode); + *pivotTarget=toUArgs.target; + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + /* pivot overflow: continue with the conversion loop */ + *pErrorCode=U_ZERO_ERROR; + } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { + /* conversion error, or there was nothing left to convert */ + break; + } + /* + * else: + * _toUnicodeWithCallback() wrote into the pivot buffer, + * continue with fromUnicode conversion. + * + * Set the fromUnicode flush flag if we flush and if toUnicode has + * processed the end of the input. 
+ */ + if( flush && toUArgs.source==sourceLimit && + sourceCnv->preToULength>=0 && + sourceCnv->UCharErrorBufferLength==0 + ) { + fromUArgs.flush=TRUE; + } + } + + /* + * The conversion loop is exited when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * - a target buffer overflow occurred + * - a conversion error occurred + */ + + *source=toUArgs.source; + *target=fromUArgs.target; + + /* terminate the target buffer if possible */ + if(flush && U_SUCCESS(*pErrorCode)) { + if(*target!=targetLimit) { + **target=0; + if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { + *pErrorCode=U_ZERO_ERROR; + } + } else { + *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; + } + } +} + +/* internal implementation of ucnv_convert() etc. with preflighting */ +static int32_t +ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UChar pivotBuffer[CHUNK_SIZE]; + UChar *pivot, *pivot2; + + char *myTarget; + const char *sourceLimit; + const char *targetLimit; + int32_t targetLength=0; + + /* set up */ + if(sourceLength<0) { + sourceLimit=uprv_strchr(source, 0); + } else { + sourceLimit=source+sourceLength; + } + + /* if there is no input data, we're done */ + if(source==sourceLimit) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + pivot=pivot2=pivotBuffer; + myTarget=target; + targetLength=0; + + if(targetCapacity>0) { + /* perform real conversion */ + targetLimit=target+targetCapacity; + ucnv_convertEx(outConverter, inConverter, + &myTarget, targetLimit, + &source, sourceLimit, + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, + FALSE, + TRUE, + pErrorCode); + targetLength=(int32_t)(myTarget-target); + } + + /* + * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing + * to it but continue the conversion in order to store in 
targetCapacity + * the number of bytes that was required. + */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) + { + char targetBuffer[CHUNK_SIZE]; + + targetLimit=targetBuffer+CHUNK_SIZE; + do { + *pErrorCode=U_ZERO_ERROR; + myTarget=targetBuffer; + ucnv_convertEx(outConverter, inConverter, + &myTarget, targetLimit, + &source, sourceLimit, + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, + FALSE, + TRUE, + pErrorCode); + targetLength+=(int32_t)(myTarget-targetBuffer); + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + + /* done with preflighting, set warnings and errors as appropriate */ + return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); + } + + /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ + return targetLength; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_convert(const char *toConverterName, const char *fromConverterName, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UConverter in, out; /* stack-allocated */ + UConverter *inConverter, *outConverter; + int32_t targetLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( source==NULL || sourceLength<-1 || + targetCapacity<0 || (targetCapacity>0 && target==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* if there is no input data, we're done */ + if(sourceLength==0 || (sourceLength<0 && *source==0)) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + /* create the converters */ + inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + ucnv_close(inConverter); + return 0; + } + + targetLength=ucnv_internalConvert(outConverter, inConverter, + target, targetCapacity, + source, sourceLength, + pErrorCode); + + 
ucnv_close(inConverter); + ucnv_close(outConverter); + + return targetLength; +} + +/* @internal */ +static int32_t +ucnv_convertAlgorithmic(UBool convertToAlgorithmic, + UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UConverter algoConverterStatic; /* stack-allocated */ + UConverter *algoConverter, *to, *from; + int32_t targetLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || source==NULL || sourceLength<-1 || + targetCapacity<0 || (targetCapacity>0 && target==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* if there is no input data, we're done */ + if(sourceLength==0 || (sourceLength<0 && *source==0)) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + /* create the algorithmic converter */ + algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, + "", 0, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* reset the other converter */ + if(convertToAlgorithmic) { + /* cnv->Unicode->algo */ + ucnv_resetToUnicode(cnv); + to=algoConverter; + from=cnv; + } else { + /* algo->Unicode->cnv */ + ucnv_resetFromUnicode(cnv); + from=algoConverter; + to=cnv; + } + + targetLength=ucnv_internalConvert(to, from, + target, targetCapacity, + source, sourceLength, + pErrorCode); + + ucnv_close(algoConverter); + + return targetLength; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toAlgorithmic(UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, + target, targetCapacity, + source, sourceLength, + pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucnv_fromAlgorithmic(UConverter *cnv, + UConverterType algorithmicType, + char *target, int32_t targetCapacity, + const char 
*source, int32_t sourceLength, + UErrorCode *pErrorCode) { + return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, + target, targetCapacity, + source, sourceLength, + pErrorCode); +} + +U_CAPI UConverterType U_EXPORT2 +ucnv_getType(const UConverter* converter) +{ + int8_t type = converter->sharedData->staticData->conversionType; +#if !UCONFIG_NO_LEGACY_CONVERSION + if(type == UCNV_MBCS) { + return ucnv_MBCSGetType(converter); + } +#endif + return (UConverterType)type; +} + +U_CAPI void U_EXPORT2 +ucnv_getStarters(const UConverter* converter, + UBool starters[256], + UErrorCode* err) +{ + if (err == NULL || U_FAILURE(*err)) { + return; + } + + if(converter->sharedData->impl->getStarters != NULL) { + converter->sharedData->impl->getStarters(converter, starters, err); + } else { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) +{ + UErrorCode errorCode; + const char *name; + int32_t i; + + if(cnv==NULL) { + return NULL; + } + + errorCode=U_ZERO_ERROR; + name=ucnv_getName(cnv, &errorCode); + if(U_FAILURE(errorCode)) { + return NULL; + } + + for(i=0; ivariant5c; + for(i=0; iuseFallback = usesFallback; +} + +U_CAPI UBool U_EXPORT2 +ucnv_usesFallback(const UConverter *cnv) +{ + return cnv->useFallback; +} + +U_CAPI void U_EXPORT2 +ucnv_getInvalidChars (const UConverter * converter, + char *errBytes, + int8_t * len, + UErrorCode * err) +{ + if (err == NULL || U_FAILURE(*err)) + { + return; + } + if (len == NULL || errBytes == NULL || converter == NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (*len < converter->invalidCharLength) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if ((*len = converter->invalidCharLength) > 0) + { + uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); + } +} + +U_CAPI void U_EXPORT2 +ucnv_getInvalidUChars (const UConverter * converter, + UChar *errChars, + int8_t * len, + UErrorCode * err) +{ + if (err == NULL || U_FAILURE(*err)) + { + 
return; + } + if (len == NULL || errChars == NULL || converter == NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (*len < converter->invalidUCharLength) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if ((*len = converter->invalidUCharLength) > 0) + { + u_memcpy (errChars, converter->invalidUCharBuffer, *len); + } +} + +#define SIG_MAX_LEN 5 + +U_CAPI const char* U_EXPORT2 +ucnv_detectUnicodeSignature( const char* source, + int32_t sourceLength, + int32_t* signatureLength, + UErrorCode* pErrorCode) { + int32_t dummy; + + /* initial 0xa5 bytes: make sure that if we read preFromUFirstCP >= 0){ + return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; + }else if(cnv->preFromULength < 0){ + return -cnv->preFromULength ; + }else if(cnv->fromUChar32 > 0){ + return 1; + } + return 0; + +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preToULength > 0){ + return cnv->preToULength ; + }else if(cnv->preToULength < 0){ + return -cnv->preToULength; + }else if(cnv->toULength > 0){ + return cnv->toULength; + } + return 0; +} + +U_CAPI UBool U_EXPORT2 +ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ + if (U_FAILURE(*status)) { + return FALSE; + } + + if (cnv == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + + switch (ucnv_getType(cnv)) { + case UCNV_SBCS: + case UCNV_DBCS: + case UCNV_UTF32_BigEndian: + case UCNV_UTF32_LittleEndian: + case UCNV_UTF32: + case UCNV_US_ASCII: + return TRUE; + default: + return FALSE; + } +} +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/deps/icu-small/source/common/ucnv2022.cpp b/deps/icu-small/source/common/ucnv2022.cpp index d1d947f93c..1b625ea06c 100644 --- a/deps/icu-small/source/common/ucnv2022.cpp +++ 
b/deps/icu-small/source/common/ucnv2022.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnv2022.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucnv_bld.cpp b/deps/icu-small/source/common/ucnv_bld.cpp index bfbb45a7d1..482034fd0c 100644 --- a/deps/icu-small/source/common/ucnv_bld.cpp +++ b/deps/icu-small/source/common/ucnv_bld.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************** diff --git a/deps/icu-small/source/common/ucnv_bld.h b/deps/icu-small/source/common/ucnv_bld.h index aeb858c9d5..16dd14408a 100644 --- a/deps/icu-small/source/common/ucnv_bld.h +++ b/deps/icu-small/source/common/ucnv_bld.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ucnv_cb.c b/deps/icu-small/source/common/ucnv_cb.c deleted file mode 100644 index 0c9cc2459f..0000000000 --- a/deps/icu-small/source/common/ucnv_cb.c +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2006, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * ucnv_cb.c: - * External APIs for the ICU's codeset conversion library - * Helena Shih - * - * Modification History: - * - * Date Name Description - * 7/28/2000 srl Implementation - */ - -/** - * @name Character Conversion C API - * - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv_cb.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -/* need to update the offsets when the target moves. */ -/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly -if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within -the same call stack if the complexity arises. */ -U_CAPI void U_EXPORT2 -ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, - const char* source, - int32_t length, - int32_t offsetIndex, - UErrorCode * err) -{ - if(U_FAILURE(*err)) { - return; - } - - ucnv_fromUWriteBytes( - args->converter, - source, length, - &args->target, args->targetLimit, - &args->offsets, offsetIndex, - err); -} - -U_CAPI void U_EXPORT2 -ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, - const UChar** source, - const UChar* sourceLimit, - int32_t offsetIndex, - UErrorCode * err) -{ - /* - This is a fun one. Recursion can occur - we're basically going to - just retry shoving data through the same converter. Note, if you got - here through some kind of invalid sequence, you maybe should emit a - reset sequence of some kind and/or call ucnv_reset(). Since this - IS an actual conversion, take care that you've changed the callback - or the data, or you'll get an infinite loop. 
- - Please set the err value to something reasonable before calling - into this. - */ - - char *oldTarget; - - if(U_FAILURE(*err)) - { - return; - } - - oldTarget = args->target; - - ucnv_fromUnicode(args->converter, - &args->target, - args->targetLimit, - source, - sourceLimit, - NULL, /* no offsets */ - FALSE, /* no flush */ - err); - - if(args->offsets) - { - while (args->target != oldTarget) /* if it moved at all.. */ - { - *(args->offsets)++ = offsetIndex; - oldTarget++; - } - } - - /* - Note, if you did something like used a Stop subcallback, things would get interesting. - In fact, here's where we want to return the partially consumed in-source! - */ - if(*err == U_BUFFER_OVERFLOW_ERROR) - /* && (*source < sourceLimit && args->target >= args->targetLimit) - -- S. Hrcek */ - { - /* Overflowed the target. Now, we'll write into the charErrorBuffer. - It's a fixed size. If we overflow it... Hmm */ - char *newTarget; - const char *newTargetLimit; - UErrorCode err2 = U_ZERO_ERROR; - - int8_t errBuffLen; - - errBuffLen = args->converter->charErrorBufferLength; - - /* start the new target at the first free slot in the errbuff.. */ - newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen); - - newTargetLimit = (char *)(args->converter->charErrorBuffer + - sizeof(args->converter->charErrorBuffer)); - - if(newTarget >= newTargetLimit) - { - *err = U_INTERNAL_PROGRAM_ERROR; - return; - } - - /* We're going to tell the converter that the errbuff len is empty. - This prevents the existing errbuff from being 'flushed' out onto - itself. If the errbuff is needed by the converter this time, - we're hosed - we're out of space! */ - - args->converter->charErrorBufferLength = 0; - - ucnv_fromUnicode(args->converter, - &newTarget, - newTargetLimit, - source, - sourceLimit, - NULL, - FALSE, - &err2); - - /* We can go ahead and overwrite the length here. We know just how - to recalculate it. 
*/ - - args->converter->charErrorBufferLength = (int8_t)( - newTarget - (char*)args->converter->charErrorBuffer); - - if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR)) - { - /* now we're REALLY in trouble. - Internal program error - callback shouldn't have written this much - data! - */ - *err = U_INTERNAL_PROGRAM_ERROR; - return; - } - /*else {*/ - /* sub errs could be invalid/truncated/illegal chars or w/e. - These might want to be passed on up.. But the problem is, we already - need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these - other errs.. */ - - /* - if(U_FAILURE(err2)) - ?? - */ - /*}*/ - } -} - -U_CAPI void U_EXPORT2 -ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, - int32_t offsetIndex, - UErrorCode * err) -{ - UConverter *converter; - int32_t length; - - if(U_FAILURE(*err)) { - return; - } - converter = args->converter; - length = converter->subCharLen; - - if(length == 0) { - return; - } - - if(length < 0) { - /* - * Write/convert the substitution string. Its real length is -length. - * Unlike the escape callback, we need not change the converter's - * callback function because ucnv_setSubstString() verified that - * the string can be converted, so we will not get a conversion error - * and will not recurse. - * At worst we should get a U_BUFFER_OVERFLOW_ERROR. - */ - const UChar *source = (const UChar *)converter->subChars; - ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err); - return; - } - - if(converter->sharedData->impl->writeSub!=NULL) { - converter->sharedData->impl->writeSub(args, offsetIndex, err); - } - else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) { - /* - TODO: Is this untestable because the MBCS converter has a writeSub function to call - and the other converters don't use subChar1? 
- */ - ucnv_cbFromUWriteBytes(args, - (const char *)&converter->subChar1, 1, - offsetIndex, err); - } - else { - ucnv_cbFromUWriteBytes(args, - (const char *)converter->subChars, length, - offsetIndex, err); - } -} - -U_CAPI void U_EXPORT2 -ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, - const UChar* source, - int32_t length, - int32_t offsetIndex, - UErrorCode * err) -{ - if(U_FAILURE(*err)) { - return; - } - - ucnv_toUWriteUChars( - args->converter, - source, length, - &args->target, args->targetLimit, - &args->offsets, offsetIndex, - err); -} - -U_CAPI void U_EXPORT2 -ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, - int32_t offsetIndex, - UErrorCode * err) -{ - static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD; - - /* could optimize this case, just one uchar */ - if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) { - ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err); - } else { - ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err); - } -} - -#endif diff --git a/deps/icu-small/source/common/ucnv_cb.cpp b/deps/icu-small/source/common/ucnv_cb.cpp new file mode 100644 index 0000000000..1bb0012014 --- /dev/null +++ b/deps/icu-small/source/common/ucnv_cb.cpp @@ -0,0 +1,261 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2006, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** + * ucnv_cb.c: + * External APIs for the ICU's codeset conversion library + * Helena Shih + * + * Modification History: + * + * Date Name Description + * 7/28/2000 srl Implementation + */ + +/** + * @name Character Conversion C API + * + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv_cb.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +/* need to update the offsets when the target moves. */ +/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly +if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within +the same call stack if the complexity arises. */ +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, + const char* source, + int32_t length, + int32_t offsetIndex, + UErrorCode * err) +{ + if(U_FAILURE(*err)) { + return; + } + + ucnv_fromUWriteBytes( + args->converter, + source, length, + &args->target, args->targetLimit, + &args->offsets, offsetIndex, + err); +} + +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, + const UChar** source, + const UChar* sourceLimit, + int32_t offsetIndex, + UErrorCode * err) +{ + /* + This is a fun one. Recursion can occur - we're basically going to + just retry shoving data through the same converter. Note, if you got + here through some kind of invalid sequence, you maybe should emit a + reset sequence of some kind and/or call ucnv_reset(). Since this + IS an actual conversion, take care that you've changed the callback + or the data, or you'll get an infinite loop. + + Please set the err value to something reasonable before calling + into this. 
+ */ + + char *oldTarget; + + if(U_FAILURE(*err)) + { + return; + } + + oldTarget = args->target; + + ucnv_fromUnicode(args->converter, + &args->target, + args->targetLimit, + source, + sourceLimit, + NULL, /* no offsets */ + FALSE, /* no flush */ + err); + + if(args->offsets) + { + while (args->target != oldTarget) /* if it moved at all.. */ + { + *(args->offsets)++ = offsetIndex; + oldTarget++; + } + } + + /* + Note, if you did something like used a Stop subcallback, things would get interesting. + In fact, here's where we want to return the partially consumed in-source! + */ + if(*err == U_BUFFER_OVERFLOW_ERROR) + /* && (*source < sourceLimit && args->target >= args->targetLimit) + -- S. Hrcek */ + { + /* Overflowed the target. Now, we'll write into the charErrorBuffer. + It's a fixed size. If we overflow it... Hmm */ + char *newTarget; + const char *newTargetLimit; + UErrorCode err2 = U_ZERO_ERROR; + + int8_t errBuffLen; + + errBuffLen = args->converter->charErrorBufferLength; + + /* start the new target at the first free slot in the errbuff.. */ + newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen); + + newTargetLimit = (char *)(args->converter->charErrorBuffer + + sizeof(args->converter->charErrorBuffer)); + + if(newTarget >= newTargetLimit) + { + *err = U_INTERNAL_PROGRAM_ERROR; + return; + } + + /* We're going to tell the converter that the errbuff len is empty. + This prevents the existing errbuff from being 'flushed' out onto + itself. If the errbuff is needed by the converter this time, + we're hosed - we're out of space! */ + + args->converter->charErrorBufferLength = 0; + + ucnv_fromUnicode(args->converter, + &newTarget, + newTargetLimit, + source, + sourceLimit, + NULL, + FALSE, + &err2); + + /* We can go ahead and overwrite the length here. We know just how + to recalculate it. 
*/ + + args->converter->charErrorBufferLength = (int8_t)( + newTarget - (char*)args->converter->charErrorBuffer); + + if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR)) + { + /* now we're REALLY in trouble. + Internal program error - callback shouldn't have written this much + data! + */ + *err = U_INTERNAL_PROGRAM_ERROR; + return; + } + /*else {*/ + /* sub errs could be invalid/truncated/illegal chars or w/e. + These might want to be passed on up.. But the problem is, we already + need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these + other errs.. */ + + /* + if(U_FAILURE(err2)) + ?? + */ + /*}*/ + } +} + +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, + int32_t offsetIndex, + UErrorCode * err) +{ + UConverter *converter; + int32_t length; + + if(U_FAILURE(*err)) { + return; + } + converter = args->converter; + length = converter->subCharLen; + + if(length == 0) { + return; + } + + if(length < 0) { + /* + * Write/convert the substitution string. Its real length is -length. + * Unlike the escape callback, we need not change the converter's + * callback function because ucnv_setSubstString() verified that + * the string can be converted, so we will not get a conversion error + * and will not recurse. + * At worst we should get a U_BUFFER_OVERFLOW_ERROR. + */ + const UChar *source = (const UChar *)converter->subChars; + ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err); + return; + } + + if(converter->sharedData->impl->writeSub!=NULL) { + converter->sharedData->impl->writeSub(args, offsetIndex, err); + } + else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) { + /* + TODO: Is this untestable because the MBCS converter has a writeSub function to call + and the other converters don't use subChar1? 
+ */ + ucnv_cbFromUWriteBytes(args, + (const char *)&converter->subChar1, 1, + offsetIndex, err); + } + else { + ucnv_cbFromUWriteBytes(args, + (const char *)converter->subChars, length, + offsetIndex, err); + } +} + +U_CAPI void U_EXPORT2 +ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, + const UChar* source, + int32_t length, + int32_t offsetIndex, + UErrorCode * err) +{ + if(U_FAILURE(*err)) { + return; + } + + ucnv_toUWriteUChars( + args->converter, + source, length, + &args->target, args->targetLimit, + &args->offsets, offsetIndex, + err); +} + +U_CAPI void U_EXPORT2 +ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, + int32_t offsetIndex, + UErrorCode * err) +{ + static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD; + + /* could optimize this case, just one uchar */ + if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) { + ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err); + } else { + ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err); + } +} + +#endif diff --git a/deps/icu-small/source/common/ucnv_cnv.c b/deps/icu-small/source/common/ucnv_cnv.c deleted file mode 100644 index 01f84829dd..0000000000 --- a/deps/icu-small/source/common/ucnv_cnv.c +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2004, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* uconv_cnv.c: -* Implements all the low level conversion functions -* T_UnicodeConverter_{to,from}Unicode_$ConversionType -* -* Change history: -* -* 06/29/2000 helena Major rewrite of the callback APIs. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv_err.h" -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "ucnv_cnv.h" -#include "ucnv_bld.h" -#include "cmemory.h" - -U_CFUNC void -ucnv_getCompleteUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - sa->addRange(sa->set, 0, 0x10ffff); -} - -U_CFUNC void -ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - sa->addRange(sa->set, 0, 0xd7ff); - sa->addRange(sa->set, 0xe000, 0x10ffff); -} - -U_CFUNC void -ucnv_fromUWriteBytes(UConverter *cnv, - const char *bytes, int32_t length, - char **target, const char *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) { - char *t=*target; - int32_t *o; - - /* write bytes */ - if(offsets==NULL || (o=*offsets)==NULL) { - while(length>0 && t0 && t0) { - if(cnv!=NULL) { - t=(char *)cnv->charErrorBuffer; - cnv->charErrorBufferLength=(int8_t)length; - do { - *t++=(uint8_t)*bytes++; - } while(--length>0); - } - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } -} - -U_CFUNC void -ucnv_toUWriteUChars(UConverter *cnv, - const UChar *uchars, int32_t length, - UChar **target, const UChar *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) { - UChar *t=*target; - int32_t *o; - - /* write UChars */ - if(offsets==NULL || (o=*offsets)==NULL) { - while(length>0 && t0 && t0) { - if(cnv!=NULL) { - t=cnv->UCharErrorBuffer; - cnv->UCharErrorBufferLength=(int8_t)length; - do { - *t++=*uchars++; - } while(--length>0); - } - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } -} - -U_CFUNC void -ucnv_toUWriteCodePoint(UConverter *cnv, - UChar32 c, - UChar **target, const UChar *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) { - UChar *t; - int32_t *o; - - t=*target; - - if(t=0) { - if(cnv!=NULL) { - int8_t i=0; - 
U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c); - cnv->UCharErrorBufferLength=i; - } - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } -} - -#endif diff --git a/deps/icu-small/source/common/ucnv_cnv.cpp b/deps/icu-small/source/common/ucnv_cnv.cpp new file mode 100644 index 0000000000..ea71acf92c --- /dev/null +++ b/deps/icu-small/source/common/ucnv_cnv.cpp @@ -0,0 +1,182 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* uconv_cnv.c: +* Implements all the low level conversion functions +* T_UnicodeConverter_{to,from}Unicode_$ConversionType +* +* Change history: +* +* 06/29/2000 helena Major rewrite of the callback APIs. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv_err.h" +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "ucnv_cnv.h" +#include "ucnv_bld.h" +#include "cmemory.h" + +U_CFUNC void +ucnv_getCompleteUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0x10ffff); +} + +U_CFUNC void +ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0xd7ff); + sa->addRange(sa->set, 0xe000, 0x10ffff); +} + +U_CFUNC void +ucnv_fromUWriteBytes(UConverter *cnv, + const char *bytes, int32_t length, + char **target, const char *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) { + char *t=*target; + int32_t *o; + + /* write bytes */ + if(offsets==NULL || 
(o=*offsets)==NULL) { + while(length>0 && t0 && t0) { + if(cnv!=NULL) { + t=(char *)cnv->charErrorBuffer; + cnv->charErrorBufferLength=(int8_t)length; + do { + *t++=(uint8_t)*bytes++; + } while(--length>0); + } + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } +} + +U_CFUNC void +ucnv_toUWriteUChars(UConverter *cnv, + const UChar *uchars, int32_t length, + UChar **target, const UChar *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) { + UChar *t=*target; + int32_t *o; + + /* write UChars */ + if(offsets==NULL || (o=*offsets)==NULL) { + while(length>0 && t0 && t0) { + if(cnv!=NULL) { + t=cnv->UCharErrorBuffer; + cnv->UCharErrorBufferLength=(int8_t)length; + do { + *t++=*uchars++; + } while(--length>0); + } + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } +} + +U_CFUNC void +ucnv_toUWriteCodePoint(UConverter *cnv, + UChar32 c, + UChar **target, const UChar *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) { + UChar *t; + int32_t *o; + + t=*target; + + if(t=0) { + if(cnv!=NULL) { + int8_t i=0; + U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c); + cnv->UCharErrorBufferLength=i; + } + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } +} + +#endif diff --git a/deps/icu-small/source/common/ucnv_cnv.h b/deps/icu-small/source/common/ucnv_cnv.h index 4897813077..a996e29597 100644 --- a/deps/icu-small/source/common/ucnv_cnv.h +++ b/deps/icu-small/source/common/ucnv_cnv.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ucnv_ct.c b/deps/icu-small/source/common/ucnv_ct.c deleted file mode 100644 index f76919c4a5..0000000000 --- a/deps/icu-small/source/common/ucnv_ct.c +++ /dev/null @@ -1,637 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2010-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_ct.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010Dec09 -* created by: Michael Ow -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "unicode/ucnv_err.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "ucnv_imp.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ucnvmbcs.h" -#include "cstring.h" -#include "cmemory.h" - -typedef enum { - INVALID = -2, - DO_SEARCH = -1, - - COMPOUND_TEXT_SINGLE_0 = 0, - COMPOUND_TEXT_SINGLE_1 = 1, - COMPOUND_TEXT_SINGLE_2 = 2, - COMPOUND_TEXT_SINGLE_3 = 3, - - COMPOUND_TEXT_DOUBLE_1 = 4, - COMPOUND_TEXT_DOUBLE_2 = 5, - COMPOUND_TEXT_DOUBLE_3 = 6, - COMPOUND_TEXT_DOUBLE_4 = 7, - COMPOUND_TEXT_DOUBLE_5 = 8, - COMPOUND_TEXT_DOUBLE_6 = 9, - COMPOUND_TEXT_DOUBLE_7 = 10, - - COMPOUND_TEXT_TRIPLE_DOUBLE = 11, - - IBM_915 = 12, - IBM_916 = 13, - IBM_914 = 14, - IBM_874 = 15, - IBM_912 = 16, - IBM_913 = 17, - ISO_8859_14 = 18, - IBM_923 = 19, - NUM_OF_CONVERTERS = 20 -} COMPOUND_TEXT_CONVERTERS; - -#define SEARCH_LENGTH 12 - -static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { - /* Single */ - { 0x1B, 0x2D, 0x41, 0, 0 }, - { 0x1B, 0x2D, 0x4D, 0, 0 }, - { 0x1B, 0x2D, 0x46, 0, 0 }, - { 0x1B, 0x2D, 0x47, 0, 0 }, - - /* Double */ - { 0x1B, 0x24, 0x29, 0x41, 0 }, - { 0x1B, 0x24, 0x29, 0x42, 0 }, - { 0x1B, 0x24, 0x29, 0x43, 0 }, - { 0x1B, 0x24, 0x29, 0x44, 0 }, - { 0x1B, 0x24, 0x29, 0x47, 0 }, - { 0x1B, 0x24, 0x29, 0x48, 0 }, - { 0x1B, 0x24, 0x29, 0x49, 0 }, - - /* Triple/Double */ - { 
0x1B, 0x25, 0x47, 0, 0 }, - - /*IBM-915*/ - { 0x1B, 0x2D, 0x4C, 0, 0 }, - /*IBM-916*/ - { 0x1B, 0x2D, 0x48, 0, 0 }, - /*IBM-914*/ - { 0x1B, 0x2D, 0x44, 0, 0 }, - /*IBM-874*/ - { 0x1B, 0x2D, 0x54, 0, 0 }, - /*IBM-912*/ - { 0x1B, 0x2D, 0x42, 0, 0 }, - /* IBM-913 */ - { 0x1B, 0x2D, 0x43, 0, 0 }, - /* ISO-8859_14 */ - { 0x1B, 0x2D, 0x5F, 0, 0 }, - /* IBM-923 */ - { 0x1B, 0x2D, 0x62, 0, 0 }, -}; - -#define ESC_START 0x1B - -#define isASCIIRange(codepoint) \ - ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \ - (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) - -#define isIBM915(codepoint) \ - ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) - -#define isIBM916(codepoint) \ - ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) - -#define isCompoundS3(codepoint) \ - ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \ - (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \ - (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE)) - -#define isCompoundS2(codepoint) \ - ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) - -#define isIBM914(codepoint) \ - ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \ - (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \ - (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \ - (codepoint >= 0x014A && codepoint <= 0x014D) || 
(codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \ - (codepoint == 0x0172) || (codepoint == 0x0173)) - -#define isIBM874(codepoint) \ - ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) - -#define isIBM912(codepoint) \ - ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \ - (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \ - (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \ - (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \ - (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \ - (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) - -#define isIBM913(codepoint) \ - ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \ - (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \ - (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \ - (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D)) - -#define isCompoundS1(codepoint) \ - ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \ - (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) - -#define isISO8859_14(codepoint) \ - ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \ - (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \ - (codepoint == 0x1E40) || 
(codepoint == 0x1E41) || (codepoint == 0x1E56) || \ - (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \ - (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \ - (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85)) - -#define isIBM923(codepoint) \ - ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) - - -typedef struct{ - UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; - COMPOUND_TEXT_CONVERTERS state; -} UConverterDataCompoundText; - -/*********** Compound Text Converter Protos ***********/ -static void -_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); - -static void - _CompoundTextClose(UConverter *converter); - -static void -_CompoundTextReset(UConverter *converter, UConverterResetChoice choice); - -static const char* -_CompoundTextgetName(const UConverter* cnv); - - -static int32_t findNextEsc(const char *source, const char *sourceLimit) { - int32_t length = sourceLimit - source; - int32_t i; - for (i = 1; i < length; i++) { - if (*(source + i) == 0x1B) { - return i; - } - } - - return length; -} - -static COMPOUND_TEXT_CONVERTERS getState(int codepoint) { - COMPOUND_TEXT_CONVERTERS state = DO_SEARCH; - - if (isASCIIRange(codepoint)) { - state = COMPOUND_TEXT_SINGLE_0; - } else if (isIBM912(codepoint)) { - state = IBM_912; - }else if (isIBM913(codepoint)) { - state = IBM_913; - } else if (isISO8859_14(codepoint)) { - state = ISO_8859_14; - } else if (isIBM923(codepoint)) { - state = IBM_923; - } else if (isIBM874(codepoint)) { - state = IBM_874; - } else if (isIBM914(codepoint)) { - state = IBM_914; - } else if (isCompoundS2(codepoint)) { - state = COMPOUND_TEXT_SINGLE_2; - } else if (isCompoundS3(codepoint)) { - state = COMPOUND_TEXT_SINGLE_3; - } else if (isIBM916(codepoint)) { - state = IBM_916; - } else if (isIBM915(codepoint)) { - state = IBM_915; - } else if (isCompoundS1(codepoint)) { - state 
= COMPOUND_TEXT_SINGLE_1; - } - - return state; -} - -static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) { - COMPOUND_TEXT_CONVERTERS state = INVALID; - UBool matchFound = FALSE; - int32_t i, n, offset = toUBytesBufferLength; - - for (i = 0; i < NUM_OF_CONVERTERS; i++) { - matchFound = TRUE; - for (n = 0; escSeqCompoundText[i][n] != 0; n++) { - if (n < toUBytesBufferLength) { - if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) { - matchFound = FALSE; - break; - } - } else if ((source + (n - offset)) >= sourceLimit) { - *err = U_TRUNCATED_CHAR_FOUND; - matchFound = FALSE; - break; - } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) { - matchFound = FALSE; - break; - } - } - - if (matchFound) { - break; - } - } - - if (matchFound) { - state = (COMPOUND_TEXT_CONVERTERS)i; - } - - return state; -} - -static void -_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ - cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText)); - if (cnv->extraInfo != NULL) { - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; - - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; - - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, 
&stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode); - - myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[ISO_8859_14] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_923] = 
ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode); - - if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { - _CompoundTextClose(cnv); - return; - } - - myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0; - } else { - *errorCode = U_MEMORY_ALLOCATION_ERROR; - } -} - - -static void -_CompoundTextClose(UConverter *converter) { - UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo); - int32_t i; - - if (converter->extraInfo != NULL) { - /*close the array of converter pointers and free the memory*/ - for (i = 0; i < NUM_OF_CONVERTERS; i++) { - if (myConverterData->myConverterArray[i] != NULL) { - ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]); - } - } - - uprv_free(converter->extraInfo); - } -} - -static void -_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { -} - -static const char* -_CompoundTextgetName(const UConverter* cnv){ - return "x11-compound-text"; -} - -static void -UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){ - UConverter *cnv = args->converter; - uint8_t *target = (uint8_t *) args->target; - const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; - const UChar* source = args->source; - const UChar* sourceLimit = args->sourceLimit; - /* int32_t* offsets = args->offsets; */ - UChar32 sourceChar; - UBool useFallback = cnv->useFallback; - uint8_t tmpTargetBuffer[7]; - int32_t tmpTargetBufferLength = 0; - COMPOUND_TEXT_CONVERTERS currentState, tmpState; - uint32_t pValue; - int32_t pValueLength = 0; - int32_t i, n, j; - - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; - - currentState = myConverterData->state; - - /* check if the last codepoint of previous buffer was a lead surrogate*/ - if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { - goto getTrail; - } - - while( source < sourceLimit){ - if(target < targetLimit){ - - 
sourceChar = *(source++); - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(sourceChar)) { - if(U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if(source < sourceLimit) { - /* test the following code unit */ - UChar trail=(UChar) *source; - if(U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - cnv->fromUChar32=0x00; - /* convert this supplementary code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* no more input */ - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } - - tmpTargetBufferLength = 0; - tmpState = getState(sourceChar); - - if (tmpState != DO_SEARCH && currentState != tmpState) { - /* Get escape sequence if necessary */ - currentState = tmpState; - for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { - tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; - } - } - - if (tmpState == DO_SEARCH) { - /* Test all available converters */ - for (i = 1; i < SEARCH_LENGTH; i++) { - pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback); - if (pValueLength > 0) { - tmpState = (COMPOUND_TEXT_CONVERTERS)i; - if (currentState != tmpState) { - currentState = tmpState; - for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { - tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j]; - } - } - for (n = (pValueLength - 1); n >= 0; n--) { - tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); - } - break; - } - } - } else if (tmpState == COMPOUND_TEXT_SINGLE_0) { - 
tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; - } else { - pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback); - if (pValueLength > 0) { - for (n = (pValueLength - 1); n >= 0; n--) { - tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); - } - } - } - - for (i = 0; i < tmpTargetBufferLength; i++) { - if (target < targetLimit) { - *target++ = tmpTargetBuffer[i]; - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - if (*err == U_BUFFER_OVERFLOW_ERROR) { - for (; i < tmpTargetBufferLength; i++) { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i]; - } - } - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /*save the state and return */ - myConverterData->state = currentState; - args->source = source; - args->target = (char*)target; -} - - -static void -UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args, - UErrorCode* err){ - const char *mySource = (char *) args->source; - UChar *myTarget = args->target; - const char *mySourceLimit = args->sourceLimit; - const char *tmpSourceLimit = mySourceLimit; - uint32_t mySourceChar = 0x0000; - COMPOUND_TEXT_CONVERTERS currentState, tmpState; - int32_t sourceOffset = 0; - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo; - UConverterSharedData* savedSharedData = NULL; - - UConverterToUnicodeArgs subArgs; - int32_t minArgsSize; - - /* set up the subconverter arguments */ - if(args->sizesize; - } else { - minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); - } - - uprv_memcpy(&subArgs, args, minArgsSize); - subArgs.size = (uint16_t)minArgsSize; - - currentState = tmpState = myConverterData->state; - - while(mySource < mySourceLimit){ - if(myTarget < args->targetLimit){ - if (args->converter->toULength > 0) { - mySourceChar = args->converter->toUBytes[0]; - } else { - 
mySourceChar = (uint8_t)*mySource; - } - - if (mySourceChar == ESC_START) { - tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err); - - if (*err == U_TRUNCATED_CHAR_FOUND) { - for (; mySource < mySourceLimit;) { - args->converter->toUBytes[args->converter->toULength++] = *mySource++; - } - *err = U_ZERO_ERROR; - break; - } else if (tmpState == INVALID) { - if (args->converter->toULength == 0) { - mySource++; /* skip over the 0x1b byte */ - } - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - - if (tmpState != currentState) { - currentState = tmpState; - } - - sourceOffset = uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength; - - mySource += sourceOffset; - - args->converter->toULength = 0; - } - - if (currentState == COMPOUND_TEXT_SINGLE_0) { - while (mySource < mySourceLimit) { - if (*mySource == ESC_START) { - break; - } - if (myTarget < args->targetLimit) { - *myTarget++ = 0x00ff&(*mySource++); - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } else if (mySource < mySourceLimit){ - sourceOffset = findNextEsc(mySource, mySourceLimit); - - tmpSourceLimit = mySource + sourceOffset; - - subArgs.source = mySource; - subArgs.sourceLimit = tmpSourceLimit; - subArgs.target = myTarget; - savedSharedData = subArgs.converter->sharedData; - subArgs.converter->sharedData = myConverterData->myConverterArray[currentState]; - - ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); - - subArgs.converter->sharedData = savedSharedData; - - mySource = subArgs.source; - myTarget = subArgs.target; - - if (U_FAILURE(*err)) { - if(*err == U_BUFFER_OVERFLOW_ERROR) { - if(subArgs.converter->UCharErrorBufferLength > 0) { - uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, - subArgs.converter->UCharErrorBufferLength); - } - args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; - subArgs.converter->UCharErrorBufferLength = 0; - } - 
break; - } - } - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - myConverterData->state = currentState; - args->target = myTarget; - args->source = mySource; -} - -static void -_CompoundText_GetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo; - int32_t i; - - for (i = 1; i < NUM_OF_CONVERTERS; i++) { - ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode); - } - sa->add(sa->set, 0x0000); - sa->add(sa->set, 0x0009); - sa->add(sa->set, 0x000A); - sa->addRange(sa->set, 0x0020, 0x007F); - sa->addRange(sa->set, 0x00A0, 0x00FF); -} - -static const UConverterImpl _CompoundTextImpl = { - - UCNV_COMPOUND_TEXT, - - NULL, - NULL, - - _CompoundTextOpen, - _CompoundTextClose, - _CompoundTextReset, - - UConverter_toUnicode_CompoundText_OFFSETS, - UConverter_toUnicode_CompoundText_OFFSETS, - UConverter_fromUnicode_CompoundText_OFFSETS, - UConverter_fromUnicode_CompoundText_OFFSETS, - NULL, - - NULL, - _CompoundTextgetName, - NULL, - NULL, - _CompoundText_GetUnicodeSet -}; -static const UConverterStaticData _CompoundTextStaticData = { - sizeof(UConverterStaticData), - "COMPOUND_TEXT", - 0, - UCNV_IBM, - UCNV_COMPOUND_TEXT, - 1, - 6, - { 0xef, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; -const UConverterSharedData _CompoundTextData = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl); - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnv_ct.cpp b/deps/icu-small/source/common/ucnv_ct.cpp new file mode 100644 index 0000000000..c9a0ce3693 --- /dev/null +++ b/deps/icu-small/source/common/ucnv_ct.cpp @@ -0,0 +1,645 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2010-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_ct.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010Dec09 +* created by: Michael Ow +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "unicode/ucnv_err.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "ucnv_imp.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ucnvmbcs.h" +#include "cstring.h" +#include "cmemory.h" + +typedef enum { + INVALID = -2, + DO_SEARCH = -1, + + COMPOUND_TEXT_SINGLE_0 = 0, + COMPOUND_TEXT_SINGLE_1 = 1, + COMPOUND_TEXT_SINGLE_2 = 2, + COMPOUND_TEXT_SINGLE_3 = 3, + + COMPOUND_TEXT_DOUBLE_1 = 4, + COMPOUND_TEXT_DOUBLE_2 = 5, + COMPOUND_TEXT_DOUBLE_3 = 6, + COMPOUND_TEXT_DOUBLE_4 = 7, + COMPOUND_TEXT_DOUBLE_5 = 8, + COMPOUND_TEXT_DOUBLE_6 = 9, + COMPOUND_TEXT_DOUBLE_7 = 10, + + COMPOUND_TEXT_TRIPLE_DOUBLE = 11, + + IBM_915 = 12, + IBM_916 = 13, + IBM_914 = 14, + IBM_874 = 15, + IBM_912 = 16, + IBM_913 = 17, + ISO_8859_14 = 18, + IBM_923 = 19, + NUM_OF_CONVERTERS = 20 +} COMPOUND_TEXT_CONVERTERS; + +#define SEARCH_LENGTH 12 + +static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { + /* Single */ + { 0x1B, 0x2D, 0x41, 0, 0 }, + { 0x1B, 0x2D, 0x4D, 0, 0 }, + { 0x1B, 0x2D, 0x46, 0, 0 }, + { 0x1B, 0x2D, 0x47, 0, 0 }, + + /* Double */ + { 0x1B, 0x24, 0x29, 0x41, 0 }, + { 0x1B, 0x24, 0x29, 0x42, 0 }, + { 0x1B, 0x24, 0x29, 0x43, 0 }, + { 0x1B, 0x24, 0x29, 0x44, 0 }, + { 0x1B, 0x24, 0x29, 0x47, 0 }, + { 0x1B, 0x24, 0x29, 0x48, 0 }, + { 0x1B, 0x24, 0x29, 0x49, 0 }, + + /* Triple/Double */ + { 
0x1B, 0x25, 0x47, 0, 0 }, + + /*IBM-915*/ + { 0x1B, 0x2D, 0x4C, 0, 0 }, + /*IBM-916*/ + { 0x1B, 0x2D, 0x48, 0, 0 }, + /*IBM-914*/ + { 0x1B, 0x2D, 0x44, 0, 0 }, + /*IBM-874*/ + { 0x1B, 0x2D, 0x54, 0, 0 }, + /*IBM-912*/ + { 0x1B, 0x2D, 0x42, 0, 0 }, + /* IBM-913 */ + { 0x1B, 0x2D, 0x43, 0, 0 }, + /* ISO-8859_14 */ + { 0x1B, 0x2D, 0x5F, 0, 0 }, + /* IBM-923 */ + { 0x1B, 0x2D, 0x62, 0, 0 }, +}; + +#define ESC_START 0x1B + +#define isASCIIRange(codepoint) \ + ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \ + (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) + +#define isIBM915(codepoint) \ + ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) + +#define isIBM916(codepoint) \ + ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) + +#define isCompoundS3(codepoint) \ + ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \ + (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \ + (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE)) + +#define isCompoundS2(codepoint) \ + ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) + +#define isIBM914(codepoint) \ + ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \ + (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \ + (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \ + (codepoint >= 0x014A && codepoint <= 0x014D) || 
(codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \ + (codepoint == 0x0172) || (codepoint == 0x0173)) + +#define isIBM874(codepoint) \ + ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) + +#define isIBM912(codepoint) \ + ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \ + (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \ + (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \ + (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \ + (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \ + (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) + +#define isIBM913(codepoint) \ + ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \ + (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \ + (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \ + (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D)) + +#define isCompoundS1(codepoint) \ + ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \ + (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) + +#define isISO8859_14(codepoint) \ + ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \ + (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \ + (codepoint == 0x1E40) || 
(codepoint == 0x1E41) || (codepoint == 0x1E56) || \ + (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \ + (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \ + (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85)) + +#define isIBM923(codepoint) \ + ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) + + +typedef struct{ + UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; + COMPOUND_TEXT_CONVERTERS state; +} UConverterDataCompoundText; + +/*********** Compound Text Converter Protos ***********/ +U_CDECL_BEGIN +static void U_CALLCONV +_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); + +static void U_CALLCONV + _CompoundTextClose(UConverter *converter); + +static void U_CALLCONV +_CompoundTextReset(UConverter *converter, UConverterResetChoice choice); + +static const char* U_CALLCONV +_CompoundTextgetName(const UConverter* cnv); + + +static int32_t findNextEsc(const char *source, const char *sourceLimit) { + int32_t length = sourceLimit - source; + int32_t i; + for (i = 1; i < length; i++) { + if (*(source + i) == 0x1B) { + return i; + } + } + + return length; +} + +static COMPOUND_TEXT_CONVERTERS getState(int codepoint) { + COMPOUND_TEXT_CONVERTERS state = DO_SEARCH; + + if (isASCIIRange(codepoint)) { + state = COMPOUND_TEXT_SINGLE_0; + } else if (isIBM912(codepoint)) { + state = IBM_912; + }else if (isIBM913(codepoint)) { + state = IBM_913; + } else if (isISO8859_14(codepoint)) { + state = ISO_8859_14; + } else if (isIBM923(codepoint)) { + state = IBM_923; + } else if (isIBM874(codepoint)) { + state = IBM_874; + } else if (isIBM914(codepoint)) { + state = IBM_914; + } else if (isCompoundS2(codepoint)) { + state = COMPOUND_TEXT_SINGLE_2; + } else if (isCompoundS3(codepoint)) { + state = COMPOUND_TEXT_SINGLE_3; + } else if (isIBM916(codepoint)) { + state = IBM_916; + } else if (isIBM915(codepoint)) { + state 
= IBM_915; + } else if (isCompoundS1(codepoint)) { + state = COMPOUND_TEXT_SINGLE_1; + } + + return state; +} + +static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) { + COMPOUND_TEXT_CONVERTERS state = INVALID; + UBool matchFound = FALSE; + int32_t i, n, offset = toUBytesBufferLength; + + for (i = 0; i < NUM_OF_CONVERTERS; i++) { + matchFound = TRUE; + for (n = 0; escSeqCompoundText[i][n] != 0; n++) { + if (n < toUBytesBufferLength) { + if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) { + matchFound = FALSE; + break; + } + } else if ((source + (n - offset)) >= sourceLimit) { + *err = U_TRUNCATED_CHAR_FOUND; + matchFound = FALSE; + break; + } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) { + matchFound = FALSE; + break; + } + } + + if (matchFound) { + break; + } + } + + if (matchFound) { + state = (COMPOUND_TEXT_CONVERTERS)i; + } + + return state; +} + +static void U_CALLCONV +_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ + cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText)); + if (cnv->extraInfo != NULL) { + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; + + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = 
ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode); + + myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[ISO_8859_14] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, 
&stackArgs, errorCode); + myConverterData->myConverterArray[IBM_923] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode); + + if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { + _CompoundTextClose(cnv); + return; + } + + myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0; + } else { + *errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + + +static void U_CALLCONV +_CompoundTextClose(UConverter *converter) { + UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo); + int32_t i; + + if (converter->extraInfo != NULL) { + /*close the array of converter pointers and free the memory*/ + for (i = 0; i < NUM_OF_CONVERTERS; i++) { + if (myConverterData->myConverterArray[i] != NULL) { + ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]); + } + } + + uprv_free(converter->extraInfo); + } +} + +static void U_CALLCONV +_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { + (void)converter; + (void)choice; +} + +static const char* U_CALLCONV +_CompoundTextgetName(const UConverter* cnv){ + (void)cnv; + return "x11-compound-text"; +} + +static void U_CALLCONV +UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){ + UConverter *cnv = args->converter; + uint8_t *target = (uint8_t *) args->target; + const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; + const UChar* source = args->source; + const UChar* sourceLimit = args->sourceLimit; + /* int32_t* offsets = args->offsets; */ + UChar32 sourceChar; + UBool useFallback = cnv->useFallback; + uint8_t tmpTargetBuffer[7]; + int32_t tmpTargetBufferLength = 0; + COMPOUND_TEXT_CONVERTERS currentState, tmpState; + uint32_t pValue; + int32_t pValueLength = 0; + int32_t i, n, j; + + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; + + currentState = myConverterData->state; + + /* check if the last codepoint of previous buffer was a lead 
surrogate*/ + if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { + goto getTrail; + } + + while( source < sourceLimit){ + if(target < targetLimit){ + + sourceChar = *(source++); + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(sourceChar)) { + if(U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if(source < sourceLimit) { + /* test the following code unit */ + UChar trail=(UChar) *source; + if(U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + cnv->fromUChar32=0x00; + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* no more input */ + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } + + tmpTargetBufferLength = 0; + tmpState = getState(sourceChar); + + if (tmpState != DO_SEARCH && currentState != tmpState) { + /* Get escape sequence if necessary */ + currentState = tmpState; + for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; + } + } + + if (tmpState == DO_SEARCH) { + /* Test all available converters */ + for (i = 1; i < SEARCH_LENGTH; i++) { + pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback); + if (pValueLength > 0) { + tmpState = (COMPOUND_TEXT_CONVERTERS)i; + if (currentState != tmpState) { + currentState = tmpState; + for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j]; + } + } + for (n = (pValueLength - 1); n >= 0; n--) { + 
tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); + } + break; + } + } + } else if (tmpState == COMPOUND_TEXT_SINGLE_0) { + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; + } else { + pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback); + if (pValueLength > 0) { + for (n = (pValueLength - 1); n >= 0; n--) { + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); + } + } + } + + for (i = 0; i < tmpTargetBufferLength; i++) { + if (target < targetLimit) { + *target++ = tmpTargetBuffer[i]; + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + if (*err == U_BUFFER_OVERFLOW_ERROR) { + for (; i < tmpTargetBufferLength; i++) { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i]; + } + } + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /*save the state and return */ + myConverterData->state = currentState; + args->source = source; + args->target = (char*)target; +} + + +static void U_CALLCONV +UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args, + UErrorCode* err){ + const char *mySource = (char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + const char *tmpSourceLimit = mySourceLimit; + uint32_t mySourceChar = 0x0000; + COMPOUND_TEXT_CONVERTERS currentState, tmpState; + int32_t sourceOffset = 0; + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo; + UConverterSharedData* savedSharedData = NULL; + + UConverterToUnicodeArgs subArgs; + int32_t minArgsSize; + + /* set up the subconverter arguments */ + if(args->sizesize; + } else { + minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); + } + + uprv_memcpy(&subArgs, args, minArgsSize); + subArgs.size = (uint16_t)minArgsSize; + + currentState = tmpState = myConverterData->state; + + while(mySource < 
mySourceLimit){ + if(myTarget < args->targetLimit){ + if (args->converter->toULength > 0) { + mySourceChar = args->converter->toUBytes[0]; + } else { + mySourceChar = (uint8_t)*mySource; + } + + if (mySourceChar == ESC_START) { + tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err); + + if (*err == U_TRUNCATED_CHAR_FOUND) { + for (; mySource < mySourceLimit;) { + args->converter->toUBytes[args->converter->toULength++] = *mySource++; + } + *err = U_ZERO_ERROR; + break; + } else if (tmpState == INVALID) { + if (args->converter->toULength == 0) { + mySource++; /* skip over the 0x1b byte */ + } + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + + if (tmpState != currentState) { + currentState = tmpState; + } + + sourceOffset = uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength; + + mySource += sourceOffset; + + args->converter->toULength = 0; + } + + if (currentState == COMPOUND_TEXT_SINGLE_0) { + while (mySource < mySourceLimit) { + if (*mySource == ESC_START) { + break; + } + if (myTarget < args->targetLimit) { + *myTarget++ = 0x00ff&(*mySource++); + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } else if (mySource < mySourceLimit){ + sourceOffset = findNextEsc(mySource, mySourceLimit); + + tmpSourceLimit = mySource + sourceOffset; + + subArgs.source = mySource; + subArgs.sourceLimit = tmpSourceLimit; + subArgs.target = myTarget; + savedSharedData = subArgs.converter->sharedData; + subArgs.converter->sharedData = myConverterData->myConverterArray[currentState]; + + ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); + + subArgs.converter->sharedData = savedSharedData; + + mySource = subArgs.source; + myTarget = subArgs.target; + + if (U_FAILURE(*err)) { + if(*err == U_BUFFER_OVERFLOW_ERROR) { + if(subArgs.converter->UCharErrorBufferLength > 0) { + uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, + 
subArgs.converter->UCharErrorBufferLength); + } + args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; + subArgs.converter->UCharErrorBufferLength = 0; + } + break; + } + } + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + myConverterData->state = currentState; + args->target = myTarget; + args->source = mySource; +} + +static void U_CALLCONV +_CompoundText_GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo; + int32_t i; + + for (i = 1; i < NUM_OF_CONVERTERS; i++) { + ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode); + } + sa->add(sa->set, 0x0000); + sa->add(sa->set, 0x0009); + sa->add(sa->set, 0x000A); + sa->addRange(sa->set, 0x0020, 0x007F); + sa->addRange(sa->set, 0x00A0, 0x00FF); +} +U_CDECL_END + +static const UConverterImpl _CompoundTextImpl = { + + UCNV_COMPOUND_TEXT, + + NULL, + NULL, + + _CompoundTextOpen, + _CompoundTextClose, + _CompoundTextReset, + + UConverter_toUnicode_CompoundText_OFFSETS, + UConverter_toUnicode_CompoundText_OFFSETS, + UConverter_fromUnicode_CompoundText_OFFSETS, + UConverter_fromUnicode_CompoundText_OFFSETS, + NULL, + + NULL, + _CompoundTextgetName, + NULL, + NULL, + _CompoundText_GetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _CompoundTextStaticData = { + sizeof(UConverterStaticData), + "COMPOUND_TEXT", + 0, + UCNV_IBM, + UCNV_COMPOUND_TEXT, + 1, + 6, + { 0xef, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; +const UConverterSharedData _CompoundTextData = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl); + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnv_err.c b/deps/icu-small/source/common/ucnv_err.c deleted file mode 
100644 index 449b162152..0000000000 --- a/deps/icu-small/source/common/ucnv_err.c +++ /dev/null @@ -1,481 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ***************************************************************************** - * - * Copyright (C) 1998-2016, International Business Machines - * Corporation and others. All Rights Reserved. - * - ***************************************************************************** - * - * ucnv_err.c - * Implements error behaviour functions called by T_UConverter_{from,to}Unicode - * - * -* Change history: -* -* 06/29/2000 helena Major rewrite of the callback APIs. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv_err.h" -#include "unicode/ucnv_cb.h" -#include "ucnv_cnv.h" -#include "cmemory.h" -#include "unicode/ucnv.h" -#include "ustrfmt.h" - -#define VALUE_STRING_LENGTH 48 -/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ -#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 -#define UNICODE_U_CODEPOINT 0x0055 -#define UNICODE_X_CODEPOINT 0x0058 -#define UNICODE_RS_CODEPOINT 0x005C -#define UNICODE_U_LOW_CODEPOINT 0x0075 -#define UNICODE_X_LOW_CODEPOINT 0x0078 -#define UNICODE_AMP_CODEPOINT 0x0026 -#define UNICODE_HASH_CODEPOINT 0x0023 -#define UNICODE_SEMICOLON_CODEPOINT 0x003B -#define UNICODE_PLUS_CODEPOINT 0x002B -#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B -#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D -#define UNICODE_SPACE_CODEPOINT 0x0020 -#define UCNV_PRV_ESCAPE_ICU 0 -#define UCNV_PRV_ESCAPE_C 'C' -#define UCNV_PRV_ESCAPE_XML_DEC 'D' -#define UCNV_PRV_ESCAPE_XML_HEX 'X' -#define UCNV_PRV_ESCAPE_JAVA 'J' -#define UCNV_PRV_ESCAPE_UNICODE 'U' -#define UCNV_PRV_ESCAPE_CSS2 'S' -#define UCNV_PRV_STOP_ON_ILLEGAL 'i' - -/* - * IS_DEFAULT_IGNORABLE_CODE_POINT - * This is to check if a code point has the default ignorable unicode property. 
- * As such, this list needs to be updated if the ignorable code point list ever - * changes. - * To avoid dependency on other code, this list is hard coded here. - * When an ignorable code point is found and is unmappable, the default callbacks - * will ignore them. - * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= - * - * This list should be sync with the one in CharsetCallback.java - */ -#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ - (c == 0x00AD) || \ - (c == 0x034F) || \ - (c == 0x061C) || \ - (c == 0x115F) || \ - (c == 0x1160) || \ - (0x17B4 <= c && c <= 0x17B5) || \ - (0x180B <= c && c <= 0x180E) || \ - (0x200B <= c && c <= 0x200F) || \ - (0x202A <= c && c <= 0x202E) || \ - (c == 0x2060) || \ - (0x2066 <= c && c <= 0x2069) || \ - (0x2061 <= c && c <= 0x2064) || \ - (0x206A <= c && c <= 0x206F) || \ - (c == 0x3164) || \ - (0x0FE00 <= c && c <= 0x0FE0F) || \ - (c == 0x0FEFF) || \ - (c == 0x0FFA0) || \ - (0x01BCA0 <= c && c <= 0x01BCA3) || \ - (0x01D173 <= c && c <= 0x01D17A) || \ - (c == 0x0E0001) || \ - (0x0E0020 <= c && c <= 0x0E007F) || \ - (0x0E0100 <= c && c <= 0x0E01EF) || \ - (c == 0x2065) || \ - (0x0FFF0 <= c && c <= 0x0FFF8) || \ - (c == 0x0E0000) || \ - (0x0E0002 <= c && c <= 0x0E001F) || \ - (0x0E0080 <= c && c <= 0x0E00FF) || \ - (0x0E01F0 <= c && c <= 0x0E0FFF) \ - ) - - -/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_STOP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. 
- */ - *err = U_ZERO_ERROR; - } - /* the caller must have set the error code accordingly */ - return; -} - - -/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_STOP ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codePoints, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - /* the caller must have set the error code accordingly */ - return; -} - -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_SKIP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - if (reason <= UCNV_IRREGULAR) - { - if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. - */ - *err = U_ZERO_ERROR; - } - else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. */ -} - -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterFromUnicodeArgs *fromArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - if (reason <= UCNV_IRREGULAR) - { - if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. - */ - *err = U_ZERO_ERROR; - } - else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - ucnv_cbFromUWriteSub(fromArgs, 0, err); - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. 
*/ -} - -/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, - *uses a clean copy (resetted) of the converter, to convert that unicode - *escape sequence to the target codepage (if conversion failure happens then - *we revert to substituting with subchar) - */ -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_ESCAPE ( - const void *context, - UConverterFromUnicodeArgs *fromArgs, - const UChar *codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - - UChar valueString[VALUE_STRING_LENGTH]; - int32_t valueStringLength = 0; - int32_t i = 0; - - const UChar *myValueSource = NULL; - UErrorCode err2 = U_ZERO_ERROR; - UConverterFromUCallback original = NULL; - const void *originalContext; - - UConverterFromUCallback ignoredCallback = NULL; - const void *ignoredContext; - - if (reason > UCNV_IRREGULAR) - { - return; - } - else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. 
- */ - *err = U_ZERO_ERROR; - return; - } - - ucnv_setFromUCallBack (fromArgs->converter, - (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, - NULL, - &original, - &originalContext, - &err2); - - if (U_FAILURE (err2)) - { - *err = err2; - return; - } - if(context==NULL) - { - while (i < length) - { - valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); - } - } - else - { - switch(*((char*)context)) - { - case UCNV_PRV_ESCAPE_JAVA: - while (i < length) - { - valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); - } - break; - - case UCNV_PRV_ESCAPE_C: - valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - - if(length==2){ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); - - } - else{ - valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); - } - break; - - case UCNV_PRV_ESCAPE_XML_DEC: - - valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - if(length==2){ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); - } - else{ 
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); - } - valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - break; - - case UCNV_PRV_ESCAPE_XML_HEX: - - valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ - if(length==2){ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); - } - else{ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); - } - valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - break; - - case UCNV_PRV_ESCAPE_UNICODE: - valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ - if (length == 2) { - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); - } else { - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); - } - valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ - break; - - case UCNV_PRV_ESCAPE_CSS2: - valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); - /* Always add space character, becase the next character might be whitespace, - which would erroneously be considered the termination of 
the escape sequence. */ - valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; - break; - - default: - while (i < length) - { - valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); - } - } - } - myValueSource = valueString; - - /* reset the error */ - *err = U_ZERO_ERROR; - - ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); - - ucnv_setFromUCallBack (fromArgs->converter, - original, - originalContext, - &ignoredCallback, - &ignoredContext, - &err2); - if (U_FAILURE (err2)) - { - *err = err2; - return; - } - - return; -} - - - -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_SKIP ( - const void *context, - UConverterToUnicodeArgs *toArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - if (reason <= UCNV_IRREGULAR) - { - if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. */ -} - -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterToUnicodeArgs *toArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - if (reason <= UCNV_IRREGULAR) - { - if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - ucnv_cbToUWriteSub(toArgs,0,err); - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. 
*/ -} - -/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, - *and uses that as the substitution sequence - */ -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_ESCAPE ( - const void *context, - UConverterToUnicodeArgs *toArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - UChar uniValueString[VALUE_STRING_LENGTH]; - int32_t valueStringLength = 0; - int32_t i = 0; - - if (reason > UCNV_IRREGULAR) - { - return; - } - - if(context==NULL) - { - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); - } - } - else - { - switch(*((char*)context)) - { - case UCNV_PRV_ESCAPE_XML_DEC: - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); - uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - } - break; - - case UCNV_PRV_ESCAPE_XML_HEX: - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); - uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - } - break; - case UCNV_PRV_ESCAPE_C: - 
while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); - } - break; - default: - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ - uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); - valueStringLength += 2; - } - } - } - /* reset the error */ - *err = U_ZERO_ERROR; - - ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); -} - -#endif diff --git a/deps/icu-small/source/common/ucnv_err.cpp b/deps/icu-small/source/common/ucnv_err.cpp new file mode 100644 index 0000000000..18218835a2 --- /dev/null +++ b/deps/icu-small/source/common/ucnv_err.cpp @@ -0,0 +1,496 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ***************************************************************************** + * + * Copyright (C) 1998-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ***************************************************************************** + * + * ucnv_err.c + * Implements error behaviour functions called by T_UConverter_{from,to}Unicode + * + * +* Change history: +* +* 06/29/2000 helena Major rewrite of the callback APIs. 
+*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv_err.h" +#include "unicode/ucnv_cb.h" +#include "ucnv_cnv.h" +#include "cmemory.h" +#include "unicode/ucnv.h" +#include "ustrfmt.h" + +#define VALUE_STRING_LENGTH 48 +/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ +#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 +#define UNICODE_U_CODEPOINT 0x0055 +#define UNICODE_X_CODEPOINT 0x0058 +#define UNICODE_RS_CODEPOINT 0x005C +#define UNICODE_U_LOW_CODEPOINT 0x0075 +#define UNICODE_X_LOW_CODEPOINT 0x0078 +#define UNICODE_AMP_CODEPOINT 0x0026 +#define UNICODE_HASH_CODEPOINT 0x0023 +#define UNICODE_SEMICOLON_CODEPOINT 0x003B +#define UNICODE_PLUS_CODEPOINT 0x002B +#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B +#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D +#define UNICODE_SPACE_CODEPOINT 0x0020 +#define UCNV_PRV_ESCAPE_ICU 0 +#define UCNV_PRV_ESCAPE_C 'C' +#define UCNV_PRV_ESCAPE_XML_DEC 'D' +#define UCNV_PRV_ESCAPE_XML_HEX 'X' +#define UCNV_PRV_ESCAPE_JAVA 'J' +#define UCNV_PRV_ESCAPE_UNICODE 'U' +#define UCNV_PRV_ESCAPE_CSS2 'S' +#define UCNV_PRV_STOP_ON_ILLEGAL 'i' + +/* + * IS_DEFAULT_IGNORABLE_CODE_POINT + * This is to check if a code point has the default ignorable unicode property. + * As such, this list needs to be updated if the ignorable code point list ever + * changes. + * To avoid dependency on other code, this list is hard coded here. + * When an ignorable code point is found and is unmappable, the default callbacks + * will ignore them. 
+ * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= + * + * This list should be sync with the one in CharsetCallback.java + */ +#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ + (c == 0x00AD) || \ + (c == 0x034F) || \ + (c == 0x061C) || \ + (c == 0x115F) || \ + (c == 0x1160) || \ + (0x17B4 <= c && c <= 0x17B5) || \ + (0x180B <= c && c <= 0x180E) || \ + (0x200B <= c && c <= 0x200F) || \ + (0x202A <= c && c <= 0x202E) || \ + (c == 0x2060) || \ + (0x2066 <= c && c <= 0x2069) || \ + (0x2061 <= c && c <= 0x2064) || \ + (0x206A <= c && c <= 0x206F) || \ + (c == 0x3164) || \ + (0x0FE00 <= c && c <= 0x0FE0F) || \ + (c == 0x0FEFF) || \ + (c == 0x0FFA0) || \ + (0x01BCA0 <= c && c <= 0x01BCA3) || \ + (0x01D173 <= c && c <= 0x01D17A) || \ + (c == 0x0E0001) || \ + (0x0E0020 <= c && c <= 0x0E007F) || \ + (0x0E0100 <= c && c <= 0x0E01EF) || \ + (c == 0x2065) || \ + (0x0FFF0 <= c && c <= 0x0FFF8) || \ + (c == 0x0E0000) || \ + (0x0E0002 <= c && c <= 0x0E001F) || \ + (0x0E0080 <= c && c <= 0x0E00FF) || \ + (0x0E01F0 <= c && c <= 0x0E0FFF) \ + ) + + +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_STOP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)context; + (void)fromUArgs; + (void)codeUnits; + (void)length; + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. 
+ */ + *err = U_ZERO_ERROR; + } + /* the caller must have set the error code accordingly */ + return; +} + + +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_STOP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codePoints, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + /* the caller must have set the error code accordingly */ + (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err; + return; +} + +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_SKIP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)fromUArgs; + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } + else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterFromUnicodeArgs *fromArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. 
+ */ + *err = U_ZERO_ERROR; + } + else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + ucnv_cbFromUWriteSub(fromArgs, 0, err); + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, + *uses a clean copy (resetted) of the converter, to convert that unicode + *escape sequence to the target codepage (if conversion failure happens then + *we revert to substituting with subchar) + */ +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_ESCAPE ( + const void *context, + UConverterFromUnicodeArgs *fromArgs, + const UChar *codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + + UChar valueString[VALUE_STRING_LENGTH]; + int32_t valueStringLength = 0; + int32_t i = 0; + + const UChar *myValueSource = NULL; + UErrorCode err2 = U_ZERO_ERROR; + UConverterFromUCallback original = NULL; + const void *originalContext; + + UConverterFromUCallback ignoredCallback = NULL; + const void *ignoredContext; + + if (reason > UCNV_IRREGULAR) + { + return; + } + else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. 
+ */ + *err = U_ZERO_ERROR; + return; + } + + ucnv_setFromUCallBack (fromArgs->converter, + (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, + NULL, + &original, + &originalContext, + &err2); + + if (U_FAILURE (err2)) + { + *err = err2; + return; + } + if(context==NULL) + { + while (i < length) + { + valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); + } + } + else + { + switch(*((char*)context)) + { + case UCNV_PRV_ESCAPE_JAVA: + while (i < length) + { + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); + } + break; + + case UCNV_PRV_ESCAPE_C: + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + + if(length==2){ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); + + } + else{ + valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); + } + break; + + case UCNV_PRV_ESCAPE_XML_DEC: + + valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + if(length==2){ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); + } + else{ 
+ valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); + } + valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + break; + + case UCNV_PRV_ESCAPE_XML_HEX: + + valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ + if(length==2){ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); + } + else{ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); + } + valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + break; + + case UCNV_PRV_ESCAPE_UNICODE: + valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ + if (length == 2) { + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); + } else { + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); + } + valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ + break; + + case UCNV_PRV_ESCAPE_CSS2: + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); + /* Always add space character, becase the next character might be whitespace, + which would erroneously be considered the termination of 
the escape sequence. */ + valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; + break; + + default: + while (i < length) + { + valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); + } + } + } + myValueSource = valueString; + + /* reset the error */ + *err = U_ZERO_ERROR; + + ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); + + ucnv_setFromUCallBack (fromArgs->converter, + original, + originalContext, + &ignoredCallback, + &ignoredContext, + &err2); + if (U_FAILURE (err2)) + { + *err = err2; + return; + } + + return; +} + + + +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_SKIP ( + const void *context, + UConverterToUnicodeArgs *toArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)toArgs; + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterToUnicodeArgs *toArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + ucnv_cbToUWriteSub(toArgs,0,err); + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. 
*/ +} + +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, + *and uses that as the substitution sequence + */ +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_ESCAPE ( + const void *context, + UConverterToUnicodeArgs *toArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + UChar uniValueString[VALUE_STRING_LENGTH]; + int32_t valueStringLength = 0; + int32_t i = 0; + + if (reason > UCNV_IRREGULAR) + { + return; + } + + if(context==NULL) + { + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); + } + } + else + { + switch(*((char*)context)) + { + case UCNV_PRV_ESCAPE_XML_DEC: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); + uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + } + break; + + case UCNV_PRV_ESCAPE_XML_HEX: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); + uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + } + break; + case UCNV_PRV_ESCAPE_C: + 
while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); + } + break; + default: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ + uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); + valueStringLength += 2; + } + } + } + /* reset the error */ + *err = U_ZERO_ERROR; + + ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); +} + +#endif diff --git a/deps/icu-small/source/common/ucnv_ext.cpp b/deps/icu-small/source/common/ucnv_ext.cpp index f860518724..7dea4eef41 100644 --- a/deps/icu-small/source/common/ucnv_ext.cpp +++ b/deps/icu-small/source/common/ucnv_ext.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucnv_ext.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -23,6 +23,7 @@ #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION #include "unicode/uset.h" +#include "unicode/ustring.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" #include "ucnv_ext.h" diff --git a/deps/icu-small/source/common/ucnv_ext.h b/deps/icu-small/source/common/ucnv_ext.h index e2ce7fa072..7b753ac217 100644 --- a/deps/icu-small/source/common/ucnv_ext.h +++ b/deps/icu-small/source/common/ucnv_ext.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucnv_ext.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucnv_imp.h b/deps/icu-small/source/common/ucnv_imp.h index 81aa80fd27..c5e6aeb47e 100644 --- a/deps/icu-small/source/common/ucnv_imp.h +++ b/deps/icu-small/source/common/ucnv_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ucnv_io.cpp b/deps/icu-small/source/common/ucnv_io.cpp index eaa08e47cd..d9e91314ed 100644 --- a/deps/icu-small/source/common/ucnv_io.cpp +++ b/deps/icu-small/source/common/ucnv_io.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/ucnv_io.h b/deps/icu-small/source/common/ucnv_io.h index 8b3585786d..8f2d7b5a02 100644 --- a/deps/icu-small/source/common/ucnv_io.h +++ b/deps/icu-small/source/common/ucnv_io.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ucnv_lmb.c b/deps/icu-small/source/common/ucnv_lmb.c deleted file mode 100644 index e595f931a0..0000000000 --- a/deps/icu-small/source/common/ucnv_lmb.c +++ /dev/null @@ -1,1378 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnv_lmb.cpp -* encoding: US-ASCII -* tab size: 4 (not used) -* indentation:4 -* -* created on: 2000feb09 -* created by: Brendan Murray -* extensively hacked up by: Jim Snyder-Grant -* -* Modification History: -* -* Date Name Description -* -* 06/20/2000 helena OS/400 port changes; mostly typecast. -* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers. -* Add comments to document LMBCS format and implementation -* restructured order & breakdown of functions -* 06/28/2000 helena Major rewrite for the callback API changes. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv_err.h" -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" -#include "ucnv_imp.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" - -#ifdef EBCDIC_RTL - #include "ascii_a.h" -#endif - -/* - LMBCS - - (Lotus Multi-Byte Character Set) - - LMBCS was invented in the late 1980's and is primarily used in Lotus Notes - databases and in Lotus 1-2-3 files. Programmers who work with the APIs - into these products will sometimes need to deal with strings in this format. - - The code in this file provides an implementation for an ICU converter of - LMBCS to and from Unicode. - - Since the LMBCS character set is only sparsely documented in existing - printed or online material, we have added extensive annotation to this - file to serve as a guide to understanding LMBCS. - - LMBCS was originally designed with these four sometimes-competing design goals: - - -Provide encodings for the characters in 12 existing national standards - (plus a few other characters) - -Minimal memory footprint - -Maximal speed of conversion into the existing national character sets - -No need to track a changing state as you interpret a string. 
- - - All of the national character sets LMBCS was trying to encode are 'ANSI' - based, in that the bytes from 0x20 - 0x7F are almost exactly the - same common Latin unaccented characters and symbols in all character sets. - - So, in order to help meet the speed & memory design goals, the common ANSI - bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS. - - The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as - follows: - - [G] D1 [D2] - - That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2 - data bytes. The maximum size of a LMBCS chjaracter is 3 bytes: -*/ -#define ULMBCS_CHARSIZE_MAX 3 -/* - The single-byte values from 0x20 to 0x7F are examples of single D1 bytes. - We often have to figure out if byte values are below or above this, so we - use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control - characters just above & below the common lower-ANSI range */ -#define ULMBCS_C0END 0x1F -#define ULMBCS_C1START 0x80 -/* - Since LMBCS is always dealing in byte units. we create a local type here for - dealing with these units of LMBCS code units: - -*/ -typedef uint8_t ulmbcs_byte_t; - -/* - Most of the values less than 0x20 are reserved in LMBCS to announce - which national character standard is being used for the 'D' bytes. 
- In the comments we show the common name and the IBM character-set ID - for these character-set announcers: -*/ - -#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */ -#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */ -#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */ -#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */ -#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */ -#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */ -#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */ -#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */ -#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */ -#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */ -#define ULMBCS_GRP_TW 0x12 /* Chinese SC :ibm-950 */ -#define ULMBCS_GRP_CN 0x13 /* Chinese TC :ibm-1386 */ - -/* - So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS - character is one of those 12 values, you can interpret the remaining bytes of - that character as coming from one of those character sets. Since the lower - ANSI bytes already are represented in single bytes, using one of the character - set announcers is used to announce a character that starts with a byte of - 0x80 or greater. - - The character sets are arranged so that the single byte sets all appear - before the multi-byte character sets. When we need to tell whether a - group byte is for a single byte char set or not we use this define: */ - -#define ULMBCS_DOUBLEOPTGROUP_START 0x10 - -/* -However, to fully understand LMBCS, you must also understand a series of -exceptions & optimizations made in service of the design goals. - -First, those of you who are character set mavens may have noticed that -the 'double-byte' character sets are actually multi-byte character sets -that can have 1 or two bytes, even in the upper-ascii range. 
To force -each group byte to introduce a fixed-width encoding (to make it faster to -count characters), we use a convention of doubling up on the group byte -to introduce any single-byte character > 0x80 in an otherwise double-byte -character set. So, for example, the LMBCS sequence x10 x10 xAE is the -same as '0xAE' in the Japanese code page 943. - -Next, you will notice that the list of group bytes has some gaps. -These are used in various ways. - -We reserve a few special single byte values for common control -characters. These are in the same place as their ANSI eqivalents for speed. -*/ - -#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */ -#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */ -#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */ - -/* Then, 1-2-3 reserved a special single-byte character to put at the -beginning of internal 'system' range names: */ - -#define ULMBCS_123SYSTEMRANGE 0x19 - -/* Then we needed a place to put all the other ansi control characters -that must be moved to different values because LMBCS reserves those -values for other purposes. To represent the control characters, we start -with a first byte of 0xF & add the control chaarcter value as the -second byte */ -#define ULMBCS_GRP_CTRL 0x0F - -/* For the C0 controls (less than 0x20), we add 0x20 to preserve the -useful doctrine that any byte less than 0x20 in a LMBCS char must be -the first byte of a character:*/ -#define ULMBCS_CTRLOFFSET 0x20 - -/* -Where to put the characters that aren't part of any of the 12 national -character sets? The first thing that was done, in the earlier years of -LMBCS, was to use up the spaces of the form - - [G] D1, - - where 'G' was one of the single-byte character groups, and - D1 was less than 0x80. These sequences are gathered together - into a Lotus-invented doublebyte character set to represent a - lot of stray values. 
Internally, in this implementation, we track this - as group '0', as a place to tuck this exceptions list.*/ - -#define ULMBCS_GRP_EXCEPT 0x00 -/* - Finally, as the durability and usefulness of UNICODE became clear, - LOTUS added a new group 0x14 to hold Unicode values not otherwise - represented in LMBCS: */ -#define ULMBCS_GRP_UNICODE 0x14 -/* The two bytes appearing after a 0x14 are intrepreted as UFT-16 BE -(Big-Endian) characters. The exception comes when the UTF16 -representation would have a zero as the second byte. In that case, -'F6' is used in its place, and the bytes are swapped. (This prevents -LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: -0xF6xx is in the middle of the Private Use Area.)*/ -#define ULMBCS_UNICOMPATZERO 0xF6 - -/* It is also useful in our code to have a constant for the size of -a LMBCS char that holds a literal Unicode value */ -#define ULMBCS_UNICODE_SIZE 3 - -/* -To squish the LMBCS representations down even further, and to make -translations even faster,sometimes the optimization group byte can be dropped -from a LMBCS character. This is decided on a process-by-process basis. The -group byte that is dropped is called the 'optimization group'. - -For Notes, the optimzation group is always 0x1.*/ -#define ULMBCS_DEFAULTOPTGROUP 0x1 -/* For 1-2-3 files, the optimzation group is stored in the header of the 1-2-3 -file. - - In any case, when using ICU, you either pass in the -optimization group as part of the name of the converter (LMBCS-1, LMBCS-2, -etc.). Using plain 'LMBCS' as the name of the converter will give you -LMBCS-1. - - -*** Implementation strategy *** - - -Because of the extensive use of other character sets, the LMBCS converter -keeps a mapping between optimization groups and IBM character sets, so that -ICU converters can be created and used as needed. 
*/ - -/* As you can see, even though any byte below 0x20 could be an optimization -byte, only those at 0x13 or below can map to an actual converter. To limit -some loops and searches, we define a value for that last group converter:*/ - -#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ - -static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = { - /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ - /* 0x0001 */ "ibm-850", - /* 0x0002 */ "ibm-851", - /* 0x0003 */ "windows-1255", - /* 0x0004 */ "windows-1256", - /* 0x0005 */ "windows-1251", - /* 0x0006 */ "ibm-852", - /* 0x0007 */ NULL, /* Unused */ - /* 0x0008 */ "windows-1254", - /* 0x0009 */ NULL, /* Control char HT */ - /* 0x000A */ NULL, /* Control char LF */ - /* 0x000B */ "windows-874", - /* 0x000C */ NULL, /* Unused */ - /* 0x000D */ NULL, /* Control char CR */ - /* 0x000E */ NULL, /* Unused */ - /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */ - /* 0x0010 */ "windows-932", - /* 0x0011 */ "windows-949", - /* 0x0012 */ "windows-950", - /* 0x0013 */ "windows-936" - - /* The rest are null, including the 0x0014 Unicode compatibility region - and 0x0019, the 1-2-3 system range control char */ -}; - - -/* That's approximately all the data that's needed for translating - LMBCS to Unicode. - - -However, to translate Unicode to LMBCS, we need some more support. - -That's because there are often more than one possible mappings from a Unicode -code point back into LMBCS. The first thing we do is look up into a table -to figure out if there are more than one possible mappings. This table, -arranged by Unicode values (including ranges) either lists which group -to use, or says that it could go into one or more of the SBCS sets, or -into one or more of the DBCS sets. (If the character exists in both DBCS & -SBCS, the table will place it in the SBCS sets, to make the LMBCS code point -length as small as possible. 
Here's the two special markers we use to indicate -ambiguous mappings: */ - -#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one - LMBCS sbcs native encoding - (example: most accented latin) */ -#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one - LMBCS mbcs native encoding - (example: Unihan) */ -#define ULMBCS_AMBIGUOUS_ALL 0x82 -/* And here's a simple way to see if a group falls in an appropriate range */ -#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \ - ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \ - (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \ - (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \ - (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \ - ((agroup) == ULMBCS_AMBIGUOUS_ALL) - - -/* The table & some code to use it: */ - - -static const struct _UniLMBCSGrpMap -{ - const UChar uniStartRange; - const UChar uniEndRange; - const ulmbcs_byte_t GrpType; -} UniLMBCSGrpMap[] -= -{ - - {0x0001, 0x001F, ULMBCS_GRP_CTRL}, - {0x0080, 0x009F, ULMBCS_GRP_CTRL}, - {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS}, - {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL}, - {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS}, - {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL}, - {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS}, - {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL}, - {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS}, - {0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL}, - {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS}, - {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL}, - {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS}, - {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL}, - {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS}, - {0x01CE, 0x01CE, ULMBCS_GRP_TW }, - {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS}, - {0x02BA, 0x02BA, ULMBCS_GRP_CN}, - {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS}, - {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS}, - {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS}, - {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS}, - {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL}, - {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS}, - {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL}, - {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS}, - 
{0x0400, 0x0400, ULMBCS_GRP_RU}, - {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL}, - {0x0402, 0x040F, ULMBCS_GRP_RU}, - {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL}, - {0x0432, 0x044E, ULMBCS_GRP_RU}, - {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL}, - {0x0450, 0x0491, ULMBCS_GRP_RU}, - {0x05B0, 0x05F2, ULMBCS_GRP_HE}, - {0x060C, 0x06AF, ULMBCS_GRP_AR}, - {0x0E01, 0x0E5B, ULMBCS_GRP_TH}, - {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS}, - {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS}, - {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS}, - {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS}, - {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS}, - {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS}, - {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL}, - {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS}, - {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL}, - {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS}, - {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL}, - {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS}, - {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS}, - {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL}, - {0x2027, 0x2027, ULMBCS_GRP_TW}, - {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL}, - {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS}, - {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS}, - {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS}, - {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS}, - {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS}, - {0x203C, 0x203C, ULMBCS_GRP_EXCEPT}, - {0x2074, 0x2074, ULMBCS_GRP_KO}, - {0x207F, 0x207F, ULMBCS_GRP_EXCEPT}, - {0x2081, 0x2084, ULMBCS_GRP_KO}, - {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS}, - {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS}, - {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS}, - /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/ - {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS}, - {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS}, - {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS}, - {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS}, - {0x2153, 0x2154, ULMBCS_GRP_KO}, - {0x215B, 0x215E, ULMBCS_GRP_EXCEPT}, - {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS}, - {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL}, - {0x2194, 0x2195, ULMBCS_GRP_EXCEPT}, - {0x2196, 0x2199, 
ULMBCS_AMBIGUOUS_MBCS}, - {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT}, - {0x21B8, 0x21B9, ULMBCS_GRP_CN}, - {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT}, - {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS}, - {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT}, - {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS}, - {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT}, - {0x21E7, 0x21E7, ULMBCS_GRP_CN}, - {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS}, - {0x2201, 0x2201, ULMBCS_GRP_EXCEPT}, - {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS}, - {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS}, - {0x2204, 0x2206, ULMBCS_GRP_EXCEPT}, - {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS}, - {0x2209, 0x220A, ULMBCS_GRP_EXCEPT}, - {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS}, - {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS}, - {0x2219, 0x2219, ULMBCS_GRP_EXCEPT}, - {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS}, - {0x221B, 0x221C, ULMBCS_GRP_EXCEPT}, - {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS}, - {0x221F, 0x221F, ULMBCS_GRP_EXCEPT}, - {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS}, - {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS}, - {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS}, - {0x2245, 0x2248, ULMBCS_GRP_EXCEPT}, - {0x224C, 0x224C, ULMBCS_GRP_TW}, - {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS}, - {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS}, - {0x2262, 0x2265, ULMBCS_GRP_EXCEPT}, - {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS}, - {0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS}, - {0x2284, 0x2285, ULMBCS_GRP_EXCEPT}, - {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS}, - {0x2288, 0x2297, ULMBCS_GRP_EXCEPT}, - {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS}, - {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT}, - {0x2310, 0x2310, ULMBCS_GRP_EXCEPT}, - {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS}, - {0x2318, 0x2321, ULMBCS_GRP_EXCEPT}, - {0x2318, 0x2321, ULMBCS_GRP_CN}, - {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS}, - {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS}, - {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS}, - {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL}, - {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS}, - {0x2504, 0x2505, ULMBCS_GRP_TW}, - {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL}, - {0x2666, 0x2666, 
ULMBCS_GRP_EXCEPT}, - {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS}, - {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL}, - {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS}, - {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS}, - {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS}, - {0x266F, 0x266F, ULMBCS_GRP_JA}, - {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS}, - {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS}, - {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT}, - {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS}, - {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS}, - {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS}, - {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE} -}; - -static ulmbcs_byte_t -FindLMBCSUniRange(UChar uniChar) -{ - const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap; - - while (uniChar > pTable->uniEndRange) - { - pTable++; - } - - if (uniChar >= pTable->uniStartRange) - { - return pTable->GrpType; - } - return ULMBCS_GRP_UNICODE; -} - -/* -We also ask the creator of a converter to send in a preferred locale -that we can use in resolving ambiguous mappings. They send the locale -in as a string, and we map it, if possible, to one of the -LMBCS groups. We use this table, and the associated code, to -do the lookup: */ - -/************************************************** - This table maps locale ID's to LMBCS opt groups. - The default return is group 0x01. Note that for - performance reasons, the table is sorted in - increasing alphabetic order, with the notable - exception of zhTW. This is to force the check - for Traditonal Chinese before dropping back to - Simplified. - - Note too that the Latin-1 groups have been - commented out because it's the default, and - this shortens the table, allowing a serial - search to go quickly. 
- *************************************************/ - -static const struct _LocaleLMBCSGrpMap -{ - const char *LocaleID; - const ulmbcs_byte_t OptGroup; -} LocaleLMBCSGrpMap[] = -{ - {"ar", ULMBCS_GRP_AR}, - {"be", ULMBCS_GRP_RU}, - {"bg", ULMBCS_GRP_L2}, - /* {"ca", ULMBCS_GRP_L1}, */ - {"cs", ULMBCS_GRP_L2}, - /* {"da", ULMBCS_GRP_L1}, */ - /* {"de", ULMBCS_GRP_L1}, */ - {"el", ULMBCS_GRP_GR}, - /* {"en", ULMBCS_GRP_L1}, */ - /* {"es", ULMBCS_GRP_L1}, */ - /* {"et", ULMBCS_GRP_L1}, */ - /* {"fi", ULMBCS_GRP_L1}, */ - /* {"fr", ULMBCS_GRP_L1}, */ - {"he", ULMBCS_GRP_HE}, - {"hu", ULMBCS_GRP_L2}, - /* {"is", ULMBCS_GRP_L1}, */ - /* {"it", ULMBCS_GRP_L1}, */ - {"iw", ULMBCS_GRP_HE}, - {"ja", ULMBCS_GRP_JA}, - {"ko", ULMBCS_GRP_KO}, - /* {"lt", ULMBCS_GRP_L1}, */ - /* {"lv", ULMBCS_GRP_L1}, */ - {"mk", ULMBCS_GRP_RU}, - /* {"nl", ULMBCS_GRP_L1}, */ - /* {"no", ULMBCS_GRP_L1}, */ - {"pl", ULMBCS_GRP_L2}, - /* {"pt", ULMBCS_GRP_L1}, */ - {"ro", ULMBCS_GRP_L2}, - {"ru", ULMBCS_GRP_RU}, - {"sh", ULMBCS_GRP_L2}, - {"sk", ULMBCS_GRP_L2}, - {"sl", ULMBCS_GRP_L2}, - {"sq", ULMBCS_GRP_L2}, - {"sr", ULMBCS_GRP_RU}, - /* {"sv", ULMBCS_GRP_L1}, */ - {"th", ULMBCS_GRP_TH}, - {"tr", ULMBCS_GRP_TR}, - {"uk", ULMBCS_GRP_RU}, - /* {"vi", ULMBCS_GRP_L1}, */ - {"zhTW", ULMBCS_GRP_TW}, - {"zh", ULMBCS_GRP_CN}, - {NULL, ULMBCS_GRP_L1} -}; - - -static ulmbcs_byte_t -FindLMBCSLocale(const char *LocaleID) -{ - const struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap; - - if ((!LocaleID) || (!*LocaleID)) - { - return 0; - } - - while (pTable->LocaleID) - { - if (*pTable->LocaleID == *LocaleID) /* Check only first char for speed */ - { - /* First char matches - check whole name, for entry-length */ - if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0) - return pTable->OptGroup; - } - else - if (*pTable->LocaleID > *LocaleID) /* Sorted alphabetically - exit */ - break; - pTable++; - } - return ULMBCS_GRP_L1; -} - - -/* - Before we get to the main body of code, 
here's how we hook up to the rest - of ICU. ICU converters are required to define a structure that includes - some function pointers, and some common data, in the style of a C++ - vtable. There is also room in there for converter-specific data. LMBCS - uses that converter-specific data to keep track of the 12 subconverters - we use, the optimization group, and the group (if any) that matches the - locale. We have one structure instantiated for each of the 12 possible - optimization groups. To avoid typos & to avoid boring the reader, we - put the declarations of these structures and functions into macros. To see - the definitions of these structures, see unicode\ucnv_bld.h -*/ - -typedef struct - { - UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */ - uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */ - uint8_t localeConverterIndex; /* reasonable locale match for index */ - } -UConverterDataLMBCS; - -static void _LMBCSClose(UConverter * _this); - -#define DECLARE_LMBCS_DATA(n) \ -static const UConverterImpl _LMBCSImpl##n={\ - UCNV_LMBCS_##n,\ - NULL,NULL,\ - _LMBCSOpen##n,\ - _LMBCSClose,\ - NULL,\ - _LMBCSToUnicodeWithOffsets,\ - _LMBCSToUnicodeWithOffsets,\ - _LMBCSFromUnicode,\ - _LMBCSFromUnicode,\ - NULL,\ - NULL,\ - NULL,\ - NULL,\ - _LMBCSSafeClone,\ - ucnv_getCompleteUnicodeSet\ -};\ -static const UConverterStaticData _LMBCSStaticData##n={\ - sizeof(UConverterStaticData),\ - "LMBCS-" #n,\ - 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\ - { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \ -};\ -const UConverterSharedData _LMBCSData##n= \ - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_LMBCSStaticData##n, &_LMBCSImpl##n); - - /* The only function we needed to duplicate 12 times was the 'open' -function, which will do basically the same thing except set a different -optimization group. 
So, we put the common stuff into a worker function, -and set up another macro to stamp out the 12 open functions:*/ -#define DEFINE_LMBCS_OPEN(n) \ -static void \ - _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \ -{ _LMBCSOpenWorker(_this, pArgs, err, n); } - - - -/* Here's the open worker & the common close function */ -static void -_LMBCSOpenWorker(UConverter* _this, - UConverterLoadArgs *pArgs, - UErrorCode* err, - ulmbcs_byte_t OptGroup) -{ - UConverterDataLMBCS * extraInfo = _this->extraInfo = - (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); - if(extraInfo != NULL) - { - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; - ulmbcs_byte_t i; - - uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS)); - - stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; - - for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++) - { - if(OptGroupByteToCPName[i] != NULL) { - extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], &stackPieces, &stackArgs, err); - } - } - - if(U_FAILURE(*err) || pArgs->onlyTestIsLoadable) { - _LMBCSClose(_this); - return; - } - extraInfo->OptGroup = OptGroup; - extraInfo->localeConverterIndex = FindLMBCSLocale(pArgs->locale); - } - else - { - *err = U_MEMORY_ALLOCATION_ERROR; - } -} - -static void -_LMBCSClose(UConverter * _this) -{ - if (_this->extraInfo != NULL) - { - ulmbcs_byte_t Ix; - UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo; - - for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) - { - if (extraInfo->OptGrpConverter[Ix] != NULL) - ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); - } - if (!_this->isExtraLocal) { - uprv_free (_this->extraInfo); - _this->extraInfo = NULL; - } - } -} - -typedef struct LMBCSClone { - UConverter cnv; - UConverterDataLMBCS lmbcs; -} LMBCSClone; - -static UConverter * -_LMBCSSafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t 
*pBufferSize, - UErrorCode *status) { - LMBCSClone *newLMBCS; - UConverterDataLMBCS *extraInfo; - int32_t i; - - if(*pBufferSize<=0) { - *pBufferSize=(int32_t)sizeof(LMBCSClone); - return NULL; - } - - extraInfo=(UConverterDataLMBCS *)cnv->extraInfo; - newLMBCS=(LMBCSClone *)stackBuffer; - - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS)); - - /* share the subconverters */ - for(i = 0; i <= ULMBCS_GRP_LAST; ++i) { - if(extraInfo->OptGrpConverter[i] != NULL) { - ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); - } - } - - newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs; - newLMBCS->cnv.isExtraLocal = TRUE; - return &newLMBCS->cnv; -} - -/* - * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117) - * which added all code points except for U+F6xx - * because those cannot be represented in the Unicode group. - * However, it turns out that windows-950 has roundtrips for all of U+F6xx - * which means that LMBCS can convert all Unicode code points after all. - * We now simply use ucnv_getCompleteUnicodeSet(). - * - * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet(). (091216) - */ - -/* - Here's the basic helper function that we use when converting from - Unicode to LMBCS, and we suspect that a Unicode character will fit into - one of the 12 groups. The return value is the number of bytes written - starting at pStartLMBCS (if any). 
-*/ - -static size_t -LMBCSConversionWorker ( - UConverterDataLMBCS * extraInfo, /* subconverters, opt & locale groups */ - ulmbcs_byte_t group, /* The group to try */ - ulmbcs_byte_t * pStartLMBCS, /* where to put the results */ - UChar * pUniChar, /* The input unicode character */ - ulmbcs_byte_t * lastConverterIndex, /* output: track last successful group used */ - UBool * groups_tried /* output: track any unsuccessful groups */ -) -{ - ulmbcs_byte_t * pLMBCS = pStartLMBCS; - UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group]; - - int bytesConverted; - uint32_t value; - ulmbcs_byte_t firstByte; - - U_ASSERT(xcnv); - U_ASSERT(group 0) { - firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8)); - } else { - /* most common failure mode is an unassigned character */ - groups_tried[group] = TRUE; - return 0; - } - - *lastConverterIndex = group; - - /* All initial byte values in lower ascii range should have been caught by now, - except with the exception group. - */ - U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); - - /* use converted data: first write 0, 1 or two group bytes */ - if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group) - { - *pLMBCS++ = group; - if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) - { - *pLMBCS++ = group; - } - } - - /* don't emit control chars */ - if ( bytesConverted == 1 && firstByte < 0x20 ) - return 0; - - - /* then move over the converted data */ - switch(bytesConverted) - { - case 4: - *pLMBCS++ = (ulmbcs_byte_t)(value >> 24); - U_FALLTHROUGH; - case 3: - *pLMBCS++ = (ulmbcs_byte_t)(value >> 16); - U_FALLTHROUGH; - case 2: - *pLMBCS++ = (ulmbcs_byte_t)(value >> 8); - U_FALLTHROUGH; - case 1: - *pLMBCS++ = (ulmbcs_byte_t)value; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - - return (pLMBCS - pStartLMBCS); -} - - -/* This is a much simpler version of above, when we -know we are writing LMBCS using the Unicode 
group -*/ -static size_t -LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar) -{ - /* encode into LMBCS Unicode range */ - uint8_t LowCh = (uint8_t)(uniChar & 0x00FF); - uint8_t HighCh = (uint8_t)(uniChar >> 8); - - *pLMBCS++ = ULMBCS_GRP_UNICODE; - - if (LowCh == 0) - { - *pLMBCS++ = ULMBCS_UNICOMPATZERO; - *pLMBCS++ = HighCh; - } - else - { - *pLMBCS++ = HighCh; - *pLMBCS++ = LowCh; - } - return ULMBCS_UNICODE_SIZE; -} - - - -/* The main Unicode to LMBCS conversion function */ -static void -_LMBCSFromUnicode(UConverterFromUnicodeArgs* args, - UErrorCode* err) -{ - ulmbcs_byte_t lastConverterIndex = 0; - UChar uniChar; - ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX]; - ulmbcs_byte_t * pLMBCS; - int32_t bytes_written; - UBool groups_tried[ULMBCS_GRP_LAST+1]; - UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - int sourceIndex = 0; - - /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) - If that succeeds, see if it will all fit into the target & copy it over - if it does. - - We try conversions in the following order: - - 1. Single-byte ascii & special fixed control chars (&null) - 2. Look up group in table & try that (could be - A) Unicode group - B) control group, - C) national encoding, - or ambiguous SBCS or MBCS group (on to step 4...) - - 3. If its ambiguous, try this order: - A) The optimization group - B) The locale group - C) The last group that succeeded with this string. - D) every other group that's relevent (single or double) - E) If its single-byte ambiguous, try the exceptions group - - 4. 
And as a grand fallback: Unicode - */ - - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - ulmbcs_byte_t OldConverterIndex = 0; - - while (args->source < args->sourceLimit && !U_FAILURE(*err)) - { - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - OldConverterIndex = extraInfo->localeConverterIndex; - - if (args->target >= args->targetLimit) - { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - uniChar = *(args->source); - bytes_written = 0; - pLMBCS = LMBCS; - - /* check cases in rough order of how common they are, for speed */ - - /* single byte matches: strategy 1 */ - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - if((uniChar>=0x80) && (uniChar<=0xff) - /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7) - &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8)) - { - extraInfo->localeConverterIndex = ULMBCS_GRP_L1; - } - if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) || - uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR || - uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE - ) - { - *pLMBCS++ = (ulmbcs_byte_t ) uniChar; - bytes_written = 1; - } - - - if (!bytes_written) - { - /* Check by UNICODE range (Strategy 2) */ - ulmbcs_byte_t group = FindLMBCSUniRange(uniChar); - - if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */ - { - pLMBCS += LMBCSConvertUni(pLMBCS,uniChar); - - bytes_written = (int32_t)(pLMBCS - LMBCS); - } - else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */ - { - /* Handle control characters here */ - if (uniChar <= ULMBCS_C0END) - { - *pLMBCS++ = ULMBCS_GRP_CTRL; - *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar); - } - else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET) - { - *pLMBCS++ = ULMBCS_GRP_CTRL; - *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF); - } - bytes_written = (int32_t)(pLMBCS - LMBCS); - } - else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */ - { - /* a specific converter has been identified 
- use it */ - bytes_written = (int32_t)LMBCSConversionWorker ( - extraInfo, group, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - if (!bytes_written) /* the ambiguous group cases (Strategy 3) */ - { - uprv_memset(groups_tried, 0, sizeof(groups_tried)); - - /* check for non-default optimization group (Strategy 3A )*/ - if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))) - { - /*zhujin: upgrade, merge #39299 here (Lotus) */ - /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/ - - if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) - { - bytes_written = LMBCSConversionWorker (extraInfo, - ULMBCS_GRP_L1, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - - if(!bytes_written) - { - bytes_written = LMBCSConversionWorker (extraInfo, - ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - if(!bytes_written) - { - bytes_written = LMBCSConversionWorker (extraInfo, - extraInfo->localeConverterIndex, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - else - { - bytes_written = LMBCSConversionWorker (extraInfo, - extraInfo->localeConverterIndex, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - /* check for locale optimization group (Strategy 3B) */ - if (!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex))) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); - } - /* check for last optimization group used for this string (Strategy 3C) */ - if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex))) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); - } - if (!bytes_written) - { - /* just check every possible matching 
converter (Strategy 3D) */ - ulmbcs_byte_t grp_start; - ulmbcs_byte_t grp_end; - ulmbcs_byte_t grp_ix; - grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) - ? ULMBCS_DOUBLEOPTGROUP_START - : ULMBCS_GRP_L1); - grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) - ? ULMBCS_GRP_LAST - : ULMBCS_GRP_TH); - if(group == ULMBCS_AMBIGUOUS_ALL) - { - grp_start = ULMBCS_GRP_L1; - grp_end = ULMBCS_GRP_LAST; - } - for (grp_ix = grp_start; - grp_ix <= grp_end && !bytes_written; - grp_ix++) - { - if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix]) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - grp_ix, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - /* a final conversion fallback to the exceptions group if its likely - to be single byte (Strategy 3E) */ - if (!bytes_written && grp_start == ULMBCS_GRP_L1) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/ - if (!bytes_written) - { - - pLMBCS += LMBCSConvertUni(pLMBCS, uniChar); - bytes_written = (int32_t)(pLMBCS - LMBCS); - } - } - } - - /* we have a translation. 
increment source and write as much as posible to target */ - args->source++; - pLMBCS = LMBCS; - while (args->target < args->targetLimit && bytes_written--) - { - *(args->target)++ = *pLMBCS++; - if (args->offsets) - { - *(args->offsets)++ = sourceIndex; - } - } - sourceIndex++; - if (bytes_written > 0) - { - /* write any bytes that didn't fit in target to the error buffer, - common code will move this to target if we get called back with - enough target room - */ - uint8_t * pErrorBuffer = args->converter->charErrorBuffer; - *err = U_BUFFER_OVERFLOW_ERROR; - args->converter->charErrorBufferLength = (int8_t)bytes_written; - while (bytes_written--) - { - *pErrorBuffer++ = *pLMBCS++; - } - } - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - extraInfo->localeConverterIndex = OldConverterIndex; - } -} - - -/* Now, the Unicode from LMBCS section */ - - -/* A function to call when we are looking at the Unicode group byte in LMBCS */ -static UChar -GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */ -{ - uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/ - uint8_t LowCh = *(*ppLMBCSin)++; - - if (HighCh == ULMBCS_UNICOMPATZERO ) - { - HighCh = LowCh; - LowCh = 0; /* zero-byte in LSB special character */ - } - return (UChar)((HighCh << 8) | LowCh); -} - - - -/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index' - bytes left in source up to sourceLimit.Errors appropriately if not. - If we reach the limit, then update the source pointer to there to consume - all input as required by ICU converter semantics. 
-*/ - -#define CHECK_SOURCE_LIMIT(index) \ - if (args->source+index > args->sourceLimit){\ - *err = U_TRUNCATED_CHAR_FOUND;\ - args->source = args->sourceLimit;\ - return 0xffff;} - -/* Return the Unicode representation for the current LMBCS character */ - -static UChar32 -_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - UChar32 uniChar = 0; /* an output UNICODE char */ - ulmbcs_byte_t CurByte; /* A byte from the input stream */ - - /* error check */ - if (args->source >= args->sourceLimit) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - /* Grab first byte & save address for error recovery */ - CurByte = *((ulmbcs_byte_t *) (args->source++)); - - /* - * at entry of each if clause: - * 1. 'CurByte' points at the first byte of a LMBCS character - * 2. '*source'points to the next byte of the source stream after 'CurByte' - * - * the job of each if clause is: - * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte) - * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately - */ - - /* First lets check the simple fixed values. */ - - if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */ - || (CurByte == 0) - || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR - || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE) - { - uniChar = CurByte; - } - else - { - UConverterDataLMBCS * extraInfo; - ulmbcs_byte_t group; - UConverterSharedData *cnv; - - if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */ - { - ulmbcs_byte_t C0C1byte; - CHECK_SOURCE_LIMIT(1); - C0C1byte = *(args->source)++; - uniChar = (C0C1byte < ULMBCS_C1START) ? 
C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte; - } - else - if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */ - { - CHECK_SOURCE_LIMIT(2); - - /* don't check for error indicators fffe/ffff below */ - return GetUniFromLMBCSUni(&(args->source)); - } - else if (CurByte <= ULMBCS_CTRLOFFSET) - { - group = CurByte; /* group byte is in the source */ - extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL) - { - /* this is not a valid group byte - no converter*/ - *err = U_INVALID_CHAR_FOUND; - } - else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ - { - - CHECK_SOURCE_LIMIT(2); - - /* check for LMBCS doubled-group-byte case */ - if (*args->source == group) { - /* single byte */ - ++args->source; - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE); - ++args->source; - } else { - /* double byte */ - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE); - args->source += 2; - } - } - else { /* single byte conversion */ - CHECK_SOURCE_LIMIT(1); - CurByte = *(args->source)++; - - if (CurByte >= ULMBCS_C1START) - { - uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); - } - else - { - /* The non-optimizable oddballs where there is an explicit byte - * AND the second byte is not in the upper ascii range - */ - char bytes[2]; - - extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT]; - - /* Lookup value must include opt group */ - bytes[0] = group; - bytes[1] = CurByte; - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE); - } - } - } - else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */ - { - extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - group = extraInfo->OptGroup; - cnv = extraInfo->OptGrpConverter[group]; - if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ - { - if 
(!ucnv_MBCSIsLeadByte(cnv, CurByte)) - { - CHECK_SOURCE_LIMIT(0); - - /* let the MBCS conversion consume CurByte again */ - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE); - } - else - { - CHECK_SOURCE_LIMIT(1); - /* let the MBCS conversion consume CurByte again */ - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE); - ++args->source; - } - } - else /* single byte conversion */ - { - uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); - } - } - } - return uniChar; -} - - -/* The exported function that converts lmbcs to one or more - UChars - currently UTF-16 -*/ -static void -_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - char LMBCS [ULMBCS_CHARSIZE_MAX]; - UChar uniChar; /* one output UNICODE char */ - const char * saveSource; /* beginning of current code point */ - const char * pStartLMBCS = args->source; /* beginning of whole string */ - const char * errSource = NULL; /* pointer to actual input in case an error occurs */ - int8_t savebytes = 0; - - /* Process from source to limit, or until error */ - while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target) - { - saveSource = args->source; /* beginning of current code point */ - - if (args->converter->toULength) /* reassemble char from previous call */ - { - const char *saveSourceLimit; - size_t size_old = args->converter->toULength; - - /* limit from source is either remainder of temp buffer, or user limit on source */ - size_t size_new_maybe_1 = sizeof(LMBCS) - size_old; - size_t size_new_maybe_2 = args->sourceLimit - args->source; - size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? 
size_new_maybe_1 : size_new_maybe_2; - - - uprv_memcpy(LMBCS, args->converter->toUBytes, size_old); - uprv_memcpy(LMBCS + size_old, args->source, size_new); - saveSourceLimit = args->sourceLimit; - args->source = errSource = LMBCS; - args->sourceLimit = LMBCS+size_old+size_new; - savebytes = (int8_t)(size_old+size_new); - uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); - args->source = saveSource + ((args->source - LMBCS) - size_old); - args->sourceLimit = saveSourceLimit; - - if (*err == U_TRUNCATED_CHAR_FOUND) - { - /* evil special case: source buffers so small a char spans more than 2 buffers */ - args->converter->toULength = savebytes; - uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes); - args->source = args->sourceLimit; - *err = U_ZERO_ERROR; - return; - } - else - { - /* clear the partial-char marker */ - args->converter->toULength = 0; - } - } - else - { - errSource = saveSource; - uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); - savebytes = (int8_t)(args->source - saveSource); - } - if (U_SUCCESS(*err)) - { - if (uniChar < 0xfffe) - { - *(args->target)++ = uniChar; - if(args->offsets) - { - *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS); - } - } - else if (uniChar == 0xfffe) - { - *err = U_INVALID_CHAR_FOUND; - } - else /* if (uniChar == 0xffff) */ - { - *err = U_ILLEGAL_CHAR_FOUND; - } - } - } - /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */ - if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - else if (U_FAILURE(*err)) - { - /* If character incomplete or unmappable/illegal, store it in toUBytes[] */ - args->converter->toULength = savebytes; - if (savebytes > 0) { - uprv_memcpy(args->converter->toUBytes, errSource, savebytes); - } - if (*err == U_TRUNCATED_CHAR_FOUND) { - *err = U_ZERO_ERROR; - } - } -} - -/* And now, the macroized declarations of data & functions: */ -DEFINE_LMBCS_OPEN(1) -DEFINE_LMBCS_OPEN(2) 
-DEFINE_LMBCS_OPEN(3) -DEFINE_LMBCS_OPEN(4) -DEFINE_LMBCS_OPEN(5) -DEFINE_LMBCS_OPEN(6) -DEFINE_LMBCS_OPEN(8) -DEFINE_LMBCS_OPEN(11) -DEFINE_LMBCS_OPEN(16) -DEFINE_LMBCS_OPEN(17) -DEFINE_LMBCS_OPEN(18) -DEFINE_LMBCS_OPEN(19) - - -DECLARE_LMBCS_DATA(1) -DECLARE_LMBCS_DATA(2) -DECLARE_LMBCS_DATA(3) -DECLARE_LMBCS_DATA(4) -DECLARE_LMBCS_DATA(5) -DECLARE_LMBCS_DATA(6) -DECLARE_LMBCS_DATA(8) -DECLARE_LMBCS_DATA(11) -DECLARE_LMBCS_DATA(16) -DECLARE_LMBCS_DATA(17) -DECLARE_LMBCS_DATA(18) -DECLARE_LMBCS_DATA(19) - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnv_lmb.cpp b/deps/icu-small/source/common/ucnv_lmb.cpp new file mode 100644 index 0000000000..4a5befde61 --- /dev/null +++ b/deps/icu-small/source/common/ucnv_lmb.cpp @@ -0,0 +1,1386 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_lmb.cpp +* encoding: UTF-8 +* tab size: 4 (not used) +* indentation:4 +* +* created on: 2000feb09 +* created by: Brendan Murray +* extensively hacked up by: Jim Snyder-Grant +* +* Modification History: +* +* Date Name Description +* +* 06/20/2000 helena OS/400 port changes; mostly typecast. +* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers. +* Add comments to document LMBCS format and implementation +* restructured order & breakdown of functions +* 06/28/2000 helena Major rewrite for the callback API changes. 
+*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv_err.h" +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "ucnv_imp.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" + +#ifdef EBCDIC_RTL + #include "ascii_a.h" +#endif + +/* + LMBCS + + (Lotus Multi-Byte Character Set) + + LMBCS was invented in the late 1980's and is primarily used in Lotus Notes + databases and in Lotus 1-2-3 files. Programmers who work with the APIs + into these products will sometimes need to deal with strings in this format. + + The code in this file provides an implementation for an ICU converter of + LMBCS to and from Unicode. + + Since the LMBCS character set is only sparsely documented in existing + printed or online material, we have added extensive annotation to this + file to serve as a guide to understanding LMBCS. + + LMBCS was originally designed with these four sometimes-competing design goals: + + -Provide encodings for the characters in 12 existing national standards + (plus a few other characters) + -Minimal memory footprint + -Maximal speed of conversion into the existing national character sets + -No need to track a changing state as you interpret a string. + + + All of the national character sets LMBCS was trying to encode are 'ANSI' + based, in that the bytes from 0x20 - 0x7F are almost exactly the + same common Latin unaccented characters and symbols in all character sets. + + So, in order to help meet the speed & memory design goals, the common ANSI + bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS. + + The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as + follows: + + [G] D1 [D2] + + That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2 + data bytes. 
The maximum size of a LMBCS character is 3 bytes: +*/ +#define ULMBCS_CHARSIZE_MAX 3 +/* + The single-byte values from 0x20 to 0x7F are examples of single D1 bytes. + We often have to figure out if byte values are below or above this, so we + use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control + characters just above & below the common lower-ANSI range */ +#define ULMBCS_C0END 0x1F +#define ULMBCS_C1START 0x80 +/* + Since LMBCS is always dealing in byte units, we create a local type here for + dealing with these units of LMBCS code units: + +*/ +typedef uint8_t ulmbcs_byte_t; + +/* + Most of the values less than 0x20 are reserved in LMBCS to announce + which national character standard is being used for the 'D' bytes. + In the comments we show the common name and the IBM character-set ID + for these character-set announcers: +*/ + +#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */ +#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */ +#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */ +#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */ +#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */ +#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */ +#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */ +#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */ +#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */ +#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */ +#define ULMBCS_GRP_TW 0x12 /* Chinese TC :ibm-950 */ +#define ULMBCS_GRP_CN 0x13 /* Chinese SC :ibm-1386 */ + +/* + So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS + character is one of those 12 values, you can interpret the remaining bytes of + that character as coming from one of those character sets. Since the lower + ANSI bytes already are represented in single bytes, one of the character + set announcers is used to announce a character that starts with a byte of + 0x80 or greater.
+ + The character sets are arranged so that the single byte sets all appear + before the multi-byte character sets. When we need to tell whether a + group byte is for a single byte char set or not we use this define: */ + +#define ULMBCS_DOUBLEOPTGROUP_START 0x10 + +/* +However, to fully understand LMBCS, you must also understand a series of +exceptions & optimizations made in service of the design goals. + +First, those of you who are character set mavens may have noticed that +the 'double-byte' character sets are actually multi-byte character sets +that can have one or two bytes, even in the upper-ascii range. To force +each group byte to introduce a fixed-width encoding (to make it faster to +count characters), we use a convention of doubling up on the group byte +to introduce any single-byte character > 0x80 in an otherwise double-byte +character set. So, for example, the LMBCS sequence x10 x10 xAE is the +same as '0xAE' in the Japanese code page 943. + +Next, you will notice that the list of group bytes has some gaps. +These are used in various ways. + +We reserve a few special single byte values for common control +characters. These are in the same place as their ANSI equivalents for speed. +*/ + +#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */ +#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */ +#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */ + +/* Then, 1-2-3 reserved a special single-byte character to put at the +beginning of internal 'system' range names: */ + +#define ULMBCS_123SYSTEMRANGE 0x19 + +/* Then we needed a place to put all the other ansi control characters +that must be moved to different values because LMBCS reserves those +values for other purposes.
To represent the control characters, we start +with a first byte of 0xF & add the control character value as the +second byte */ +#define ULMBCS_GRP_CTRL 0x0F + +/* For the C0 controls (less than 0x20), we add 0x20 to preserve the +useful doctrine that any byte less than 0x20 in a LMBCS char must be +the first byte of a character:*/ +#define ULMBCS_CTRLOFFSET 0x20 + +/* +Where to put the characters that aren't part of any of the 12 national +character sets? The first thing that was done, in the earlier years of +LMBCS, was to use up the spaces of the form + + [G] D1, + + where 'G' was one of the single-byte character groups, and + D1 was less than 0x80. These sequences are gathered together + into a Lotus-invented doublebyte character set to represent a + lot of stray values. Internally, in this implementation, we track this + as group '0', as a place to tuck this exceptions list.*/ + +#define ULMBCS_GRP_EXCEPT 0x00 +/* + Finally, as the durability and usefulness of UNICODE became clear, + LOTUS added a new group 0x14 to hold Unicode values not otherwise + represented in LMBCS: */ +#define ULMBCS_GRP_UNICODE 0x14 +/* The two bytes appearing after a 0x14 are interpreted as UTF-16 BE +(Big-Endian) characters. The exception comes when the UTF16 +representation would have a zero as the second byte. In that case, +'F6' is used in its place, and the bytes are swapped. (This prevents +LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: +0xF6xx is in the middle of the Private Use Area.)*/ +#define ULMBCS_UNICOMPATZERO 0xF6 + +/* It is also useful in our code to have a constant for the size of +a LMBCS char that holds a literal Unicode value */ +#define ULMBCS_UNICODE_SIZE 3 + +/* +To squish the LMBCS representations down even further, and to make +translations even faster, sometimes the optimization group byte can be dropped +from a LMBCS character. This is decided on a process-by-process basis.
The +group byte that is dropped is called the 'optimization group'. + +For Notes, the optimization group is always 0x1.*/ +#define ULMBCS_DEFAULTOPTGROUP 0x1 +/* For 1-2-3 files, the optimization group is stored in the header of the 1-2-3 +file. + + In any case, when using ICU, you pass in the +optimization group as part of the name of the converter (LMBCS-1, LMBCS-2, +etc.). Using plain 'LMBCS' as the name of the converter will give you +LMBCS-1. + + +*** Implementation strategy *** + + +Because of the extensive use of other character sets, the LMBCS converter +keeps a mapping between optimization groups and IBM character sets, so that +ICU converters can be created and used as needed. */ + +/* As you can see, even though any byte below 0x20 could be an optimization +byte, only those at 0x13 or below can map to an actual converter. To limit +some loops and searches, we define a value for that last group converter:*/ + +#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ + +static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = { + /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ + /* 0x0001 */ "ibm-850", + /* 0x0002 */ "ibm-851", + /* 0x0003 */ "windows-1255", + /* 0x0004 */ "windows-1256", + /* 0x0005 */ "windows-1251", + /* 0x0006 */ "ibm-852", + /* 0x0007 */ NULL, /* Unused */ + /* 0x0008 */ "windows-1254", + /* 0x0009 */ NULL, /* Control char HT */ + /* 0x000A */ NULL, /* Control char LF */ + /* 0x000B */ "windows-874", + /* 0x000C */ NULL, /* Unused */ + /* 0x000D */ NULL, /* Control char CR */ + /* 0x000E */ NULL, /* Unused */ + /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */ + /* 0x0010 */ "windows-932", + /* 0x0011 */ "windows-949", + /* 0x0012 */ "windows-950", + /* 0x0013 */ "windows-936" + + /* The rest are null, including the 0x0014 Unicode compatibility region + and 0x0019, the 1-2-3 system range control char */ +}; + + +/* That's approximately all the
data that's needed for translating + LMBCS to Unicode. + + +However, to translate Unicode to LMBCS, we need some more support. + +That's because there are often more than one possible mappings from a Unicode +code point back into LMBCS. The first thing we do is look up into a table +to figure out if there are more than one possible mappings. This table, +arranged by Unicode values (including ranges) either lists which group +to use, or says that it could go into one or more of the SBCS sets, or +into one or more of the DBCS sets. (If the character exists in both DBCS & +SBCS, the table will place it in the SBCS sets, to make the LMBCS code point +length as small as possible. Here's the two special markers we use to indicate +ambiguous mappings: */ + +#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one + LMBCS sbcs native encoding + (example: most accented latin) */ +#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one + LMBCS mbcs native encoding + (example: Unihan) */ +#define ULMBCS_AMBIGUOUS_ALL 0x82 +/* And here's a simple way to see if a group falls in an appropriate range */ +#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \ + ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \ + (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \ + (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \ + (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \ + ((agroup) == ULMBCS_AMBIGUOUS_ALL) + + +/* The table & some code to use it: */ + + +static const struct _UniLMBCSGrpMap +{ + const UChar uniStartRange; + const UChar uniEndRange; + const ulmbcs_byte_t GrpType; +} UniLMBCSGrpMap[] += +{ + + {0x0001, 0x001F, ULMBCS_GRP_CTRL}, + {0x0080, 0x009F, ULMBCS_GRP_CTRL}, + {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS}, + {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL}, + {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS}, + {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL}, + {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS}, + {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL}, + {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS}, + {0x00B6, 0x00B6, 
ULMBCS_AMBIGUOUS_ALL}, + {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS}, + {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL}, + {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS}, + {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL}, + {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS}, + {0x01CE, 0x01CE, ULMBCS_GRP_TW }, + {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS}, + {0x02BA, 0x02BA, ULMBCS_GRP_CN}, + {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS}, + {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS}, + {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS}, + {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS}, + {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL}, + {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS}, + {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL}, + {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS}, + {0x0400, 0x0400, ULMBCS_GRP_RU}, + {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL}, + {0x0402, 0x040F, ULMBCS_GRP_RU}, + {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL}, + {0x0432, 0x044E, ULMBCS_GRP_RU}, + {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL}, + {0x0450, 0x0491, ULMBCS_GRP_RU}, + {0x05B0, 0x05F2, ULMBCS_GRP_HE}, + {0x060C, 0x06AF, ULMBCS_GRP_AR}, + {0x0E01, 0x0E5B, ULMBCS_GRP_TH}, + {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS}, + {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS}, + {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS}, + {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS}, + {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS}, + {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS}, + {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL}, + {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS}, + {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL}, + {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS}, + {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL}, + {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS}, + {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS}, + {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL}, + {0x2027, 0x2027, ULMBCS_GRP_TW}, + {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL}, + {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS}, + {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS}, + {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS}, + {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS}, + {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS}, + {0x203C, 0x203C, ULMBCS_GRP_EXCEPT}, + 
{0x2074, 0x2074, ULMBCS_GRP_KO}, + {0x207F, 0x207F, ULMBCS_GRP_EXCEPT}, + {0x2081, 0x2084, ULMBCS_GRP_KO}, + {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS}, + {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS}, + {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS}, + /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/ + {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS}, + {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS}, + {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS}, + {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS}, + {0x2153, 0x2154, ULMBCS_GRP_KO}, + {0x215B, 0x215E, ULMBCS_GRP_EXCEPT}, + {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS}, + {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL}, + {0x2194, 0x2195, ULMBCS_GRP_EXCEPT}, + {0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS}, + {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT}, + {0x21B8, 0x21B9, ULMBCS_GRP_CN}, + {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT}, + {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS}, + {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT}, + {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS}, + {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT}, + {0x21E7, 0x21E7, ULMBCS_GRP_CN}, + {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS}, + {0x2201, 0x2201, ULMBCS_GRP_EXCEPT}, + {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS}, + {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS}, + {0x2204, 0x2206, ULMBCS_GRP_EXCEPT}, + {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS}, + {0x2209, 0x220A, ULMBCS_GRP_EXCEPT}, + {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS}, + {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS}, + {0x2219, 0x2219, ULMBCS_GRP_EXCEPT}, + {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS}, + {0x221B, 0x221C, ULMBCS_GRP_EXCEPT}, + {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS}, + {0x221F, 0x221F, ULMBCS_GRP_EXCEPT}, + {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS}, + {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS}, + {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS}, + {0x2245, 0x2248, ULMBCS_GRP_EXCEPT}, + {0x224C, 0x224C, ULMBCS_GRP_TW}, + {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS}, + {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS}, + {0x2262, 0x2265, ULMBCS_GRP_EXCEPT}, + {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS}, + {0x2282, 0x2283, 
ULMBCS_AMBIGUOUS_MBCS}, + {0x2284, 0x2285, ULMBCS_GRP_EXCEPT}, + {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS}, + {0x2288, 0x2297, ULMBCS_GRP_EXCEPT}, + {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS}, + {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT}, + {0x2310, 0x2310, ULMBCS_GRP_EXCEPT}, + {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS}, + {0x2318, 0x2321, ULMBCS_GRP_EXCEPT}, + {0x2318, 0x2321, ULMBCS_GRP_CN}, + {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS}, + {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS}, + {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS}, + {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL}, + {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS}, + {0x2504, 0x2505, ULMBCS_GRP_TW}, + {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL}, + {0x2666, 0x2666, ULMBCS_GRP_EXCEPT}, + {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS}, + {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL}, + {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS}, + {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS}, + {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS}, + {0x266F, 0x266F, ULMBCS_GRP_JA}, + {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS}, + {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS}, + {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT}, + {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS}, + {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS}, + {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS}, + {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE} +}; + +static ulmbcs_byte_t +FindLMBCSUniRange(UChar uniChar) +{ + const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap; + + while (uniChar > pTable->uniEndRange) + { + pTable++; + } + + if (uniChar >= pTable->uniStartRange) + { + return pTable->GrpType; + } + return ULMBCS_GRP_UNICODE; +} + +/* +We also ask the creator of a converter to send in a preferred locale +that we can use in resolving ambiguous mappings. They send the locale +in as a string, and we map it, if possible, to one of the +LMBCS groups. We use this table, and the associated code, to +do the lookup: */ + +/************************************************** + This table maps locale ID's to LMBCS opt groups. + The default return is group 0x01. 
Note that for + performance reasons, the table is sorted in + increasing alphabetic order, with the notable + exception of zhTW. This is to force the check + for Traditonal Chinese before dropping back to + Simplified. + + Note too that the Latin-1 groups have been + commented out because it's the default, and + this shortens the table, allowing a serial + search to go quickly. + *************************************************/ + +static const struct _LocaleLMBCSGrpMap +{ + const char *LocaleID; + const ulmbcs_byte_t OptGroup; +} LocaleLMBCSGrpMap[] = +{ + {"ar", ULMBCS_GRP_AR}, + {"be", ULMBCS_GRP_RU}, + {"bg", ULMBCS_GRP_L2}, + /* {"ca", ULMBCS_GRP_L1}, */ + {"cs", ULMBCS_GRP_L2}, + /* {"da", ULMBCS_GRP_L1}, */ + /* {"de", ULMBCS_GRP_L1}, */ + {"el", ULMBCS_GRP_GR}, + /* {"en", ULMBCS_GRP_L1}, */ + /* {"es", ULMBCS_GRP_L1}, */ + /* {"et", ULMBCS_GRP_L1}, */ + /* {"fi", ULMBCS_GRP_L1}, */ + /* {"fr", ULMBCS_GRP_L1}, */ + {"he", ULMBCS_GRP_HE}, + {"hu", ULMBCS_GRP_L2}, + /* {"is", ULMBCS_GRP_L1}, */ + /* {"it", ULMBCS_GRP_L1}, */ + {"iw", ULMBCS_GRP_HE}, + {"ja", ULMBCS_GRP_JA}, + {"ko", ULMBCS_GRP_KO}, + /* {"lt", ULMBCS_GRP_L1}, */ + /* {"lv", ULMBCS_GRP_L1}, */ + {"mk", ULMBCS_GRP_RU}, + /* {"nl", ULMBCS_GRP_L1}, */ + /* {"no", ULMBCS_GRP_L1}, */ + {"pl", ULMBCS_GRP_L2}, + /* {"pt", ULMBCS_GRP_L1}, */ + {"ro", ULMBCS_GRP_L2}, + {"ru", ULMBCS_GRP_RU}, + {"sh", ULMBCS_GRP_L2}, + {"sk", ULMBCS_GRP_L2}, + {"sl", ULMBCS_GRP_L2}, + {"sq", ULMBCS_GRP_L2}, + {"sr", ULMBCS_GRP_RU}, + /* {"sv", ULMBCS_GRP_L1}, */ + {"th", ULMBCS_GRP_TH}, + {"tr", ULMBCS_GRP_TR}, + {"uk", ULMBCS_GRP_RU}, + /* {"vi", ULMBCS_GRP_L1}, */ + {"zhTW", ULMBCS_GRP_TW}, + {"zh", ULMBCS_GRP_CN}, + {NULL, ULMBCS_GRP_L1} +}; + + +static ulmbcs_byte_t +FindLMBCSLocale(const char *LocaleID) +{ + const struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap; + + if ((!LocaleID) || (!*LocaleID)) + { + return 0; + } + + while (pTable->LocaleID) + { + if (*pTable->LocaleID == *LocaleID) /* Check only first 
char for speed */ + { + /* First char matches - check whole name, for entry-length */ + if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0) + return pTable->OptGroup; + } + else + if (*pTable->LocaleID > *LocaleID) /* Sorted alphabetically - exit */ + break; + pTable++; + } + return ULMBCS_GRP_L1; +} + + +/* + Before we get to the main body of code, here's how we hook up to the rest + of ICU. ICU converters are required to define a structure that includes + some function pointers, and some common data, in the style of a C++ + vtable. There is also room in there for converter-specific data. LMBCS + uses that converter-specific data to keep track of the 12 subconverters + we use, the optimization group, and the group (if any) that matches the + locale. We have one structure instantiated for each of the 12 possible + optimization groups. To avoid typos & to avoid boring the reader, we + put the declarations of these structures and functions into macros. To see + the definitions of these structures, see unicode\ucnv_bld.h +*/ + +typedef struct + { + UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */ + uint8_t OptGroup; /* default Opt. grp. 
for this LMBCS session */ + uint8_t localeConverterIndex; /* reasonable locale match for index */ + } +UConverterDataLMBCS; + +U_CDECL_BEGIN +static void U_CALLCONV _LMBCSClose(UConverter * _this); +U_CDECL_END + +#define DECLARE_LMBCS_DATA(n) \ +static const UConverterImpl _LMBCSImpl##n={\ + UCNV_LMBCS_##n,\ + NULL,NULL,\ + _LMBCSOpen##n,\ + _LMBCSClose,\ + NULL,\ + _LMBCSToUnicodeWithOffsets,\ + _LMBCSToUnicodeWithOffsets,\ + _LMBCSFromUnicode,\ + _LMBCSFromUnicode,\ + NULL,\ + NULL,\ + NULL,\ + NULL,\ + _LMBCSSafeClone,\ + ucnv_getCompleteUnicodeSet,\ + NULL,\ + NULL\ +};\ +static const UConverterStaticData _LMBCSStaticData##n={\ + sizeof(UConverterStaticData),\ + "LMBCS-" #n,\ + 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\ + { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \ +};\ +const UConverterSharedData _LMBCSData##n= \ + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_LMBCSStaticData##n, &_LMBCSImpl##n); + + /* The only function we needed to duplicate 12 times was the 'open' +function, which will do basically the same thing except set a different +optimization group. 
So, we put the common stuff into a worker function, +and set up another macro to stamp out the 12 open functions:*/ +#define DEFINE_LMBCS_OPEN(n) \ +static void U_CALLCONV \ + _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \ +{ _LMBCSOpenWorker(_this, pArgs, err, n); } + + + +/* Here's the open worker & the common close function */ +static void +_LMBCSOpenWorker(UConverter* _this, + UConverterLoadArgs *pArgs, + UErrorCode* err, + ulmbcs_byte_t OptGroup) +{ + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); + _this->extraInfo = extraInfo; + if(extraInfo != NULL) + { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs= UCNV_LOAD_ARGS_INITIALIZER; + ulmbcs_byte_t i; + + uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS)); + + stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; + + for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++) + { + if(OptGroupByteToCPName[i] != NULL) { + extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], &stackPieces, &stackArgs, err); + } + } + + if(U_FAILURE(*err) || pArgs->onlyTestIsLoadable) { + _LMBCSClose(_this); + return; + } + extraInfo->OptGroup = OptGroup; + extraInfo->localeConverterIndex = FindLMBCSLocale(pArgs->locale); + } + else + { + *err = U_MEMORY_ALLOCATION_ERROR; + } +} + +U_CDECL_BEGIN +static void U_CALLCONV +_LMBCSClose(UConverter * _this) +{ + if (_this->extraInfo != NULL) + { + ulmbcs_byte_t Ix; + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo; + + for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) + { + if (extraInfo->OptGrpConverter[Ix] != NULL) + ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); + } + if (!_this->isExtraLocal) { + uprv_free (_this->extraInfo); + _this->extraInfo = NULL; + } + } +} + +typedef struct LMBCSClone { + UConverter cnv; + UConverterDataLMBCS lmbcs; +} LMBCSClone; + +static UConverter * U_CALLCONV +_LMBCSSafeClone(const UConverter 
*cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) { + (void)status; + LMBCSClone *newLMBCS; + UConverterDataLMBCS *extraInfo; + int32_t i; + + if(*pBufferSize<=0) { + *pBufferSize=(int32_t)sizeof(LMBCSClone); + return NULL; + } + + extraInfo=(UConverterDataLMBCS *)cnv->extraInfo; + newLMBCS=(LMBCSClone *)stackBuffer; + + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS)); + + /* share the subconverters */ + for(i = 0; i <= ULMBCS_GRP_LAST; ++i) { + if(extraInfo->OptGrpConverter[i] != NULL) { + ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); + } + } + + newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs; + newLMBCS->cnv.isExtraLocal = TRUE; + return &newLMBCS->cnv; +} + +/* + * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117) + * which added all code points except for U+F6xx + * because those cannot be represented in the Unicode group. + * However, it turns out that windows-950 has roundtrips for all of U+F6xx + * which means that LMBCS can convert all Unicode code points after all. + * We now simply use ucnv_getCompleteUnicodeSet(). + * + * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet(). (091216) + */ + +/* + Here's the basic helper function that we use when converting from + Unicode to LMBCS, and we suspect that a Unicode character will fit into + one of the 12 groups. The return value is the number of bytes written + starting at pStartLMBCS (if any). 
+*/ + +static size_t +LMBCSConversionWorker ( + UConverterDataLMBCS * extraInfo, /* subconverters, opt & locale groups */ + ulmbcs_byte_t group, /* The group to try */ + ulmbcs_byte_t * pStartLMBCS, /* where to put the results */ + UChar * pUniChar, /* The input unicode character */ + ulmbcs_byte_t * lastConverterIndex, /* output: track last successful group used */ + UBool * groups_tried /* output: track any unsuccessful groups */ +) +{ + ulmbcs_byte_t * pLMBCS = pStartLMBCS; + UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group]; + + int bytesConverted; + uint32_t value; + ulmbcs_byte_t firstByte; + + U_ASSERT(xcnv); + U_ASSERT(group 0) { + firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8)); + } else { + /* most common failure mode is an unassigned character */ + groups_tried[group] = TRUE; + return 0; + } + + *lastConverterIndex = group; + + /* All initial byte values in lower ascii range should have been caught by now, + except with the exception group. + */ + U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); + + /* use converted data: first write 0, 1 or two group bytes */ + if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group) + { + *pLMBCS++ = group; + if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) + { + *pLMBCS++ = group; + } + } + + /* don't emit control chars */ + if ( bytesConverted == 1 && firstByte < 0x20 ) + return 0; + + + /* then move over the converted data */ + switch(bytesConverted) + { + case 4: + *pLMBCS++ = (ulmbcs_byte_t)(value >> 24); + U_FALLTHROUGH; + case 3: + *pLMBCS++ = (ulmbcs_byte_t)(value >> 16); + U_FALLTHROUGH; + case 2: + *pLMBCS++ = (ulmbcs_byte_t)(value >> 8); + U_FALLTHROUGH; + case 1: + *pLMBCS++ = (ulmbcs_byte_t)value; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + + return (pLMBCS - pStartLMBCS); +} + + +/* This is a much simpler version of above, when we +know we are writing LMBCS using the Unicode 
group +*/ +static size_t +LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar) +{ + /* encode into LMBCS Unicode range */ + uint8_t LowCh = (uint8_t)(uniChar & 0x00FF); + uint8_t HighCh = (uint8_t)(uniChar >> 8); + + *pLMBCS++ = ULMBCS_GRP_UNICODE; + + if (LowCh == 0) + { + *pLMBCS++ = ULMBCS_UNICOMPATZERO; + *pLMBCS++ = HighCh; + } + else + { + *pLMBCS++ = HighCh; + *pLMBCS++ = LowCh; + } + return ULMBCS_UNICODE_SIZE; +} + + + +/* The main Unicode to LMBCS conversion function */ +static void U_CALLCONV +_LMBCSFromUnicode(UConverterFromUnicodeArgs* args, + UErrorCode* err) +{ + ulmbcs_byte_t lastConverterIndex = 0; + UChar uniChar; + ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX]; + ulmbcs_byte_t * pLMBCS; + int32_t bytes_written; + UBool groups_tried[ULMBCS_GRP_LAST+1]; + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + int sourceIndex = 0; + + /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) + If that succeeds, see if it will all fit into the target & copy it over + if it does. + + We try conversions in the following order: + + 1. Single-byte ascii & special fixed control chars (&null) + 2. Look up group in table & try that (could be + A) Unicode group + B) control group, + C) national encoding, + or ambiguous SBCS or MBCS group (on to step 4...) + + 3. If its ambiguous, try this order: + A) The optimization group + B) The locale group + C) The last group that succeeded with this string. + D) every other group that's relevent (single or double) + E) If its single-byte ambiguous, try the exceptions group + + 4. 
And as a grand fallback: Unicode + */ + + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + ulmbcs_byte_t OldConverterIndex = 0; + + while (args->source < args->sourceLimit && !U_FAILURE(*err)) + { + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + OldConverterIndex = extraInfo->localeConverterIndex; + + if (args->target >= args->targetLimit) + { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + uniChar = *(args->source); + bytes_written = 0; + pLMBCS = LMBCS; + + /* check cases in rough order of how common they are, for speed */ + + /* single byte matches: strategy 1 */ + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + if((uniChar>=0x80) && (uniChar<=0xff) + /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7) + &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8)) + { + extraInfo->localeConverterIndex = ULMBCS_GRP_L1; + } + if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) || + uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR || + uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE + ) + { + *pLMBCS++ = (ulmbcs_byte_t ) uniChar; + bytes_written = 1; + } + + + if (!bytes_written) + { + /* Check by UNICODE range (Strategy 2) */ + ulmbcs_byte_t group = FindLMBCSUniRange(uniChar); + + if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */ + { + pLMBCS += LMBCSConvertUni(pLMBCS,uniChar); + + bytes_written = (int32_t)(pLMBCS - LMBCS); + } + else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */ + { + /* Handle control characters here */ + if (uniChar <= ULMBCS_C0END) + { + *pLMBCS++ = ULMBCS_GRP_CTRL; + *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar); + } + else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET) + { + *pLMBCS++ = ULMBCS_GRP_CTRL; + *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF); + } + bytes_written = (int32_t)(pLMBCS - LMBCS); + } + else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */ + { + /* a specific converter has been identified 
- use it */ + bytes_written = (int32_t)LMBCSConversionWorker ( + extraInfo, group, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + if (!bytes_written) /* the ambiguous group cases (Strategy 3) */ + { + uprv_memset(groups_tried, 0, sizeof(groups_tried)); + + /* check for non-default optimization group (Strategy 3A )*/ + if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))) + { + /*zhujin: upgrade, merge #39299 here (Lotus) */ + /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/ + + if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) + { + bytes_written = LMBCSConversionWorker (extraInfo, + ULMBCS_GRP_L1, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + + if(!bytes_written) + { + bytes_written = LMBCSConversionWorker (extraInfo, + ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + if(!bytes_written) + { + bytes_written = LMBCSConversionWorker (extraInfo, + extraInfo->localeConverterIndex, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + else + { + bytes_written = LMBCSConversionWorker (extraInfo, + extraInfo->localeConverterIndex, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + /* check for locale optimization group (Strategy 3B) */ + if (!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex))) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); + } + /* check for last optimization group used for this string (Strategy 3C) */ + if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex))) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); + } + if (!bytes_written) + { + /* just check every possible matching 
converter (Strategy 3D) */ + ulmbcs_byte_t grp_start; + ulmbcs_byte_t grp_end; + ulmbcs_byte_t grp_ix; + grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) + ? ULMBCS_DOUBLEOPTGROUP_START + : ULMBCS_GRP_L1); + grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) + ? ULMBCS_GRP_LAST + : ULMBCS_GRP_TH); + if(group == ULMBCS_AMBIGUOUS_ALL) + { + grp_start = ULMBCS_GRP_L1; + grp_end = ULMBCS_GRP_LAST; + } + for (grp_ix = grp_start; + grp_ix <= grp_end && !bytes_written; + grp_ix++) + { + if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix]) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + grp_ix, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + /* a final conversion fallback to the exceptions group if its likely + to be single byte (Strategy 3E) */ + if (!bytes_written && grp_start == ULMBCS_GRP_L1) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/ + if (!bytes_written) + { + + pLMBCS += LMBCSConvertUni(pLMBCS, uniChar); + bytes_written = (int32_t)(pLMBCS - LMBCS); + } + } + } + + /* we have a translation. 
increment source and write as much as posible to target */ + args->source++; + pLMBCS = LMBCS; + while (args->target < args->targetLimit && bytes_written--) + { + *(args->target)++ = *pLMBCS++; + if (args->offsets) + { + *(args->offsets)++ = sourceIndex; + } + } + sourceIndex++; + if (bytes_written > 0) + { + /* write any bytes that didn't fit in target to the error buffer, + common code will move this to target if we get called back with + enough target room + */ + uint8_t * pErrorBuffer = args->converter->charErrorBuffer; + *err = U_BUFFER_OVERFLOW_ERROR; + args->converter->charErrorBufferLength = (int8_t)bytes_written; + while (bytes_written--) + { + *pErrorBuffer++ = *pLMBCS++; + } + } + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + extraInfo->localeConverterIndex = OldConverterIndex; + } +} + + +/* Now, the Unicode from LMBCS section */ + + +/* A function to call when we are looking at the Unicode group byte in LMBCS */ +static UChar +GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */ +{ + uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/ + uint8_t LowCh = *(*ppLMBCSin)++; + + if (HighCh == ULMBCS_UNICOMPATZERO ) + { + HighCh = LowCh; + LowCh = 0; /* zero-byte in LSB special character */ + } + return (UChar)((HighCh << 8) | LowCh); +} + + + +/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index' + bytes left in source up to sourceLimit.Errors appropriately if not. + If we reach the limit, then update the source pointer to there to consume + all input as required by ICU converter semantics. 
+*/ + +#define CHECK_SOURCE_LIMIT(index) \ + if (args->source+index > args->sourceLimit){\ + *err = U_TRUNCATED_CHAR_FOUND;\ + args->source = args->sourceLimit;\ + return 0xffff;} + +/* Return the Unicode representation for the current LMBCS character */ + +static UChar32 U_CALLCONV +_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + UChar32 uniChar = 0; /* an output UNICODE char */ + ulmbcs_byte_t CurByte; /* A byte from the input stream */ + + /* error check */ + if (args->source >= args->sourceLimit) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + /* Grab first byte & save address for error recovery */ + CurByte = *((ulmbcs_byte_t *) (args->source++)); + + /* + * at entry of each if clause: + * 1. 'CurByte' points at the first byte of a LMBCS character + * 2. '*source'points to the next byte of the source stream after 'CurByte' + * + * the job of each if clause is: + * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte) + * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately + */ + + /* First lets check the simple fixed values. */ + + if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */ + || (CurByte == 0) + || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR + || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE) + { + uniChar = CurByte; + } + else + { + UConverterDataLMBCS * extraInfo; + ulmbcs_byte_t group; + UConverterSharedData *cnv; + + if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */ + { + ulmbcs_byte_t C0C1byte; + CHECK_SOURCE_LIMIT(1); + C0C1byte = *(args->source)++; + uniChar = (C0C1byte < ULMBCS_C1START) ? 
C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte; + } + else + if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */ + { + CHECK_SOURCE_LIMIT(2); + + /* don't check for error indicators fffe/ffff below */ + return GetUniFromLMBCSUni(&(args->source)); + } + else if (CurByte <= ULMBCS_CTRLOFFSET) + { + group = CurByte; /* group byte is in the source */ + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL) + { + /* this is not a valid group byte - no converter*/ + *err = U_INVALID_CHAR_FOUND; + } + else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ + { + + CHECK_SOURCE_LIMIT(2); + + /* check for LMBCS doubled-group-byte case */ + if (*args->source == group) { + /* single byte */ + ++args->source; + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE); + ++args->source; + } else { + /* double byte */ + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE); + args->source += 2; + } + } + else { /* single byte conversion */ + CHECK_SOURCE_LIMIT(1); + CurByte = *(args->source)++; + + if (CurByte >= ULMBCS_C1START) + { + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); + } + else + { + /* The non-optimizable oddballs where there is an explicit byte + * AND the second byte is not in the upper ascii range + */ + char bytes[2]; + + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT]; + + /* Lookup value must include opt group */ + bytes[0] = group; + bytes[1] = CurByte; + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE); + } + } + } + else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */ + { + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + group = extraInfo->OptGroup; + cnv = extraInfo->OptGrpConverter[group]; + if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ + { + if 
(!ucnv_MBCSIsLeadByte(cnv, CurByte)) + { + CHECK_SOURCE_LIMIT(0); + + /* let the MBCS conversion consume CurByte again */ + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE); + } + else + { + CHECK_SOURCE_LIMIT(1); + /* let the MBCS conversion consume CurByte again */ + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE); + ++args->source; + } + } + else /* single byte conversion */ + { + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); + } + } + } + return uniChar; +} + + +/* The exported function that converts lmbcs to one or more + UChars - currently UTF-16 +*/ +static void U_CALLCONV +_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + char LMBCS [ULMBCS_CHARSIZE_MAX]; + UChar uniChar; /* one output UNICODE char */ + const char * saveSource; /* beginning of current code point */ + const char * pStartLMBCS = args->source; /* beginning of whole string */ + const char * errSource = NULL; /* pointer to actual input in case an error occurs */ + int8_t savebytes = 0; + + /* Process from source to limit, or until error */ + while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target) + { + saveSource = args->source; /* beginning of current code point */ + + if (args->converter->toULength) /* reassemble char from previous call */ + { + const char *saveSourceLimit; + size_t size_old = args->converter->toULength; + + /* limit from source is either remainder of temp buffer, or user limit on source */ + size_t size_new_maybe_1 = sizeof(LMBCS) - size_old; + size_t size_new_maybe_2 = args->sourceLimit - args->source; + size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? 
size_new_maybe_1 : size_new_maybe_2; + + + uprv_memcpy(LMBCS, args->converter->toUBytes, size_old); + uprv_memcpy(LMBCS + size_old, args->source, size_new); + saveSourceLimit = args->sourceLimit; + args->source = errSource = LMBCS; + args->sourceLimit = LMBCS+size_old+size_new; + savebytes = (int8_t)(size_old+size_new); + uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); + args->source = saveSource + ((args->source - LMBCS) - size_old); + args->sourceLimit = saveSourceLimit; + + if (*err == U_TRUNCATED_CHAR_FOUND) + { + /* evil special case: source buffers so small a char spans more than 2 buffers */ + args->converter->toULength = savebytes; + uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes); + args->source = args->sourceLimit; + *err = U_ZERO_ERROR; + return; + } + else + { + /* clear the partial-char marker */ + args->converter->toULength = 0; + } + } + else + { + errSource = saveSource; + uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); + savebytes = (int8_t)(args->source - saveSource); + } + if (U_SUCCESS(*err)) + { + if (uniChar < 0xfffe) + { + *(args->target)++ = uniChar; + if(args->offsets) + { + *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS); + } + } + else if (uniChar == 0xfffe) + { + *err = U_INVALID_CHAR_FOUND; + } + else /* if (uniChar == 0xffff) */ + { + *err = U_ILLEGAL_CHAR_FOUND; + } + } + } + /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */ + if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + else if (U_FAILURE(*err)) + { + /* If character incomplete or unmappable/illegal, store it in toUBytes[] */ + args->converter->toULength = savebytes; + if (savebytes > 0) { + uprv_memcpy(args->converter->toUBytes, errSource, savebytes); + } + if (*err == U_TRUNCATED_CHAR_FOUND) { + *err = U_ZERO_ERROR; + } + } +} + +/* And now, the macroized declarations of data & functions: */ +DEFINE_LMBCS_OPEN(1) +DEFINE_LMBCS_OPEN(2) 
+DEFINE_LMBCS_OPEN(3) +DEFINE_LMBCS_OPEN(4) +DEFINE_LMBCS_OPEN(5) +DEFINE_LMBCS_OPEN(6) +DEFINE_LMBCS_OPEN(8) +DEFINE_LMBCS_OPEN(11) +DEFINE_LMBCS_OPEN(16) +DEFINE_LMBCS_OPEN(17) +DEFINE_LMBCS_OPEN(18) +DEFINE_LMBCS_OPEN(19) + + +DECLARE_LMBCS_DATA(1) +DECLARE_LMBCS_DATA(2) +DECLARE_LMBCS_DATA(3) +DECLARE_LMBCS_DATA(4) +DECLARE_LMBCS_DATA(5) +DECLARE_LMBCS_DATA(6) +DECLARE_LMBCS_DATA(8) +DECLARE_LMBCS_DATA(11) +DECLARE_LMBCS_DATA(16) +DECLARE_LMBCS_DATA(17) +DECLARE_LMBCS_DATA(18) +DECLARE_LMBCS_DATA(19) + +U_CDECL_END + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnv_set.c b/deps/icu-small/source/common/ucnv_set.c deleted file mode 100644 index c3933ab610..0000000000 --- a/deps/icu-small/source/common/ucnv_set.c +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2007, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucnv_set.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004sep07 -* created by: Markus W. Scherer -* -* Conversion API functions using USet (ucnv_getUnicodeSet()) -* moved here from ucnv.c for removing the dependency of other ucnv_ -* implementation functions on the USet implementation. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "unicode/ucnv.h" -#include "ucnv_bld.h" -#include "uset_imp.h" - -#if !UCONFIG_NO_CONVERSION - -U_CAPI void U_EXPORT2 -ucnv_getUnicodeSet(const UConverter *cnv, - USet *setFillIn, - UConverterUnicodeSet whichSet, - UErrorCode *pErrorCode) { - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - if(cnv==NULL || setFillIn==NULL || whichSetsharedData->impl->getUnicodeSet==NULL) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - - { - USetAdder sa={ - NULL, - uset_add, - uset_addRange, - uset_addString, - uset_remove, - uset_removeRange - }; - sa.set=setFillIn; - - /* empty the set */ - uset_clear(setFillIn); - - /* call the converter to add the code points it supports */ - cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode); - } -} - -#endif diff --git a/deps/icu-small/source/common/ucnv_set.cpp b/deps/icu-small/source/common/ucnv_set.cpp new file mode 100644 index 0000000000..926cee0de8 --- /dev/null +++ b/deps/icu-small/source/common/ucnv_set.cpp @@ -0,0 +1,70 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucnv_set.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep07 +* created by: Markus W. Scherer +* +* Conversion API functions using USet (ucnv_getUnicodeSet()) +* moved here from ucnv.c for removing the dependency of other ucnv_ +* implementation functions on the USet implementation. 
+*/ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "unicode/ucnv.h" +#include "ucnv_bld.h" +#include "uset_imp.h" + +#if !UCONFIG_NO_CONVERSION + +U_CAPI void U_EXPORT2 +ucnv_getUnicodeSet(const UConverter *cnv, + USet *setFillIn, + UConverterUnicodeSet whichSet, + UErrorCode *pErrorCode) { + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + if(cnv==NULL || setFillIn==NULL || whichSetsharedData->impl->getUnicodeSet==NULL) { + *pErrorCode=U_UNSUPPORTED_ERROR; + return; + } + + { + USetAdder sa={ + NULL, + uset_add, + uset_addRange, + uset_addString, + uset_remove, + uset_removeRange + }; + sa.set=setFillIn; + + /* empty the set */ + uset_clear(setFillIn); + + /* call the converter to add the code points it supports */ + cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode); + } +} + +#endif diff --git a/deps/icu-small/source/common/ucnv_u16.c b/deps/icu-small/source/common/ucnv_u16.c deleted file mode 100644 index d8f4576f33..0000000000 --- a/deps/icu-small/source/common/ucnv_u16.c +++ /dev/null @@ -1,1563 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_u16.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-16 converter implementation. Used to be in ucnv_utf.c. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -enum { - UCNV_NEED_TO_WRITE_BOM=1 -}; - -/* - * The UTF-16 toUnicode implementation is also used for the Java-specific - * "with BOM" variants of UTF-16BE and UTF-16LE. - */ -static void -_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode); - -/* UTF-16BE ----------------------------------------------------------------- */ - -#if U_IS_BIG_ENDIAN -# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets -#else -# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets -#endif - - -static void -_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source; - char *target; - int32_t *offsets; - - uint32_t targetCapacity, length, sourceIndex; - UChar c, trail; - char overflow[4]; - - source=pArgs->source; - length=(int32_t)(pArgs->sourceLimit-source); - if(length<=0) { - /* no input, nothing to do */ - return; - } - - cnv=pArgs->converter; - - /* write the BOM if necessary */ - if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xfe, (char)0xff }; - ucnv_fromUWriteBytes(cnv, - bom, 2, - &pArgs->target, pArgs->targetLimit, - &pArgs->offsets, -1, - pErrorCode); - cnv->fromUnicodeStatus=0; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-target); - offsets=pArgs->offsets; - sourceIndex=0; - - /* c!=0 indicates in several places outside the main loops that a surrogate was found */ - - if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { - /* the last buffer ended with a lead surrogate, output the surrogate pair */ - ++source; - --length; - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - 
target[2]=(uint8_t)(trail>>8); - target[3]=(uint8_t)trail; - target+=4; - targetCapacity-=4; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - } - sourceIndex=1; - cnv->fromUChar32=c=0; - } - - if(c==0) { - /* copy an even number of bytes for complete UChars */ - uint32_t count=2*length; - if(count>targetCapacity) { - count=targetCapacity&~1; - } - /* count is even */ - targetCapacity-=count; - count>>=1; - length-=count; - - if(offsets==NULL) { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target[2]=(uint8_t)(trail>>8); - target[3]=(uint8_t)trail; - target+=4; - } else { - break; - } - --count; - } - } else { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target+=2; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target[2]=(uint8_t)(trail>>8); - target[3]=(uint8_t)trail; - target+=4; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=2; - } else { - break; - } - --count; - } - } - - if(count==0) { - /* done with the loop for complete UChars */ - if(length>0 && targetCapacity>0) { - /* - * there is more input and some target capacity - - * it must be targetCapacity==1 because otherwise - * the above would have copied more; - * prepare for overflow output - */ - if(U16_IS_SINGLE(c=*source++)) { - overflow[0]=(char)(c>>8); - overflow[1]=(char)c; - length=2; /* 2 bytes to output */ - c=0; - /* } else { keep c for surrogate handling, length will be set there */ - } - } else { - length=0; - c=0; - 
} - } else { - /* keep c for surrogate handling, length will be set there */ - targetCapacity+=2*count; - } - } else { - length=0; /* from here on, length counts the bytes in overflow[] */ - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - length=0; - if(U16_IS_SURROGATE_LEAD(c)) { - if(sourcesourceLimit) { - if(U16_IS_TRAIL(trail=*source)) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - ++source; - overflow[0]=(char)(c>>8); - overflow[1]=(char)c; - overflow[2]=(char)(trail>>8); - overflow[3]=(char)trail; - length=4; /* 4 bytes to output */ - c=0; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - cnv->fromUChar32=c; - } - - if(length>0) { - /* output length bytes with overflow (length>targetCapacity>0) */ - ucnv_fromUWriteBytes(cnv, - overflow, length, - (char **)&target, pArgs->targetLimit, - &offsets, sourceIndex, - pErrorCode); - targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); - } - - if(U_SUCCESS(*pErrorCode) && sourcesourceLimit && targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -static void -_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source; - UChar *target; - int32_t *offsets; - - uint32_t targetCapacity, length, count, sourceIndex; - UChar c, trail; - - if(pArgs->converter->mode<8) { - _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); - return; - } - - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); - if(length<=0 && cnv->toUnicodeStatus==0) { - /* 
no input, nothing to do */ - return; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-target); - offsets=pArgs->offsets; - sourceIndex=0; - c=0; - - /* complete a partial UChar or pair from the last call */ - if(cnv->toUnicodeStatus!=0) { - /* - * special case: single byte from a previous buffer, - * where the byte turned out not to belong to a trail surrogate - * and the preceding, unmatched lead surrogate was put into toUBytes[] - * for error handling - */ - cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; - cnv->toULength=1; - cnv->toUnicodeStatus=0; - } - if((count=cnv->toULength)!=0) { - uint8_t *p=cnv->toUBytes; - do { - p[count++]=*source++; - ++sourceIndex; - --length; - if(count==2) { - c=((UChar)p[0]<<8)|p[1]; - if(U16_IS_SINGLE(c)) { - /* output the BMP code point */ - *target++=c; - if(offsets!=NULL) { - *offsets++=-1; - } - --targetCapacity; - count=0; - c=0; - break; - } else if(U16_IS_SURROGATE_LEAD(c)) { - /* continue collecting bytes for the trail surrogate */ - c=0; /* avoid unnecessary surrogate handling below */ - } else { - /* fall through to error handling for an unmatched trail surrogate */ - break; - } - } else if(count==4) { - c=((UChar)p[0]<<8)|p[1]; - trail=((UChar)p[2]<<8)|p[3]; - if(U16_IS_TRAIL(trail)) { - /* output the surrogate pair */ - *target++=c; - if(targetCapacity>=2) { - *target++=trail; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - } - targetCapacity-=2; - } else /* targetCapacity==1 */ { - targetCapacity=0; - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - count=0; - c=0; - break; - } else { - /* unmatched lead surrogate, handle here for consistent toUBytes[] */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - - /* back out reading the code unit after it */ - if(((const uint8_t *)pArgs->source-source)>=2) { - source-=2; - } else { - /* - * if the 
trail unit's first byte was in a previous buffer, then - * we need to put it into a special place because toUBytes[] will be - * used for the lead unit's bytes - */ - cnv->toUnicodeStatus=0x100|p[2]; - --source; - } - cnv->toULength=2; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; - } - } - } while(length>0); - cnv->toULength=(int8_t)count; - } - - /* copy an even number of bytes for complete UChars */ - count=2*targetCapacity; - if(count>length) { - count=length&~1; - } - if(c==0 && count>0) { - length-=count; - count>>=1; - targetCapacity-=count; - if(offsets==NULL) { - do { - c=((UChar)source[0]<<8)|source[1]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - } else { - break; - } - } while(--count>0); - } else { - do { - c=((UChar)source[0]<<8)|source[1]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - *offsets++=sourceIndex; - sourceIndex+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=4; - } else { - break; - } - } while(--count>0); - } - - if(count==0) { - /* done with the loop for complete UChars */ - c=0; - } else { - /* keep c for surrogate handling, trail will be set there */ - length+=2*(count-1); /* one more byte pair was consumed than count decremented */ - targetCapacity+=count; - } - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - cnv->toUBytes[0]=(uint8_t)(c>>8); - cnv->toUBytes[1]=(uint8_t)c; - cnv->toULength=2; - - if(U16_IS_SURROGATE_LEAD(c)) { - if(length>=2) { - 
if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - source+=2; - length-=2; - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - cnv->toULength=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } - - if(U_SUCCESS(*pErrorCode)) { - /* check for a remaining source byte */ - if(length>0) { - if(targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* it must be length==1 because otherwise the above would have copied more */ - cnv->toUBytes[cnv->toULength++]=*source++; - } - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static UChar32 -_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - const uint8_t *s, *sourceLimit; - UChar32 c; - - if(pArgs->converter->mode<8) { - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - - s=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - - if(s>=sourceLimit) { - /* no input */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - if(s+2>sourceLimit) { - /* only one byte: truncated UChar */ - pArgs->converter->toUBytes[0]=*s++; - pArgs->converter->toULength=1; - pArgs->source=(const char *)s; - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* get one UChar */ - c=((UChar32)*s<<8)|s[1]; - s+=2; - - /* check for a surrogate pair */ - if(U_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c)) { - if(s+2<=sourceLimit) { - UChar trail; - - /* get a second UChar and see if it is a trail surrogate */ - trail=((UChar)*s<<8)|s[1]; - if(U16_IS_TRAIL(trail)) { - 
c=U16_GET_SUPPLEMENTARY(c, trail); - s+=2; - } else { - /* unmatched lead surrogate */ - c=-2; - } - } else { - /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ - uint8_t *bytes=pArgs->converter->toUBytes; - s-=2; - pArgs->converter->toULength=(int8_t)(sourceLimit-s); - do { - *bytes++=*s++; - } while(sconverter->toUBytes; - pArgs->converter->toULength=2; - *bytes=*(s-2); - bytes[1]=*(s-1); - - c=0xffff; - *err=U_ILLEGAL_CHAR_FOUND; - } - } - - pArgs->source=(const char *)s; - return c; -} - -static void -_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode state */ - if(UCNV_GET_VERSION(cnv)==0) { - cnv->mode=8; /* no BOM handling */ - } else { - cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ - } - } - if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { - /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static void -_UTF16BEOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - if(UCNV_GET_VERSION(cnv)<=1) { - _UTF16BEReset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const char * -_UTF16BEGetName(const UConverter *cnv) { - if(UCNV_GET_VERSION(cnv)==0) { - return "UTF-16BE"; - } else { - return "UTF-16BE,version=1"; - } -} - -static const UConverterImpl _UTF16BEImpl={ - UCNV_UTF16_BigEndian, - - NULL, - NULL, - - _UTF16BEOpen, - NULL, - _UTF16BEReset, - - _UTF16BEToUnicodeWithOffsets, - _UTF16BEToUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16BEGetNextUChar, - - NULL, - _UTF16BEGetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF16BEStaticData={ - sizeof(UConverterStaticData), - "UTF-16BE", - 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, - { 0xff, 0xfd, 0, 0 
},2,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF16BEData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl); - -/* UTF-16LE ----------------------------------------------------------------- */ - -static void -_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source; - char *target; - int32_t *offsets; - - uint32_t targetCapacity, length, sourceIndex; - UChar c, trail; - char overflow[4]; - - source=pArgs->source; - length=(int32_t)(pArgs->sourceLimit-source); - if(length<=0) { - /* no input, nothing to do */ - return; - } - - cnv=pArgs->converter; - - /* write the BOM if necessary */ - if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xff, (char)0xfe }; - ucnv_fromUWriteBytes(cnv, - bom, 2, - &pArgs->target, pArgs->targetLimit, - &pArgs->offsets, -1, - pErrorCode); - cnv->fromUnicodeStatus=0; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - sourceIndex=0; - - /* c!=0 indicates in several places outside the main loops that a surrogate was found */ - - if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { - /* the last buffer ended with a lead surrogate, output the surrogate pair */ - ++source; - --length; - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target[2]=(uint8_t)trail; - target[3]=(uint8_t)(trail>>8); - target+=4; - targetCapacity-=4; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - } - sourceIndex=1; - cnv->fromUChar32=c=0; - } - - if(c==0) { - /* copy an even number of bytes for complete UChars */ - uint32_t count=2*length; - if(count>targetCapacity) { - count=targetCapacity&~1; - } - /* count 
is even */ - targetCapacity-=count; - count>>=1; - length-=count; - - if(offsets==NULL) { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target[2]=(uint8_t)trail; - target[3]=(uint8_t)(trail>>8); - target+=4; - } else { - break; - } - --count; - } - } else { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target+=2; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target[2]=(uint8_t)trail; - target[3]=(uint8_t)(trail>>8); - target+=4; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=2; - } else { - break; - } - --count; - } - } - - if(count==0) { - /* done with the loop for complete UChars */ - if(length>0 && targetCapacity>0) { - /* - * there is more input and some target capacity - - * it must be targetCapacity==1 because otherwise - * the above would have copied more; - * prepare for overflow output - */ - if(U16_IS_SINGLE(c=*source++)) { - overflow[0]=(char)c; - overflow[1]=(char)(c>>8); - length=2; /* 2 bytes to output */ - c=0; - /* } else { keep c for surrogate handling, length will be set there */ - } - } else { - length=0; - c=0; - } - } else { - /* keep c for surrogate handling, length will be set there */ - targetCapacity+=2*count; - } - } else { - length=0; /* from here on, length counts the bytes in overflow[] */ - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - length=0; - if(U16_IS_SURROGATE_LEAD(c)) { - if(sourcesourceLimit) { - 
if(U16_IS_TRAIL(trail=*source)) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - ++source; - overflow[0]=(char)c; - overflow[1]=(char)(c>>8); - overflow[2]=(char)trail; - overflow[3]=(char)(trail>>8); - length=4; /* 4 bytes to output */ - c=0; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - cnv->fromUChar32=c; - } - - if(length>0) { - /* output length bytes with overflow (length>targetCapacity>0) */ - ucnv_fromUWriteBytes(cnv, - overflow, length, - &target, pArgs->targetLimit, - &offsets, sourceIndex, - pErrorCode); - targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); - } - - if(U_SUCCESS(*pErrorCode) && sourcesourceLimit && targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static void -_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source; - UChar *target; - int32_t *offsets; - - uint32_t targetCapacity, length, count, sourceIndex; - UChar c, trail; - - if(pArgs->converter->mode<8) { - _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); - return; - } - - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); - if(length<=0 && cnv->toUnicodeStatus==0) { - /* no input, nothing to do */ - return; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - sourceIndex=0; - c=0; - - /* complete a partial UChar or pair from the last call */ - if(cnv->toUnicodeStatus!=0) { - /* - * special case: single byte 
from a previous buffer, - * where the byte turned out not to belong to a trail surrogate - * and the preceding, unmatched lead surrogate was put into toUBytes[] - * for error handling - */ - cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; - cnv->toULength=1; - cnv->toUnicodeStatus=0; - } - if((count=cnv->toULength)!=0) { - uint8_t *p=cnv->toUBytes; - do { - p[count++]=*source++; - ++sourceIndex; - --length; - if(count==2) { - c=((UChar)p[1]<<8)|p[0]; - if(U16_IS_SINGLE(c)) { - /* output the BMP code point */ - *target++=c; - if(offsets!=NULL) { - *offsets++=-1; - } - --targetCapacity; - count=0; - c=0; - break; - } else if(U16_IS_SURROGATE_LEAD(c)) { - /* continue collecting bytes for the trail surrogate */ - c=0; /* avoid unnecessary surrogate handling below */ - } else { - /* fall through to error handling for an unmatched trail surrogate */ - break; - } - } else if(count==4) { - c=((UChar)p[1]<<8)|p[0]; - trail=((UChar)p[3]<<8)|p[2]; - if(U16_IS_TRAIL(trail)) { - /* output the surrogate pair */ - *target++=c; - if(targetCapacity>=2) { - *target++=trail; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - } - targetCapacity-=2; - } else /* targetCapacity==1 */ { - targetCapacity=0; - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - count=0; - c=0; - break; - } else { - /* unmatched lead surrogate, handle here for consistent toUBytes[] */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - - /* back out reading the code unit after it */ - if(((const uint8_t *)pArgs->source-source)>=2) { - source-=2; - } else { - /* - * if the trail unit's first byte was in a previous buffer, then - * we need to put it into a special place because toUBytes[] will be - * used for the lead unit's bytes - */ - cnv->toUnicodeStatus=0x100|p[2]; - --source; - } - cnv->toULength=2; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; - } - } - } 
while(length>0); - cnv->toULength=(int8_t)count; - } - - /* copy an even number of bytes for complete UChars */ - count=2*targetCapacity; - if(count>length) { - count=length&~1; - } - if(c==0 && count>0) { - length-=count; - count>>=1; - targetCapacity-=count; - if(offsets==NULL) { - do { - c=((UChar)source[1]<<8)|source[0]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - } else { - break; - } - } while(--count>0); - } else { - do { - c=((UChar)source[1]<<8)|source[0]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - *offsets++=sourceIndex; - sourceIndex+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=4; - } else { - break; - } - } while(--count>0); - } - - if(count==0) { - /* done with the loop for complete UChars */ - c=0; - } else { - /* keep c for surrogate handling, trail will be set there */ - length+=2*(count-1); /* one more byte pair was consumed than count decremented */ - targetCapacity+=count; - } - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - cnv->toUBytes[0]=(uint8_t)c; - cnv->toUBytes[1]=(uint8_t)(c>>8); - cnv->toULength=2; - - if(U16_IS_SURROGATE_LEAD(c)) { - if(length>=2) { - if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - source+=2; - length-=2; - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - cnv->toULength=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* unmatched lead surrogate */ - 
*pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } - - if(U_SUCCESS(*pErrorCode)) { - /* check for a remaining source byte */ - if(length>0) { - if(targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* it must be length==1 because otherwise the above would have copied more */ - cnv->toUBytes[cnv->toULength++]=*source++; - } - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static UChar32 -_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - const uint8_t *s, *sourceLimit; - UChar32 c; - - if(pArgs->converter->mode<8) { - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - - s=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - - if(s>=sourceLimit) { - /* no input */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - if(s+2>sourceLimit) { - /* only one byte: truncated UChar */ - pArgs->converter->toUBytes[0]=*s++; - pArgs->converter->toULength=1; - pArgs->source=(const char *)s; - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* get one UChar */ - c=((UChar32)s[1]<<8)|*s; - s+=2; - - /* check for a surrogate pair */ - if(U_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c)) { - if(s+2<=sourceLimit) { - UChar trail; - - /* get a second UChar and see if it is a trail surrogate */ - trail=((UChar)s[1]<<8)|*s; - if(U16_IS_TRAIL(trail)) { - c=U16_GET_SUPPLEMENTARY(c, trail); - s+=2; - } else { - /* unmatched lead surrogate */ - c=-2; - } - } else { - /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ - uint8_t *bytes=pArgs->converter->toUBytes; - s-=2; - pArgs->converter->toULength=(int8_t)(sourceLimit-s); - do { - *bytes++=*s++; - } while(sconverter->toUBytes; - pArgs->converter->toULength=2; - *bytes=*(s-2); - bytes[1]=*(s-1); - 
- c=0xffff; - *err=U_ILLEGAL_CHAR_FOUND; - } - } - - pArgs->source=(const char *)s; - return c; -} - -static void -_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode state */ - if(UCNV_GET_VERSION(cnv)==0) { - cnv->mode=8; /* no BOM handling */ - } else { - cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ - } - } - if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { - /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static void -_UTF16LEOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - if(UCNV_GET_VERSION(cnv)<=1) { - _UTF16LEReset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const char * -_UTF16LEGetName(const UConverter *cnv) { - if(UCNV_GET_VERSION(cnv)==0) { - return "UTF-16LE"; - } else { - return "UTF-16LE,version=1"; - } -} - -static const UConverterImpl _UTF16LEImpl={ - UCNV_UTF16_LittleEndian, - - NULL, - NULL, - - _UTF16LEOpen, - NULL, - _UTF16LEReset, - - _UTF16LEToUnicodeWithOffsets, - _UTF16LEToUnicodeWithOffsets, - _UTF16LEFromUnicodeWithOffsets, - _UTF16LEFromUnicodeWithOffsets, - _UTF16LEGetNextUChar, - - NULL, - _UTF16LEGetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - - -static const UConverterStaticData _UTF16LEStaticData={ - sizeof(UConverterStaticData), - "UTF-16LE", - 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, - { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF16LEData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl); - -/* UTF-16 (Detect BOM) ------------------------------------------------------ */ - -/* - * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE - * accordingly. 
- * This is a simpler version of the UTF-32 converter, with - * fewer states for shorter BOMs. - * - * State values: - * 0 initial state - * 1 saw first byte - * 2..5 - - * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 - * 8 UTF-16BE mode - * 9 UTF-16LE mode - * - * During detection: state==number of initial bytes seen so far. - * - * On output, emit U+FEFF as the first code point. - * - * Variants: - * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. - * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and - * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. - */ - -static void -_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode: state=0 */ - cnv->mode=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode: prepare to output the UTF-16PE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static const UConverterSharedData _UTF16v2Data; - -static void -_UTF16Open(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - if(UCNV_GET_VERSION(cnv)<=2) { - if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) { - /* - * Switch implementation, and switch the staticData that's different - * and was copied into the UConverter. - * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.) - * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream. 
- */ - cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; - uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); - } - _UTF16Reset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const char * -_UTF16GetName(const UConverter *cnv) { - if(UCNV_GET_VERSION(cnv)==0) { - return "UTF-16"; - } else if(UCNV_GET_VERSION(cnv)==1) { - return "UTF-16,version=1"; - } else { - return "UTF-16,version=2"; - } -} - -const UConverterSharedData _UTF16Data; - -#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) -#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) -#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data) - -static void -_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv=pArgs->converter; - const char *source=pArgs->source; - const char *sourceLimit=pArgs->sourceLimit; - int32_t *offsets=pArgs->offsets; - - int32_t state, offsetDelta; - uint8_t b; - - state=cnv->mode; - - /* - * If we detect a BOM in this buffer, then we must add the BOM size to the - * offsets because the actual converter function will not see and count the BOM. - * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
- */ - offsetDelta=0; - - while(sourcetoUBytes[0]=(uint8_t)*source++; - cnv->toULength=1; - state=1; - break; - case 1: - /* - * Only inside this switch case can the state variable - * temporarily take two additional values: - * 6: BOM error, continue with BE - * 7: BOM error, continue with LE - */ - b=*source; - if(cnv->toUBytes[0]==0xfe && b==0xff) { - if(IS_UTF16LE(cnv)) { - state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ - } else { - state=8; /* detect UTF-16BE */ - } - } else if(cnv->toUBytes[0]==0xff && b==0xfe) { - if(IS_UTF16BE(cnv)) { - state=6; /* illegal reverse BOM for Java "UnicodeBig" */ - } else { - state=9; /* detect UTF-16LE */ - } - } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { - state=6; /* illegal missing BOM for Java "Unicode" */ - } - if(state>=8) { - /* BOM detected, consume it */ - ++source; - cnv->toULength=0; - offsetDelta=(int32_t)(source-pArgs->source); - } else if(state<6) { - /* ok: no BOM, and not a reverse BOM */ - if(source!=pArgs->source) { - /* reset the source for a correct first offset */ - source=pArgs->source; - cnv->toULength=0; - } - if(IS_UTF16LE(cnv)) { - /* Make Java "UnicodeLittle" default to LE. */ - state=9; - } else { - /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ - state=8; - } - } else { - /* - * error: missing BOM, or reverse BOM - * UTF-16,version=1: Java-specific "Unicode" requires a BOM. - * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. - * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. - */ - /* report the non-BOM or reverse BOM as an illegal sequence */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - pArgs->source=source+1; - /* continue with conversion if the callback resets the error */ - /* - * Make Java "Unicode" default to BE like standard UTF-16. - * Make Java "UnicodeBig" and "UnicodeLittle" default - * to their normal endiannesses. 
- */ - cnv->mode=state+2; - *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; - return; - } - /* convert the rest of the stream */ - cnv->mode=state; - continue; - case 8: - /* call UTF-16BE */ - pArgs->source=source; - _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); - source=pArgs->source; - break; - case 9: - /* call UTF-16LE */ - pArgs->source=source; - _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); - source=pArgs->source; - break; - default: - break; /* does not occur */ - } - } - - /* add BOM size to offsets - see comment at offsetDelta declaration */ - if(offsets!=NULL && offsetDelta!=0) { - int32_t *offsetsLimit=pArgs->offsets; - while(offsetssource=source; - - if(source==sourceLimit && pArgs->flush) { - /* handle truncated input */ - switch(state) { - case 0: - break; /* no input at all, nothing to do */ - case 8: - _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); - break; - case 9: - _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); - break; - default: - /* 0mode=state; -} - -static UChar32 -_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - switch(pArgs->converter->mode) { - case 8: - return _UTF16BEGetNextUChar(pArgs, pErrorCode); - case 9: - return _UTF16LEGetNextUChar(pArgs, pErrorCode); - default: - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } -} - -static const UConverterImpl _UTF16Impl = { - UCNV_UTF16, - - NULL, - NULL, - - _UTF16Open, - NULL, - _UTF16Reset, - - _UTF16ToUnicodeWithOffsets, - _UTF16ToUnicodeWithOffsets, - _UTF16PEFromUnicodeWithOffsets, - _UTF16PEFromUnicodeWithOffsets, - _UTF16GetNextUChar, - - NULL, /* ### TODO implement getStarters for all Unicode encodings?! 
*/ - _UTF16GetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF16StaticData = { - sizeof(UConverterStaticData), - "UTF-16", - 1204, /* CCSID for BOM sensitive UTF-16 */ - UCNV_IBM, UCNV_UTF16, 2, 2, -#if U_IS_BIG_ENDIAN - { 0xff, 0xfd, 0, 0 }, 2, -#else - { 0xfd, 0xff, 0, 0 }, 2, -#endif - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF16Data = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl); - -static const UConverterImpl _UTF16v2Impl = { - UCNV_UTF16, - - NULL, - NULL, - - _UTF16Open, - NULL, - _UTF16Reset, - - _UTF16ToUnicodeWithOffsets, - _UTF16ToUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16GetNextUChar, - - NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ - _UTF16GetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF16v2StaticData = { - sizeof(UConverterStaticData), - "UTF-16,version=2", - 1204, /* CCSID for BOM sensitive UTF-16 */ - UCNV_IBM, UCNV_UTF16, 2, 2, - { 0xff, 0xfd, 0, 0 }, 2, - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -static const UConverterSharedData _UTF16v2Data = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl); - -#endif diff --git a/deps/icu-small/source/common/ucnv_u16.cpp b/deps/icu-small/source/common/ucnv_u16.cpp new file mode 100644 index 0000000000..674d0323ef --- /dev/null +++ b/deps/icu-small/source/common/ucnv_u16.cpp @@ -0,0 +1,1571 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2015, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* file name: ucnv_u16.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-16 converter implementation. Used to be in ucnv_utf.c. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uversion.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +enum { + UCNV_NEED_TO_WRITE_BOM=1 +}; + +U_CDECL_BEGIN +/* + * The UTF-16 toUnicode implementation is also used for the Java-specific + * "with BOM" variants of UTF-16BE and UTF-16LE. + */ +static void U_CALLCONV +_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode); + +/* UTF-16BE ----------------------------------------------------------------- */ + +#if U_IS_BIG_ENDIAN +# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets +#else +# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets +#endif + + +static void U_CALLCONV +_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source; + char *target; + int32_t *offsets; + + uint32_t targetCapacity, length, sourceIndex; + UChar c, trail; + char overflow[4]; + + source=pArgs->source; + length=(int32_t)(pArgs->sourceLimit-source); + if(length<=0) { + /* no input, nothing to do */ + return; + } + + cnv=pArgs->converter; + + /* write the BOM if necessary */ + if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xfe, (char)0xff }; + ucnv_fromUWriteBytes(cnv, + bom, 2, + &pArgs->target, pArgs->targetLimit, + &pArgs->offsets, -1, + pErrorCode); + cnv->fromUnicodeStatus=0; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-target); + offsets=pArgs->offsets; + 
sourceIndex=0; + + /* c!=0 indicates in several places outside the main loops that a surrogate was found */ + + if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { + /* the last buffer ended with a lead surrogate, output the surrogate pair */ + ++source; + --length; + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target[2]=(uint8_t)(trail>>8); + target[3]=(uint8_t)trail; + target+=4; + targetCapacity-=4; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + } + sourceIndex=1; + cnv->fromUChar32=c=0; + } + + if(c==0) { + /* copy an even number of bytes for complete UChars */ + uint32_t count=2*length; + if(count>targetCapacity) { + count=targetCapacity&~1; + } + /* count is even */ + targetCapacity-=count; + count>>=1; + length-=count; + + if(offsets==NULL) { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target[2]=(uint8_t)(trail>>8); + target[3]=(uint8_t)trail; + target+=4; + } else { + break; + } + --count; + } + } else { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target+=2; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target[2]=(uint8_t)(trail>>8); + target[3]=(uint8_t)trail; + target+=4; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=2; + } else { + break; + } + --count; + } + } + + if(count==0) { + /* done with the loop for complete UChars */ + if(length>0 && targetCapacity>0) { + /* + * there is more input and some target capacity - + * 
it must be targetCapacity==1 because otherwise + * the above would have copied more; + * prepare for overflow output + */ + if(U16_IS_SINGLE(c=*source++)) { + overflow[0]=(char)(c>>8); + overflow[1]=(char)c; + length=2; /* 2 bytes to output */ + c=0; + /* } else { keep c for surrogate handling, length will be set there */ + } + } else { + length=0; + c=0; + } + } else { + /* keep c for surrogate handling, length will be set there */ + targetCapacity+=2*count; + } + } else { + length=0; /* from here on, length counts the bytes in overflow[] */ + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + length=0; + if(U16_IS_SURROGATE_LEAD(c)) { + if(sourcesourceLimit) { + if(U16_IS_TRAIL(trail=*source)) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + ++source; + overflow[0]=(char)(c>>8); + overflow[1]=(char)c; + overflow[2]=(char)(trail>>8); + overflow[3]=(char)trail; + length=4; /* 4 bytes to output */ + c=0; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + cnv->fromUChar32=c; + } + + if(length>0) { + /* output length bytes with overflow (length>targetCapacity>0) */ + ucnv_fromUWriteBytes(cnv, + overflow, length, + (char **)&target, pArgs->targetLimit, + &offsets, sourceIndex, + pErrorCode); + targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); + } + + if(U_SUCCESS(*pErrorCode) && sourcesourceLimit && targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +static void U_CALLCONV +_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source; + UChar *target; + 
int32_t *offsets; + + uint32_t targetCapacity, length, count, sourceIndex; + UChar c, trail; + + if(pArgs->converter->mode<8) { + _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); + return; + } + + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); + if(length<=0 && cnv->toUnicodeStatus==0) { + /* no input, nothing to do */ + return; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-target); + offsets=pArgs->offsets; + sourceIndex=0; + c=0; + + /* complete a partial UChar or pair from the last call */ + if(cnv->toUnicodeStatus!=0) { + /* + * special case: single byte from a previous buffer, + * where the byte turned out not to belong to a trail surrogate + * and the preceding, unmatched lead surrogate was put into toUBytes[] + * for error handling + */ + cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; + cnv->toULength=1; + cnv->toUnicodeStatus=0; + } + if((count=cnv->toULength)!=0) { + uint8_t *p=cnv->toUBytes; + do { + p[count++]=*source++; + ++sourceIndex; + --length; + if(count==2) { + c=((UChar)p[0]<<8)|p[1]; + if(U16_IS_SINGLE(c)) { + /* output the BMP code point */ + *target++=c; + if(offsets!=NULL) { + *offsets++=-1; + } + --targetCapacity; + count=0; + c=0; + break; + } else if(U16_IS_SURROGATE_LEAD(c)) { + /* continue collecting bytes for the trail surrogate */ + c=0; /* avoid unnecessary surrogate handling below */ + } else { + /* fall through to error handling for an unmatched trail surrogate */ + break; + } + } else if(count==4) { + c=((UChar)p[0]<<8)|p[1]; + trail=((UChar)p[2]<<8)|p[3]; + if(U16_IS_TRAIL(trail)) { + /* output the surrogate pair */ + *target++=c; + if(targetCapacity>=2) { + *target++=trail; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + } + targetCapacity-=2; + } else /* targetCapacity==1 */ { + targetCapacity=0; + 
cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + count=0; + c=0; + break; + } else { + /* unmatched lead surrogate, handle here for consistent toUBytes[] */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + + /* back out reading the code unit after it */ + if(((const uint8_t *)pArgs->source-source)>=2) { + source-=2; + } else { + /* + * if the trail unit's first byte was in a previous buffer, then + * we need to put it into a special place because toUBytes[] will be + * used for the lead unit's bytes + */ + cnv->toUnicodeStatus=0x100|p[2]; + --source; + } + cnv->toULength=2; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; + } + } + } while(length>0); + cnv->toULength=(int8_t)count; + } + + /* copy an even number of bytes for complete UChars */ + count=2*targetCapacity; + if(count>length) { + count=length&~1; + } + if(c==0 && count>0) { + length-=count; + count>>=1; + targetCapacity-=count; + if(offsets==NULL) { + do { + c=((UChar)source[0]<<8)|source[1]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + } else { + break; + } + } while(--count>0); + } else { + do { + c=((UChar)source[0]<<8)|source[1]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + *offsets++=sourceIndex; + sourceIndex+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=4; + } else { + break; + } + } while(--count>0); + } + + if(count==0) { + /* done with the loop for complete UChars */ + c=0; + } else { + /* keep c for surrogate handling, trail will be set there */ + length+=2*(count-1); /* one 
more byte pair was consumed than count decremented */ + targetCapacity+=count; + } + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + cnv->toUBytes[0]=(uint8_t)(c>>8); + cnv->toUBytes[1]=(uint8_t)c; + cnv->toULength=2; + + if(U16_IS_SURROGATE_LEAD(c)) { + if(length>=2) { + if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + source+=2; + length-=2; + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + cnv->toULength=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } + + if(U_SUCCESS(*pErrorCode)) { + /* check for a remaining source byte */ + if(length>0) { + if(targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* it must be length==1 because otherwise the above would have copied more */ + cnv->toUBytes[cnv->toULength++]=*source++; + } + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static UChar32 U_CALLCONV +_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + const uint8_t *s, *sourceLimit; + UChar32 c; + + if(pArgs->converter->mode<8) { + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + + s=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + + if(s>=sourceLimit) { + /* no input */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + if(s+2>sourceLimit) { + /* only one byte: truncated UChar */ + pArgs->converter->toUBytes[0]=*s++; + pArgs->converter->toULength=1; + pArgs->source=(const char *)s; + *err = 
U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* get one UChar */ + c=((UChar32)*s<<8)|s[1]; + s+=2; + + /* check for a surrogate pair */ + if(U_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { + if(s+2<=sourceLimit) { + UChar trail; + + /* get a second UChar and see if it is a trail surrogate */ + trail=((UChar)*s<<8)|s[1]; + if(U16_IS_TRAIL(trail)) { + c=U16_GET_SUPPLEMENTARY(c, trail); + s+=2; + } else { + /* unmatched lead surrogate */ + c=-2; + } + } else { + /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ + uint8_t *bytes=pArgs->converter->toUBytes; + s-=2; + pArgs->converter->toULength=(int8_t)(sourceLimit-s); + do { + *bytes++=*s++; + } while(sconverter->toUBytes; + pArgs->converter->toULength=2; + *bytes=*(s-2); + bytes[1]=*(s-1); + + c=0xffff; + *err=U_ILLEGAL_CHAR_FOUND; + } + } + + pArgs->source=(const char *)s; + return c; +} + +static void U_CALLCONV +_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode state */ + if(UCNV_GET_VERSION(cnv)==0) { + cnv->mode=8; /* no BOM handling */ + } else { + cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ + } + } + if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { + /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} + +static void U_CALLCONV +_UTF16BEOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + if(UCNV_GET_VERSION(cnv)<=1) { + _UTF16BEReset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const char * U_CALLCONV +_UTF16BEGetName(const UConverter *cnv) { + if(UCNV_GET_VERSION(cnv)==0) { + return "UTF-16BE"; + } else { + return "UTF-16BE,version=1"; + } +} +U_CDECL_END + +static const UConverterImpl _UTF16BEImpl={ + UCNV_UTF16_BigEndian, + + NULL, + NULL, + + _UTF16BEOpen, + NULL, + _UTF16BEReset, + + 
_UTF16BEToUnicodeWithOffsets, + _UTF16BEToUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16BEGetNextUChar, + + NULL, + _UTF16BEGetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF16BEStaticData={ + sizeof(UConverterStaticData), + "UTF-16BE", + 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, + { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF16BEData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl); + +/* UTF-16LE ----------------------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source; + char *target; + int32_t *offsets; + + uint32_t targetCapacity, length, sourceIndex; + UChar c, trail; + char overflow[4]; + + source=pArgs->source; + length=(int32_t)(pArgs->sourceLimit-source); + if(length<=0) { + /* no input, nothing to do */ + return; + } + + cnv=pArgs->converter; + + /* write the BOM if necessary */ + if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xff, (char)0xfe }; + ucnv_fromUWriteBytes(cnv, + bom, 2, + &pArgs->target, pArgs->targetLimit, + &pArgs->offsets, -1, + pErrorCode); + cnv->fromUnicodeStatus=0; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + sourceIndex=0; + + /* c!=0 indicates in several places outside the main loops that a surrogate was found */ + + if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { + /* the last buffer ended with a lead surrogate, output the surrogate pair */ + ++source; + --length; + 
target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target[2]=(uint8_t)trail; + target[3]=(uint8_t)(trail>>8); + target+=4; + targetCapacity-=4; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + } + sourceIndex=1; + cnv->fromUChar32=c=0; + } + + if(c==0) { + /* copy an even number of bytes for complete UChars */ + uint32_t count=2*length; + if(count>targetCapacity) { + count=targetCapacity&~1; + } + /* count is even */ + targetCapacity-=count; + count>>=1; + length-=count; + + if(offsets==NULL) { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target[2]=(uint8_t)trail; + target[3]=(uint8_t)(trail>>8); + target+=4; + } else { + break; + } + --count; + } + } else { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target+=2; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target[2]=(uint8_t)trail; + target[3]=(uint8_t)(trail>>8); + target+=4; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=2; + } else { + break; + } + --count; + } + } + + if(count==0) { + /* done with the loop for complete UChars */ + if(length>0 && targetCapacity>0) { + /* + * there is more input and some target capacity - + * it must be targetCapacity==1 because otherwise + * the above would have copied more; + * prepare for overflow output + */ + if(U16_IS_SINGLE(c=*source++)) { + overflow[0]=(char)c; + overflow[1]=(char)(c>>8); + length=2; /* 2 bytes to output */ + c=0; + /* } else { keep c for surrogate handling, length 
will be set there */ + } + } else { + length=0; + c=0; + } + } else { + /* keep c for surrogate handling, length will be set there */ + targetCapacity+=2*count; + } + } else { + length=0; /* from here on, length counts the bytes in overflow[] */ + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + length=0; + if(U16_IS_SURROGATE_LEAD(c)) { + if(sourcesourceLimit) { + if(U16_IS_TRAIL(trail=*source)) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + ++source; + overflow[0]=(char)c; + overflow[1]=(char)(c>>8); + overflow[2]=(char)trail; + overflow[3]=(char)(trail>>8); + length=4; /* 4 bytes to output */ + c=0; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + cnv->fromUChar32=c; + } + + if(length>0) { + /* output length bytes with overflow (length>targetCapacity>0) */ + ucnv_fromUWriteBytes(cnv, + overflow, length, + &target, pArgs->targetLimit, + &offsets, sourceIndex, + pErrorCode); + targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); + } + + if(U_SUCCESS(*pErrorCode) && sourcesourceLimit && targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static void U_CALLCONV +_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source; + UChar *target; + int32_t *offsets; + + uint32_t targetCapacity, length, count, sourceIndex; + UChar c, trail; + + if(pArgs->converter->mode<8) { + _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); + return; + } + + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + length=(int32_t)((const uint8_t 
*)pArgs->sourceLimit-source); + if(length<=0 && cnv->toUnicodeStatus==0) { + /* no input, nothing to do */ + return; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + sourceIndex=0; + c=0; + + /* complete a partial UChar or pair from the last call */ + if(cnv->toUnicodeStatus!=0) { + /* + * special case: single byte from a previous buffer, + * where the byte turned out not to belong to a trail surrogate + * and the preceding, unmatched lead surrogate was put into toUBytes[] + * for error handling + */ + cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; + cnv->toULength=1; + cnv->toUnicodeStatus=0; + } + if((count=cnv->toULength)!=0) { + uint8_t *p=cnv->toUBytes; + do { + p[count++]=*source++; + ++sourceIndex; + --length; + if(count==2) { + c=((UChar)p[1]<<8)|p[0]; + if(U16_IS_SINGLE(c)) { + /* output the BMP code point */ + *target++=c; + if(offsets!=NULL) { + *offsets++=-1; + } + --targetCapacity; + count=0; + c=0; + break; + } else if(U16_IS_SURROGATE_LEAD(c)) { + /* continue collecting bytes for the trail surrogate */ + c=0; /* avoid unnecessary surrogate handling below */ + } else { + /* fall through to error handling for an unmatched trail surrogate */ + break; + } + } else if(count==4) { + c=((UChar)p[1]<<8)|p[0]; + trail=((UChar)p[3]<<8)|p[2]; + if(U16_IS_TRAIL(trail)) { + /* output the surrogate pair */ + *target++=c; + if(targetCapacity>=2) { + *target++=trail; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + } + targetCapacity-=2; + } else /* targetCapacity==1 */ { + targetCapacity=0; + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + count=0; + c=0; + break; + } else { + /* unmatched lead surrogate, handle here for consistent toUBytes[] */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + + /* back out reading the code unit after it */ + 
if(((const uint8_t *)pArgs->source-source)>=2) { + source-=2; + } else { + /* + * if the trail unit's first byte was in a previous buffer, then + * we need to put it into a special place because toUBytes[] will be + * used for the lead unit's bytes + */ + cnv->toUnicodeStatus=0x100|p[2]; + --source; + } + cnv->toULength=2; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; + } + } + } while(length>0); + cnv->toULength=(int8_t)count; + } + + /* copy an even number of bytes for complete UChars */ + count=2*targetCapacity; + if(count>length) { + count=length&~1; + } + if(c==0 && count>0) { + length-=count; + count>>=1; + targetCapacity-=count; + if(offsets==NULL) { + do { + c=((UChar)source[1]<<8)|source[0]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + } else { + break; + } + } while(--count>0); + } else { + do { + c=((UChar)source[1]<<8)|source[0]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + *offsets++=sourceIndex; + sourceIndex+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=4; + } else { + break; + } + } while(--count>0); + } + + if(count==0) { + /* done with the loop for complete UChars */ + c=0; + } else { + /* keep c for surrogate handling, trail will be set there */ + length+=2*(count-1); /* one more byte pair was consumed than count decremented */ + targetCapacity+=count; + } + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + cnv->toUBytes[0]=(uint8_t)c; + cnv->toUBytes[1]=(uint8_t)(c>>8); + cnv->toULength=2; + + 
if(U16_IS_SURROGATE_LEAD(c)) { + if(length>=2) { + if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + source+=2; + length-=2; + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + cnv->toULength=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } + + if(U_SUCCESS(*pErrorCode)) { + /* check for a remaining source byte */ + if(length>0) { + if(targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* it must be length==1 because otherwise the above would have copied more */ + cnv->toUBytes[cnv->toULength++]=*source++; + } + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static UChar32 U_CALLCONV +_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + const uint8_t *s, *sourceLimit; + UChar32 c; + + if(pArgs->converter->mode<8) { + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + + s=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + + if(s>=sourceLimit) { + /* no input */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + if(s+2>sourceLimit) { + /* only one byte: truncated UChar */ + pArgs->converter->toUBytes[0]=*s++; + pArgs->converter->toULength=1; + pArgs->source=(const char *)s; + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* get one UChar */ + c=((UChar32)s[1]<<8)|*s; + s+=2; + + /* check for a surrogate pair */ + if(U_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { + if(s+2<=sourceLimit) { + UChar trail; + + /* get a second UChar and see if it is a trail surrogate */ + 
trail=((UChar)s[1]<<8)|*s; + if(U16_IS_TRAIL(trail)) { + c=U16_GET_SUPPLEMENTARY(c, trail); + s+=2; + } else { + /* unmatched lead surrogate */ + c=-2; + } + } else { + /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ + uint8_t *bytes=pArgs->converter->toUBytes; + s-=2; + pArgs->converter->toULength=(int8_t)(sourceLimit-s); + do { + *bytes++=*s++; + } while(sconverter->toUBytes; + pArgs->converter->toULength=2; + *bytes=*(s-2); + bytes[1]=*(s-1); + + c=0xffff; + *err=U_ILLEGAL_CHAR_FOUND; + } + } + + pArgs->source=(const char *)s; + return c; +} + +static void U_CALLCONV +_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode state */ + if(UCNV_GET_VERSION(cnv)==0) { + cnv->mode=8; /* no BOM handling */ + } else { + cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ + } + } + if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { + /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} + +static void U_CALLCONV +_UTF16LEOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + if(UCNV_GET_VERSION(cnv)<=1) { + _UTF16LEReset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const char * U_CALLCONV +_UTF16LEGetName(const UConverter *cnv) { + if(UCNV_GET_VERSION(cnv)==0) { + return "UTF-16LE"; + } else { + return "UTF-16LE,version=1"; + } +} +U_CDECL_END + +static const UConverterImpl _UTF16LEImpl={ + UCNV_UTF16_LittleEndian, + + NULL, + NULL, + + _UTF16LEOpen, + NULL, + _UTF16LEReset, + + _UTF16LEToUnicodeWithOffsets, + _UTF16LEToUnicodeWithOffsets, + _UTF16LEFromUnicodeWithOffsets, + _UTF16LEFromUnicodeWithOffsets, + _UTF16LEGetNextUChar, + + NULL, + _UTF16LEGetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + + +static const UConverterStaticData 
_UTF16LEStaticData={ + sizeof(UConverterStaticData), + "UTF-16LE", + 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, + { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF16LEData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl); + +/* UTF-16 (Detect BOM) ------------------------------------------------------ */ + +/* + * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE + * accordingly. + * This is a simpler version of the UTF-32 converter, with + * fewer states for shorter BOMs. + * + * State values: + * 0 initial state + * 1 saw first byte + * 2..5 - + * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 + * 8 UTF-16BE mode + * 9 UTF-16LE mode + * + * During detection: state==number of initial bytes seen so far. + * + * On output, emit U+FEFF as the first code point. + * + * Variants: + * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. + * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and + * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. + */ +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode: state=0 */ + cnv->mode=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode: prepare to output the UTF-16PE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} +U_CDECL_END +extern const UConverterSharedData _UTF16v2Data; +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16Open(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + if(UCNV_GET_VERSION(cnv)<=2) { + if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) { + /* + * Switch implementation, and switch the staticData that's different + * and was copied into the UConverter. + * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.) 
+ * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream. + */ + cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; + uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); + } + _UTF16Reset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const char * U_CALLCONV +_UTF16GetName(const UConverter *cnv) { + if(UCNV_GET_VERSION(cnv)==0) { + return "UTF-16"; + } else if(UCNV_GET_VERSION(cnv)==1) { + return "UTF-16,version=1"; + } else { + return "UTF-16,version=2"; + } +} +U_CDECL_END +extern const UConverterSharedData _UTF16Data; + +#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) +#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) +#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data) + +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv=pArgs->converter; + const char *source=pArgs->source; + const char *sourceLimit=pArgs->sourceLimit; + int32_t *offsets=pArgs->offsets; + + int32_t state, offsetDelta; + uint8_t b; + + state=cnv->mode; + + /* + * If we detect a BOM in this buffer, then we must add the BOM size to the + * offsets because the actual converter function will not see and count the BOM. + * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
+ */ + offsetDelta=0; + + while(sourcetoUBytes[0]=(uint8_t)*source++; + cnv->toULength=1; + state=1; + break; + case 1: + /* + * Only inside this switch case can the state variable + * temporarily take two additional values: + * 6: BOM error, continue with BE + * 7: BOM error, continue with LE + */ + b=*source; + if(cnv->toUBytes[0]==0xfe && b==0xff) { + if(IS_UTF16LE(cnv)) { + state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ + } else { + state=8; /* detect UTF-16BE */ + } + } else if(cnv->toUBytes[0]==0xff && b==0xfe) { + if(IS_UTF16BE(cnv)) { + state=6; /* illegal reverse BOM for Java "UnicodeBig" */ + } else { + state=9; /* detect UTF-16LE */ + } + } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { + state=6; /* illegal missing BOM for Java "Unicode" */ + } + if(state>=8) { + /* BOM detected, consume it */ + ++source; + cnv->toULength=0; + offsetDelta=(int32_t)(source-pArgs->source); + } else if(state<6) { + /* ok: no BOM, and not a reverse BOM */ + if(source!=pArgs->source) { + /* reset the source for a correct first offset */ + source=pArgs->source; + cnv->toULength=0; + } + if(IS_UTF16LE(cnv)) { + /* Make Java "UnicodeLittle" default to LE. */ + state=9; + } else { + /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ + state=8; + } + } else { + /* + * error: missing BOM, or reverse BOM + * UTF-16,version=1: Java-specific "Unicode" requires a BOM. + * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. + * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. + */ + /* report the non-BOM or reverse BOM as an illegal sequence */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + pArgs->source=source+1; + /* continue with conversion if the callback resets the error */ + /* + * Make Java "Unicode" default to BE like standard UTF-16. + * Make Java "UnicodeBig" and "UnicodeLittle" default + * to their normal endiannesses. 
+ */ + cnv->mode=state+2; + *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; + return; + } + /* convert the rest of the stream */ + cnv->mode=state; + continue; + case 8: + /* call UTF-16BE */ + pArgs->source=source; + _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); + source=pArgs->source; + break; + case 9: + /* call UTF-16LE */ + pArgs->source=source; + _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); + source=pArgs->source; + break; + default: + break; /* does not occur */ + } + } + + /* add BOM size to offsets - see comment at offsetDelta declaration */ + if(offsets!=NULL && offsetDelta!=0) { + int32_t *offsetsLimit=pArgs->offsets; + while(offsetssource=source; + + if(source==sourceLimit && pArgs->flush) { + /* handle truncated input */ + switch(state) { + case 0: + break; /* no input at all, nothing to do */ + case 8: + _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); + break; + case 9: + _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); + break; + default: + /* 0mode=state; +} + +static UChar32 U_CALLCONV +_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + switch(pArgs->converter->mode) { + case 8: + return _UTF16BEGetNextUChar(pArgs, pErrorCode); + case 9: + return _UTF16LEGetNextUChar(pArgs, pErrorCode); + default: + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } +} +U_CDECL_END + +static const UConverterImpl _UTF16Impl = { + UCNV_UTF16, + + NULL, + NULL, + + _UTF16Open, + NULL, + _UTF16Reset, + + _UTF16ToUnicodeWithOffsets, + _UTF16ToUnicodeWithOffsets, + _UTF16PEFromUnicodeWithOffsets, + _UTF16PEFromUnicodeWithOffsets, + _UTF16GetNextUChar, + + NULL, /* ### TODO implement getStarters for all Unicode encodings?! 
*/ + _UTF16GetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF16StaticData = { + sizeof(UConverterStaticData), + "UTF-16", + 1204, /* CCSID for BOM sensitive UTF-16 */ + UCNV_IBM, UCNV_UTF16, 2, 2, +#if U_IS_BIG_ENDIAN + { 0xff, 0xfd, 0, 0 }, 2, +#else + { 0xfd, 0xff, 0, 0 }, 2, +#endif + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF16Data = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl); + +static const UConverterImpl _UTF16v2Impl = { + UCNV_UTF16, + + NULL, + NULL, + + _UTF16Open, + NULL, + _UTF16Reset, + + _UTF16ToUnicodeWithOffsets, + _UTF16ToUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16GetNextUChar, + + NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ + _UTF16GetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF16v2StaticData = { + sizeof(UConverterStaticData), + "UTF-16,version=2", + 1204, /* CCSID for BOM sensitive UTF-16 */ + UCNV_IBM, UCNV_UTF16, 2, 2, + { 0xff, 0xfd, 0, 0 }, 2, + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF16v2Data = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl); + +#endif diff --git a/deps/icu-small/source/common/ucnv_u32.c b/deps/icu-small/source/common/ucnv_u32.c deleted file mode 100644 index fa74b85c93..0000000000 --- a/deps/icu-small/source/common/ucnv_u32.c +++ /dev/null @@ -1,1251 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnv_u32.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-32 converter implementation. Used to be in ucnv_utf.c. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/utf.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -#define MAXIMUM_UCS2 0x0000FFFF -#define MAXIMUM_UTF 0x0010FFFF -#define HALF_SHIFT 10 -#define HALF_BASE 0x0010000 -#define HALF_MASK 0x3FF -#define SURROGATE_HIGH_START 0xD800 -#define SURROGATE_LOW_START 0xDC00 - -/* -SURROGATE_LOW_START + HALF_BASE */ -#define SURROGATE_LOW_BASE 9216 - -enum { - UCNV_NEED_TO_WRITE_BOM=1 -}; - -/* UTF-32BE ----------------------------------------------------------------- */ - -static void -T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - - /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 0; - - ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) { - if (mySource < sourceLimit) { - ch = (ch << 8) | (uint8_t)(*mySource); - toUBytes[i++] = (char) *(mySource++); - } - else { - /* stores a partially calculated 
target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) { - *(myTarget++) = (UChar)ch; - } - else { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; -} - -static void -T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - int32_t *myOffsets = args->offsets; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - int32_t offsetNum = 0; - - /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 0; - - ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget 
< targetLimit) { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) { - if (mySource < sourceLimit) { - ch = (ch << 8) | (uint8_t)(*mySource); - toUBytes[i++] = (char) *(mySource++); - } - else { - /* stores a partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - *myOffsets++ = offsetNum; - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - *(myOffsets++) = offsetNum; - } - else { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - offsetNum += i; - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; - args->offsets = myOffsets; -} - -static void -T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - 
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - temp[0] = 0; - - if (args->converter->fromUChar32) { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) { - ch = *(mySource++); - - if (U_IS_SURROGATE(ch)) { - if (U_IS_LEAD(ch)) { -lowsurogate: - if (mySource < sourceLimit) { - ch2 = *mySource; - if (U_IS_TRAIL(ch2)) { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ - temp[1] = (uint8_t) (ch >> 16 & 0x1F); - temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { - if (myTarget < targetLimit) { - *(myTarget++) = temp[indexToWrite]; - } - else { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - - if 
(mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; -} - -static void -T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - int32_t *myOffsets; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - int32_t offsetNum = 0; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - myOffsets = args->offsets; - temp[0] = 0; - - if (args->converter->fromUChar32) { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) { - ch = *(mySource++); - - if (U_IS_SURROGATE(ch)) { - if (U_IS_LEAD(ch)) { -lowsurogate: - if (mySource < sourceLimit) { - ch2 = *mySource; - if (U_IS_TRAIL(ch2)) { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else 
{ - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ - temp[1] = (uint8_t) (ch >> 16 & 0x1F); - temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { - if (myTarget < targetLimit) { - *(myTarget++) = temp[indexToWrite]; - *(myOffsets++) = offsetNum; - } - else { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - offsetNum = offsetNum + 1 + (temp[1] != 0); - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; - args->offsets = myOffsets; -} - -static UChar32 -T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - const uint8_t *mySource; - UChar32 myUChar; - int32_t length; - - mySource = (const uint8_t *)args->source; - if (mySource >= (const uint8_t *)args->sourceLimit) - { - /* no input */ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); - if (length < 4) - { - /* got a partial character */ - uprv_memcpy(args->converter->toUBytes, mySource, length); - args->converter->toULength = (int8_t)length; - args->source = (const char *)(mySource + length); - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* Don't even try to do a direct cast because the value may be on an odd address. 
*/ - myUChar = ((UChar32)mySource[0] << 24) - | ((UChar32)mySource[1] << 16) - | ((UChar32)mySource[2] << 8) - | ((UChar32)mySource[3]); - - args->source = (const char *)(mySource + 4); - if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { - return myUChar; - } - - uprv_memcpy(args->converter->toUBytes, mySource, 4); - args->converter->toULength = 4; - - *err = U_ILLEGAL_CHAR_FOUND; - return 0xffff; -} - -static const UConverterImpl _UTF32BEImpl = { - UCNV_UTF32_BigEndian, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - T_UConverter_toUnicode_UTF32_BE, - T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, - T_UConverter_fromUnicode_UTF32_BE, - T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, - T_UConverter_getNextUChar_UTF32_BE, - - NULL, - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ -static const UConverterStaticData _UTF32BEStaticData = { - sizeof(UConverterStaticData), - "UTF-32BE", - 1232, - UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, - { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF32BEData = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl); - -/* UTF-32LE ---------------------------------------------------------- */ - -static void -T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - - /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) - { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 
0; - - /* Stores the previously calculated ch from a previous call*/ - ch = args->converter->toUnicodeStatus - 1; - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) - { - if (mySource < sourceLimit) - { - ch |= ((uint8_t)(*mySource)) << (i * 8); - toUBytes[i++] = (char) *(mySource++); - } - else - { - /* stores a partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) { - *(myTarget++) = (UChar)ch; - } - else { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; -} - -static void -T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - int32_t *myOffsets = args->offsets; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = 
args->converter->toUBytes; - uint32_t ch, i; - int32_t offsetNum = 0; - - /* Restore state of current sequence */ - if (args->converter->toUnicodeStatus && myTarget < targetLimit) - { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 0; - - /* Stores the previously calculated ch from a previous call*/ - ch = args->converter->toUnicodeStatus - 1; - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) - { - if (mySource < sourceLimit) - { - ch |= ((uint8_t)(*mySource)) << (i * 8); - toUBytes[i++] = (char) *(mySource++); - } - else - { - /* stores a partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) - { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - *(myOffsets++) = offsetNum; - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - *(myOffsets++) = offsetNum; - } - else - { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else - { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - offsetNum += i; - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; - args->offsets = myOffsets; 
-} - -static void -T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - temp[3] = 0; - - if (args->converter->fromUChar32) - { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (U16_IS_SURROGATE(ch)) { - if (U16_IS_LEAD(ch)) - { -lowsurogate: - if (mySource < sourceLimit) - { - ch2 = *mySource; - if (U16_IS_TRAIL(ch2)) { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from 
UTF-16 */ - temp[2] = (uint8_t) (ch >> 16 & 0x1F); - temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) - { - if (myTarget < targetLimit) - { - *(myTarget++) = temp[indexToWrite]; - } - else - { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; -} - -static void -T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - int32_t *myOffsets; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - int32_t offsetNum = 0; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - myOffsets = args->offsets; - temp[3] = 0; - - if (args->converter->fromUChar32) - { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (U16_IS_SURROGATE(ch)) { - if (U16_IS_LEAD(ch)) - { -lowsurogate: - if (mySource < sourceLimit) - { - ch2 = *mySource; 
- if (U16_IS_TRAIL(ch2)) - { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ - temp[2] = (uint8_t) (ch >> 16 & 0x1F); - temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) - { - if (myTarget < targetLimit) - { - *(myTarget++) = temp[indexToWrite]; - *(myOffsets++) = offsetNum; - } - else - { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - offsetNum = offsetNum + 1 + (temp[2] != 0); - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; - args->offsets = myOffsets; -} - -static UChar32 -T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - const uint8_t *mySource; - UChar32 myUChar; - int32_t length; - - mySource = (const uint8_t *)args->source; - if (mySource >= (const uint8_t *)args->sourceLimit) - { - /* no input */ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - length = (int32_t)((const uint8_t 
*)args->sourceLimit - mySource); - if (length < 4) - { - /* got a partial character */ - uprv_memcpy(args->converter->toUBytes, mySource, length); - args->converter->toULength = (int8_t)length; - args->source = (const char *)(mySource + length); - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* Don't even try to do a direct cast because the value may be on an odd address. */ - myUChar = ((UChar32)mySource[3] << 24) - | ((UChar32)mySource[2] << 16) - | ((UChar32)mySource[1] << 8) - | ((UChar32)mySource[0]); - - args->source = (const char *)(mySource + 4); - if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { - return myUChar; - } - - uprv_memcpy(args->converter->toUBytes, mySource, 4); - args->converter->toULength = 4; - - *err = U_ILLEGAL_CHAR_FOUND; - return 0xffff; -} - -static const UConverterImpl _UTF32LEImpl = { - UCNV_UTF32_LittleEndian, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - T_UConverter_toUnicode_UTF32_LE, - T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, - T_UConverter_fromUnicode_UTF32_LE, - T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, - T_UConverter_getNextUChar_UTF32_LE, - - NULL, - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ -static const UConverterStaticData _UTF32LEStaticData = { - sizeof(UConverterStaticData), - "UTF-32LE", - 1234, - UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, - { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF32LEData = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl); - -/* UTF-32 (Detect BOM) ------------------------------------------------------ */ - -/* - * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE - * accordingly. 
- * - * State values: - * 0 initial state - * 1 saw 00 - * 2 saw 00 00 - * 3 saw 00 00 FE - * 4 - - * 5 saw FF - * 6 saw FF FE - * 7 saw FF FE 00 - * 8 UTF-32BE mode - * 9 UTF-32LE mode - * - * During detection: state&3==number of matching bytes so far. - * - * On output, emit U+FEFF as the first code point. - */ - -static void -_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode: state=0 */ - cnv->mode=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode: prepare to output the UTF-32PE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static void -_UTF32Open(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - _UTF32Reset(cnv, UCNV_RESET_BOTH); -} - -static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 }; - -static void -_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv=pArgs->converter; - const char *source=pArgs->source; - const char *sourceLimit=pArgs->sourceLimit; - int32_t *offsets=pArgs->offsets; - - int32_t state, offsetDelta; - char b; - - state=cnv->mode; - - /* - * If we detect a BOM in this buffer, then we must add the BOM size to the - * offsets because the actual converter function will not see and count the BOM. - * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
- */ - offsetDelta=0; - - while(sourcesource); - } else if(state==8) { - state=9; /* detect UTF-32LE */ - offsetDelta=(int32_t)(source-pArgs->source); - } - } else { - /* switch to UTF-32BE and pass the previous bytes */ - int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ - - /* reset the source */ - source=pArgs->source; - - if(count==(state&3)) { - /* simple: all in the same buffer, just reset source */ - } else { - UBool oldFlush=pArgs->flush; - - /* some of the bytes are from a previous buffer, replay those first */ - pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ - pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ - pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ - - /* no offsets: bytes from previous buffer, and not enough for output */ - T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - - /* restore real pointers; pArgs->source will be set in case 8/9 */ - pArgs->sourceLimit=sourceLimit; - pArgs->flush=oldFlush; - } - state=8; - continue; - } - break; - case 8: - /* call UTF-32BE */ - pArgs->source=source; - if(offsets==NULL) { - T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - } else { - T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); - } - source=pArgs->source; - break; - case 9: - /* call UTF-32LE */ - pArgs->source=source; - if(offsets==NULL) { - T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); - } else { - T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); - } - source=pArgs->source; - break; - default: - break; /* does not occur */ - } - } - - /* add BOM size to offsets - see comment at offsetDelta declaration */ - if(offsets!=NULL && offsetDelta!=0) { - int32_t *offsetsLimit=pArgs->offsets; - while(offsetssource=source; - - if(source==sourceLimit && pArgs->flush) { - /* handle truncated input */ - switch(state) { - case 0: - break; /* no input at all, nothing to do */ - case 8: - 
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - break; - case 9: - T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); - break; - default: - /* handle 0source=utf32BOM+(state&4); /* select the correct BOM */ - pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ - - /* no offsets: not enough for output */ - T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - pArgs->source=source; - pArgs->sourceLimit=sourceLimit; - state=8; - break; - } - } - - cnv->mode=state; -} - -static UChar32 -_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - switch(pArgs->converter->mode) { - case 8: - return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); - case 9: - return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); - default: - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } -} - -static const UConverterImpl _UTF32Impl = { - UCNV_UTF32, - - NULL, - NULL, - - _UTF32Open, - NULL, - _UTF32Reset, - - _UTF32ToUnicodeWithOffsets, - _UTF32ToUnicodeWithOffsets, -#if U_IS_BIG_ENDIAN - T_UConverter_fromUnicode_UTF32_BE, - T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, -#else - T_UConverter_fromUnicode_UTF32_LE, - T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, -#endif - _UTF32GetNextUChar, - - NULL, /* ### TODO implement getStarters for all Unicode encodings?! 
*/ - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */ -static const UConverterStaticData _UTF32StaticData = { - sizeof(UConverterStaticData), - "UTF-32", - 1236, - UCNV_IBM, UCNV_UTF32, 4, 4, -#if U_IS_BIG_ENDIAN - { 0, 0, 0xff, 0xfd }, 4, -#else - { 0xfd, 0xff, 0, 0 }, 4, -#endif - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF32Data = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl); - -#endif diff --git a/deps/icu-small/source/common/ucnv_u32.cpp b/deps/icu-small/source/common/ucnv_u32.cpp new file mode 100644 index 0000000000..3fac04b300 --- /dev/null +++ b/deps/icu-small/source/common/ucnv_u32.cpp @@ -0,0 +1,1253 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u32.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-32 converter implementation. Used to be in ucnv_utf.c. 
+*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/utf.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +#define MAXIMUM_UCS2 0x0000FFFF +#define MAXIMUM_UTF 0x0010FFFF +#define HALF_SHIFT 10 +#define HALF_BASE 0x0010000 +#define HALF_MASK 0x3FF +#define SURROGATE_HIGH_START 0xD800 +#define SURROGATE_LOW_START 0xDC00 + +/* -SURROGATE_LOW_START + HALF_BASE */ +#define SURROGATE_LOW_BASE 9216 + +enum { + UCNV_NEED_TO_WRITE_BOM=1 +}; + +/* UTF-32BE ----------------------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + + /* Restore state of current sequence */ + if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) { + if (mySource < sourceLimit) { + ch = (ch << 8) | (uint8_t)(*mySource); + toUBytes[i++] = (char) *(mySource++); + } + else { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { + /* Normal valid byte when the loop has not 
prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) { + *(myTarget++) = (UChar)ch; + } + else { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; +} + +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + int32_t *myOffsets = args->offsets; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + int32_t offsetNum = 0; + + /* Restore state of current sequence */ + if (args->converter->toUnicodeStatus && myTarget < targetLimit) { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) { + if (mySource < sourceLimit) { + ch = (ch << 8) | (uint8_t)(*mySource); + toUBytes[i++] = (char) *(mySource++); + } + else { + /* stores a partially calculated 
target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + *myOffsets++ = offsetNum; + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + *(myOffsets++) = offsetNum; + } + else { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + offsetNum += i; + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; + args->offsets = myOffsets; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + 
args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + temp[0] = 0; + + if (args->converter->fromUChar32) { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + ch = *(mySource++); + + if (U_IS_SURROGATE(ch)) { + if (U_IS_LEAD(ch)) { +lowsurogate: + if (mySource < sourceLimit) { + ch2 = *mySource; + if (U_IS_TRAIL(ch2)) { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + temp[1] = (uint8_t) (ch >> 16 & 0x1F); + temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { + if (myTarget < targetLimit) { + *(myTarget++) = temp[indexToWrite]; + } + else { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; +} + +static void U_CALLCONV 
+T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + int32_t *myOffsets; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + int32_t offsetNum = 0; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + myOffsets = args->offsets; + temp[0] = 0; + + if (args->converter->fromUChar32) { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + ch = *(mySource++); + + if (U_IS_SURROGATE(ch)) { + if (U_IS_LEAD(ch)) { +lowsurogate: + if (mySource < sourceLimit) { + ch2 = *mySource; + if (U_IS_TRAIL(ch2)) { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot 
get any larger than 10FFFF because we are coming from UTF-16 */ + temp[1] = (uint8_t) (ch >> 16 & 0x1F); + temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { + if (myTarget < targetLimit) { + *(myTarget++) = temp[indexToWrite]; + *(myOffsets++) = offsetNum; + } + else { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + offsetNum = offsetNum + 1 + (temp[1] != 0); + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; + args->offsets = myOffsets; +} + +static UChar32 U_CALLCONV +T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + const uint8_t *mySource; + UChar32 myUChar; + int32_t length; + + mySource = (const uint8_t *)args->source; + if (mySource >= (const uint8_t *)args->sourceLimit) + { + /* no input */ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); + if (length < 4) + { + /* got a partial character */ + uprv_memcpy(args->converter->toUBytes, mySource, length); + args->converter->toULength = (int8_t)length; + args->source = (const char *)(mySource + length); + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* Don't even try to do a direct cast because the value may be on an odd address. 
*/ + myUChar = ((UChar32)mySource[0] << 24) + | ((UChar32)mySource[1] << 16) + | ((UChar32)mySource[2] << 8) + | ((UChar32)mySource[3]); + + args->source = (const char *)(mySource + 4); + if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { + return myUChar; + } + + uprv_memcpy(args->converter->toUBytes, mySource, 4); + args->converter->toULength = 4; + + *err = U_ILLEGAL_CHAR_FOUND; + return 0xffff; +} +U_CDECL_END +static const UConverterImpl _UTF32BEImpl = { + UCNV_UTF32_BigEndian, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + T_UConverter_toUnicode_UTF32_BE, + T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, + T_UConverter_fromUnicode_UTF32_BE, + T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, + T_UConverter_getNextUChar_UTF32_BE, + + NULL, + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ +static const UConverterStaticData _UTF32BEStaticData = { + sizeof(UConverterStaticData), + "UTF-32BE", + 1232, + UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, + { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF32BEData = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl); + +/* UTF-32LE ---------------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + + /* Restore state of current sequence */ + if (args->converter->toUnicodeStatus && myTarget < targetLimit) + { + i = args->converter->toULength; /* restore # of bytes 
consumed */ + args->converter->toULength = 0; + + /* Stores the previously calculated ch from a previous call*/ + ch = args->converter->toUnicodeStatus - 1; + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) + { + if (mySource < sourceLimit) + { + ch |= ((uint8_t)(*mySource)) << (i * 8); + toUBytes[i++] = (char) *(mySource++); + } + else + { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) { + *(myTarget++) = (UChar)ch; + } + else { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; +} + +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + int32_t *myOffsets = args->offsets; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = 
args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + int32_t offsetNum = 0; + + /* Restore state of current sequence */ + if (args->converter->toUnicodeStatus && myTarget < targetLimit) + { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + /* Stores the previously calculated ch from a previous call*/ + ch = args->converter->toUnicodeStatus - 1; + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) + { + if (mySource < sourceLimit) + { + ch |= ((uint8_t)(*mySource)) << (i * 8); + toUBytes[i++] = (char) *(mySource++); + } + else + { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) + { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + *(myOffsets++) = offsetNum; + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + *(myOffsets++) = offsetNum; + } + else + { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else + { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + offsetNum += i; + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const 
char *) mySource; + args->offsets = myOffsets; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + temp[3] = 0; + + if (args->converter->fromUChar32) + { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (U16_IS_SURROGATE(ch)) { + if (U16_IS_LEAD(ch)) + { +lowsurogate: + if (mySource < sourceLimit) + { + ch2 = *mySource; + if (U16_IS_TRAIL(ch2)) { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot 
get any larger than 10FFFF because we are coming from UTF-16 */ + temp[2] = (uint8_t) (ch >> 16 & 0x1F); + temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) + { + if (myTarget < targetLimit) + { + *(myTarget++) = temp[indexToWrite]; + } + else + { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + int32_t *myOffsets; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + int32_t offsetNum = 0; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + myOffsets = args->offsets; + temp[3] = 0; + + if (args->converter->fromUChar32) + { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (U16_IS_SURROGATE(ch)) { + if (U16_IS_LEAD(ch)) + { 
+lowsurogate: + if (mySource < sourceLimit) + { + ch2 = *mySource; + if (U16_IS_TRAIL(ch2)) + { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + temp[2] = (uint8_t) (ch >> 16 & 0x1F); + temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) + { + if (myTarget < targetLimit) + { + *(myTarget++) = temp[indexToWrite]; + *(myOffsets++) = offsetNum; + } + else + { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + offsetNum = offsetNum + 1 + (temp[2] != 0); + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; + args->offsets = myOffsets; +} + +static UChar32 U_CALLCONV +T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + const uint8_t *mySource; + UChar32 myUChar; + int32_t length; + + mySource = (const uint8_t *)args->source; + if (mySource >= (const uint8_t *)args->sourceLimit) + { + /* no input */ + *err = 
U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); + if (length < 4) + { + /* got a partial character */ + uprv_memcpy(args->converter->toUBytes, mySource, length); + args->converter->toULength = (int8_t)length; + args->source = (const char *)(mySource + length); + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* Don't even try to do a direct cast because the value may be on an odd address. */ + myUChar = ((UChar32)mySource[3] << 24) + | ((UChar32)mySource[2] << 16) + | ((UChar32)mySource[1] << 8) + | ((UChar32)mySource[0]); + + args->source = (const char *)(mySource + 4); + if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { + return myUChar; + } + + uprv_memcpy(args->converter->toUBytes, mySource, 4); + args->converter->toULength = 4; + + *err = U_ILLEGAL_CHAR_FOUND; + return 0xffff; +} +U_CDECL_END +static const UConverterImpl _UTF32LEImpl = { + UCNV_UTF32_LittleEndian, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + T_UConverter_toUnicode_UTF32_LE, + T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, + T_UConverter_fromUnicode_UTF32_LE, + T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, + T_UConverter_getNextUChar_UTF32_LE, + + NULL, + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ +static const UConverterStaticData _UTF32LEStaticData = { + sizeof(UConverterStaticData), + "UTF-32LE", + 1234, + UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, + { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF32LEData = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl); + +/* UTF-32 (Detect BOM) ------------------------------------------------------ */ + +/* + * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE + * accordingly. 
+ * + * State values: + * 0 initial state + * 1 saw 00 + * 2 saw 00 00 + * 3 saw 00 00 FE + * 4 - + * 5 saw FF + * 6 saw FF FE + * 7 saw FF FE 00 + * 8 UTF-32BE mode + * 9 UTF-32LE mode + * + * During detection: state&3==number of matching bytes so far. + * + * On output, emit U+FEFF as the first code point. + */ +U_CDECL_BEGIN +static void U_CALLCONV +_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode: state=0 */ + cnv->mode=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode: prepare to output the UTF-32PE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} + +static void U_CALLCONV +_UTF32Open(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + (void)pErrorCode; + _UTF32Reset(cnv, UCNV_RESET_BOTH); +} + +static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 }; + +static void U_CALLCONV +_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv=pArgs->converter; + const char *source=pArgs->source; + const char *sourceLimit=pArgs->sourceLimit; + int32_t *offsets=pArgs->offsets; + + int32_t state, offsetDelta; + char b; + + state=cnv->mode; + + /* + * If we detect a BOM in this buffer, then we must add the BOM size to the + * offsets because the actual converter function will not see and count the BOM. + * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
+ */ + offsetDelta=0; + + while(sourcesource); + } else if(state==8) { + state=9; /* detect UTF-32LE */ + offsetDelta=(int32_t)(source-pArgs->source); + } + } else { + /* switch to UTF-32BE and pass the previous bytes */ + int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ + + /* reset the source */ + source=pArgs->source; + + if(count==(state&3)) { + /* simple: all in the same buffer, just reset source */ + } else { + UBool oldFlush=pArgs->flush; + + /* some of the bytes are from a previous buffer, replay those first */ + pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ + pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ + pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ + + /* no offsets: bytes from previous buffer, and not enough for output */ + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + + /* restore real pointers; pArgs->source will be set in case 8/9 */ + pArgs->sourceLimit=sourceLimit; + pArgs->flush=oldFlush; + } + state=8; + continue; + } + break; + case 8: + /* call UTF-32BE */ + pArgs->source=source; + if(offsets==NULL) { + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + } else { + T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); + } + source=pArgs->source; + break; + case 9: + /* call UTF-32LE */ + pArgs->source=source; + if(offsets==NULL) { + T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); + } else { + T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); + } + source=pArgs->source; + break; + default: + break; /* does not occur */ + } + } + + /* add BOM size to offsets - see comment at offsetDelta declaration */ + if(offsets!=NULL && offsetDelta!=0) { + int32_t *offsetsLimit=pArgs->offsets; + while(offsetssource=source; + + if(source==sourceLimit && pArgs->flush) { + /* handle truncated input */ + switch(state) { + case 0: + break; /* no input at all, nothing to do */ + case 8: + 
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + break; + case 9: + T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); + break; + default: + /* handle 0source=utf32BOM+(state&4); /* select the correct BOM */ + pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ + + /* no offsets: not enough for output */ + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + pArgs->source=source; + pArgs->sourceLimit=sourceLimit; + state=8; + break; + } + } + + cnv->mode=state; +} + +static UChar32 U_CALLCONV +_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + switch(pArgs->converter->mode) { + case 8: + return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); + case 9: + return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); + default: + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } +} +U_CDECL_END +static const UConverterImpl _UTF32Impl = { + UCNV_UTF32, + + NULL, + NULL, + + _UTF32Open, + NULL, + _UTF32Reset, + + _UTF32ToUnicodeWithOffsets, + _UTF32ToUnicodeWithOffsets, +#if U_IS_BIG_ENDIAN + T_UConverter_fromUnicode_UTF32_BE, + T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, +#else + T_UConverter_fromUnicode_UTF32_LE, + T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, +#endif + _UTF32GetNextUChar, + + NULL, /* ### TODO implement getStarters for all Unicode encodings?! 
*/ + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */ +static const UConverterStaticData _UTF32StaticData = { + sizeof(UConverterStaticData), + "UTF-32", + 1236, + UCNV_IBM, UCNV_UTF32, 4, 4, +#if U_IS_BIG_ENDIAN + { 0, 0, 0xff, 0xfd }, 4, +#else + { 0xfd, 0xff, 0, 0 }, 4, +#endif + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF32Data = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl); + +#endif diff --git a/deps/icu-small/source/common/ucnv_u7.c b/deps/icu-small/source/common/ucnv_u7.c deleted file mode 100644 index 3c1d240ed8..0000000000 --- a/deps/icu-small/source/common/ucnv_u7.c +++ /dev/null @@ -1,1484 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_u7.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-7 converter implementation. Used to be in ucnv_utf.c. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "cmemory.h" -#include "unicode/ucnv.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "uassert.h" - -/* UTF-7 -------------------------------------------------------------------- */ - -/* - * UTF-7 is a stateful encoding of Unicode. - * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt) - * It was intended for use in Internet email systems, using in its bytewise - * encoding only a subset of 7-bit US-ASCII. 
- * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still - * occasionally used. - * - * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII - * characters directly or in base64. Especially, the characters in set O - * as defined in the RFC (see below) may be encoded directly but are not - * allowed in, e.g., email headers. - * By default, the ICU UTF-7 converter encodes set O directly. - * By choosing the option "version=1", set O will be escaped instead. - * For example: - * utf7Converter=ucnv_open("UTF-7,version=1"); - * - * For details about email headers see RFC 2047. - */ - -/* - * Tests for US-ASCII characters belonging to character classes - * defined in UTF-7. - * - * Set D (directly encoded characters) consists of the following - * characters: the upper and lower case letters A through Z - * and a through z, the 10 digits 0-9, and the following nine special - * characters (note that "+" and "=" are omitted): - * '(),-./:? - * - * Set O (optional direct characters) consists of the following - * characters (note that "\" and "~" are omitted): - * !"#$%&*;<=>@[]^_`{|} - * - * According to the rules in RFC 2152, the byte values for the following - * US-ASCII characters are not used in UTF-7 and are therefore illegal: - * - all C0 control codes except for CR LF TAB - * - BACKSLASH - * - TILDE - * - DEL - * - all codes beyond US-ASCII, i.e. all >127 - */ -#define inSetD(c) \ - ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \ - (uint8_t)((c)-48)<10 || /* digits */ \ - (uint8_t)((c)-39)<3 || /* '() */ \ - (uint8_t)((c)-44)<4 || /* ,-./ */ \ - (c)==58 || (c)==63 /* :? 
*/ \ - ) - -#define inSetO(c) \ - ((uint8_t)((c)-33)<6 || /* !"#$%& */ \ - (uint8_t)((c)-59)<4 || /* ;<=> */ \ - (uint8_t)((c)-93)<4 || /* ]^_` */ \ - (uint8_t)((c)-123)<3 || /* {|} */ \ - (c)==42 || (c)==64 || (c)==91 /* *@[ */ \ - ) - -#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) -#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) - -#define PLUS 43 -#define MINUS 45 -#define BACKSLASH 92 -#define TILDE 126 - -/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */ -#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c)) - -/* encode directly sets D and O and CR LF SP TAB */ -static const UBool encodeDirectlyMaximum[128]={ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 -}; - -/* encode directly set D and CR LF SP TAB but not set O */ -static const UBool encodeDirectlyRestricted[128]={ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 -}; - -static const uint8_t -toBase64[64]={ - /* A-Z */ - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - /* a-z */ - 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 
115, 116, 117, 118, 119, 120, 121, 122, - /* 0-9 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - /* +/ */ - 43, 47 -}; - -static const int8_t -fromBase64[128]={ - /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ - -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, - -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, - - /* general punctuation with + and / and a special value (-2) for - */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63, - /* digits */ - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, - - /* A-Z */ - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, - - /* a-z */ - -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 -}; - -/* - * converter status values: - * - * toUnicodeStatus: - * 24 inDirectMode (boolean) - * 23..16 base64Counter (-1..7) - * 15..0 bits (up to 14 bits incoming base64) - * - * fromUnicodeStatus: - * 31..28 version (0: set O direct 1: set O escaped) - * 24 inDirectMode (boolean) - * 23..16 base64Counter (0..2) - * 7..0 bits (6 bits outgoing base64) - * - */ - -static void -_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode */ - cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */ - cnv->toULength=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode */ - cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ - } -} - -static void -_UTF7Open(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - if(UCNV_GET_VERSION(cnv)<=1) { - /* TODO(markus): Should just use cnv->options rather than copying the version number. 
*/ - cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28; - _UTF7Reset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static void -_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - - uint8_t *bytes; - uint8_t byteIndex; - - int32_t length, targetCapacity; - - /* UTF-7 state */ - uint16_t bits; - int8_t base64Counter; - UBool inDirectMode; - - int8_t base64Value; - - int32_t sourceIndex, nextSourceIndex; - - uint8_t b; - /* set up the local pointers */ - cnv=pArgs->converter; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - /* get the state machine state */ - { - uint32_t status=cnv->toUnicodeStatus; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint16_t)status; - } - bytes=cnv->toUBytes; - byteIndex=cnv->toULength; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - - if(inDirectMode) { -directMode: - /* - * In Direct Mode, most US-ASCII characters are encoded directly, i.e., - * with their US-ASCII byte values. - * Backslash and Tilde and most control characters are not allowed in UTF-7. - * A plus sign starts Unicode (or "escape") Mode. - * - * In Direct Mode, only the sourceIndex is used. 
- */ - byteIndex=0; - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - b=*source++; - if(!isLegalUTF7(b)) { - /* illegal */ - bytes[0]=b; - byteIndex=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if(b!=PLUS) { - /* write directly encoded character */ - *target++=b; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else /* PLUS */ { - /* switch to Unicode mode */ - nextSourceIndex=++sourceIndex; - inDirectMode=FALSE; - byteIndex=0; - bits=0; - base64Counter=-1; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - /* - * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. - * The base64 sequence ends with any character that is not in the base64 alphabet. - * A terminating minus sign is consumed. - * - * In Unicode Mode, the sourceIndex has the index to the start of the current - * base64 bytes, while nextSourceIndex is precisely parallel to source, - * keeping the index to the following byte. - * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. - */ - while(source=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) { - /* either - * base64Value==-1 for any legal character except base64 and minus sign, or - * base64Value==-3 for illegal characters: - * 1. In either case, leave Unicode mode. - * 2.1. If we ended with an incomplete UChar or none after the +, then - * generate an error for the preceding erroneous sequence and deal with - * the current (possibly illegal) character next time through. - * 2.2. Else the current char comes after a complete UChar, which was already - * pushed to the output buf, so: - * 2.2.1. If the current char is legal, just save it for processing next time. - * It may be for example, a plus which we need to deal with in direct mode. - * 2.2.2. 
Else if the current char is illegal, we might as well deal with it here. - */ - inDirectMode=TRUE; - if(base64Counter==-1) { - /* illegal: + immediately followed by something other than base64 or minus sign */ - /* include the plus sign in the reported sequence, but not the subsequent char */ - --source; - bytes[0]=PLUS; - byteIndex=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if(bits!=0) { - /* bits are illegally left over, a UChar is incomplete */ - /* don't include current char (legal or illegal) in error seq */ - --source; - --byteIndex; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else { - /* previous UChar was complete */ - if(base64Value==-3) { - /* current character is illegal, deal with it here */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else { - /* un-read the current character in case it is a plus sign */ - --source; - sourceIndex=nextSourceIndex-1; - goto directMode; - } - } - } else if(base64Value>=0) { - /* collect base64 bytes into UChars */ - switch(base64Counter) { - case -1: /* -1 is immediately after the + */ - case 0: - bits=base64Value; - base64Counter=1; - break; - case 1: - case 3: - case 4: - case 6: - bits=(uint16_t)((bits<<6)|base64Value); - ++base64Counter; - break; - case 2: - *target++=(UChar)((bits<<4)|(base64Value>>2)); - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&3); - base64Counter=3; - break; - case 5: - *target++=(UChar)((bits<<2)|(base64Value>>4)); - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&15); - base64Counter=6; - break; - case 7: - *target++=(UChar)((bits<<6)|base64Value); - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex; - } - byteIndex=0; - bits=0; - base64Counter=0; - break; - default: - 
/* will never occur */ - break; - } - } else /*base64Value==-2*/ { - /* minus sign terminates the base64 sequence */ - inDirectMode=TRUE; - if(base64Counter==-1) { - /* +- i.e. a minus immediately following a plus */ - *target++=PLUS; - if(offsets!=NULL) { - *offsets++=sourceIndex-1; - } - } else { - /* absorb the minus and leave the Unicode Mode */ - if(bits!=0) { - /* bits are illegally left over, a UChar is incomplete */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } - sourceIndex=nextSourceIndex; - goto directMode; - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - - if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) { - /* - * if we are in Unicode mode, then the byteIndex might not be 0, - * but that is ok if bits==0 - * -> we set byteIndex=0 at the end of the stream to avoid a truncated error - * (not true for IMAP-mailbox-name where we must end in direct mode) - */ - byteIndex=0; - } - - /* set the converter state back into UConverter */ - cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -static void -_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target, *targetLimit; - int32_t *offsets; - - int32_t length, targetCapacity, sourceIndex; - UChar c; - - /* UTF-7 state */ - const UBool *encodeDirectly; - uint8_t bits; - int8_t base64Counter; - UBool inDirectMode; - - /* set up the local pointers */ - cnv=pArgs->converter; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetLimit=(uint8_t *)pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the state machine 
state */ - { - uint32_t status=cnv->fromUnicodeStatus; - encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint8_t)status; - U_ASSERT(bits<=UPRV_LENGTHOF(toBase64)); - } - - /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ - sourceIndex=0; - - if(inDirectMode) { -directMode: - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - c=*source++; - /* currently always encode CR LF SP TAB directly */ - if(c<=127 && encodeDirectly[c]) { - /* encode directly */ - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else if(c==PLUS) { - /* output +- for + */ - *target++=PLUS; - if(targetcharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } else { - /* un-read this character and switch to Unicode Mode */ - --source; - *target++=PLUS; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - inDirectMode=FALSE; - base64Counter=0; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - while(sourcecharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - goto directMode; - } else { - /* - * base64 this character: - * Output 2 or 3 base64 bytes for the remaining bits of the previous character - * and the bits of this character, each implicitly in UTF-16BE. - * - * Here, bits is an 8-bit variable because only 6 bits need to be kept from one - * character to the next. The actual 2 or 4 bits are shifted to the left edge - * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. 
- */ - switch(base64Counter) { - case 0: - *target++=toBase64[c>>10]; - if(target>4)&0x3f]; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&15)<<2); - base64Counter=1; - break; - case 1: - *target++=toBase64[bits|(c>>14)]; - if(target>8)&0x3f]; - if(target>2)&0x3f]; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f]; - cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f]; - cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&3)<<4); - base64Counter=2; - break; - case 2: - *target++=toBase64[bits|(c>>12)]; - if(target>6)&0x3f]; - if(targetcharErrorBuffer[0]=toBase64[c&0x3f]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f]; - cnv->charErrorBuffer[1]=toBase64[c&0x3f]; - cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=0; - base64Counter=0; - break; - default: - /* will never occur */ - break; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - - if(pArgs->flush && source>=sourceLimit) { - /* flush remaining bits to the target */ - if(!inDirectMode) { - if (base64Counter!=0) { - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits]; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* Add 
final MINUS to terminate unicodeMode */ - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=MINUS; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* reset the state for the next conversion */ - cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ - } else { - /* set the converter state back into UConverter */ - cnv->fromUnicodeStatus= - (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ - ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - return; -} - -static const char * -_UTF7GetName(const UConverter *cnv) { - switch(cnv->fromUnicodeStatus>>28) { - case 1: - return "UTF-7,version=1"; - default: - return "UTF-7"; - } -} - -static const UConverterImpl _UTF7Impl={ - UCNV_UTF7, - - NULL, - NULL, - - _UTF7Open, - NULL, - _UTF7Reset, - - _UTF7ToUnicodeWithOffsets, - _UTF7ToUnicodeWithOffsets, - _UTF7FromUnicodeWithOffsets, - _UTF7FromUnicodeWithOffsets, - NULL, - - NULL, - _UTF7GetName, - NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ - NULL, - ucnv_getCompleteUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF7StaticData={ - sizeof(UConverterStaticData), - "UTF-7", - 0, /* TODO CCSID for UTF-7 */ - UCNV_IBM, UCNV_UTF7, - 1, 4, - { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF7Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl); - -/* IMAP mailbox name encoding ----------------------------------------------- */ - -/* - * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 - * http://www.ietf.org/rfc/rfc2060.txt - * - * 5.1.3. 
Mailbox International Naming Convention - * - * By convention, international mailbox names are specified using a - * modified version of the UTF-7 encoding described in [UTF-7]. The - * purpose of these modifications is to correct the following problems - * with UTF-7: - * - * 1) UTF-7 uses the "+" character for shifting; this conflicts with - * the common use of "+" in mailbox names, in particular USENET - * newsgroup names. - * - * 2) UTF-7's encoding is BASE64 which uses the "/" character; this - * conflicts with the use of "/" as a popular hierarchy delimiter. - * - * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with - * the use of "\" as a popular hierarchy delimiter. - * - * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with - * the use of "~" in some servers as a home directory indicator. - * - * 5) UTF-7 permits multiple alternate forms to represent the same - * string; in particular, printable US-ASCII chararacters can be - * represented in encoded form. - * - * In modified UTF-7, printable US-ASCII characters except for "&" - * represent themselves; that is, characters with octet values 0x20-0x25 - * and 0x27-0x7e. The character "&" (0x26) is represented by the two- - * octet sequence "&-". - * - * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all - * Unicode 16-bit octets) are represented in modified BASE64, with a - * further modification from [UTF-7] that "," is used instead of "/". - * Modified BASE64 MUST NOT be used to represent any printing US-ASCII - * character which can represent itself. - * - * "&" is used to shift to modified BASE64 and "-" to shift back to US- - * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that - * is, a name that ends with a Unicode 16-bit octet MUST end with a "- - * "). 
- * - * For example, here is a mailbox name which mixes English, Japanese, - * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- - */ - -/* - * Tests for US-ASCII characters belonging to character classes - * defined in UTF-7. - * - * Set D (directly encoded characters) consists of the following - * characters: the upper and lower case letters A through Z - * and a through z, the 10 digits 0-9, and the following nine special - * characters (note that "+" and "=" are omitted): - * '(),-./:? - * - * Set O (optional direct characters) consists of the following - * characters (note that "\" and "~" are omitted): - * !"#$%&*;<=>@[]^_`{|} - * - * According to the rules in RFC 2152, the byte values for the following - * US-ASCII characters are not used in UTF-7 and are therefore illegal: - * - all C0 control codes except for CR LF TAB - * - BACKSLASH - * - TILDE - * - DEL - * - all codes beyond US-ASCII, i.e. all >127 - */ - -/* uses '&' not '+' to start a base64 sequence */ -#define AMPERSAND 0x26 -#define COMMA 0x2c -#define SLASH 0x2f - -/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */ -#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e) - -/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */ -#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND) - -#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) -#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? 
-1 : fromBase64[c]) - -/* - * converter status values: - * - * toUnicodeStatus: - * 24 inDirectMode (boolean) - * 23..16 base64Counter (-1..7) - * 15..0 bits (up to 14 bits incoming base64) - * - * fromUnicodeStatus: - * 24 inDirectMode (boolean) - * 23..16 base64Counter (0..2) - * 7..0 bits (6 bits outgoing base64) - * - * ignore bits 31..25 - */ - -static void -_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - - uint8_t *bytes; - uint8_t byteIndex; - - int32_t length, targetCapacity; - - /* UTF-7 state */ - uint16_t bits; - int8_t base64Counter; - UBool inDirectMode; - - int8_t base64Value; - - int32_t sourceIndex, nextSourceIndex; - - UChar c; - uint8_t b; - - /* set up the local pointers */ - cnv=pArgs->converter; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - /* get the state machine state */ - { - uint32_t status=cnv->toUnicodeStatus; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint16_t)status; - } - bytes=cnv->toUBytes; - byteIndex=cnv->toULength; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - - if(inDirectMode) { -directMode: - /* - * In Direct Mode, US-ASCII characters are encoded directly, i.e., - * with their US-ASCII byte values. - * An ampersand starts Unicode (or "escape") Mode. - * - * In Direct Mode, only the sourceIndex is used. 
- */ - byteIndex=0; - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - b=*source++; - if(!isLegalIMAP(b)) { - /* illegal */ - bytes[0]=b; - byteIndex=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if(b!=AMPERSAND) { - /* write directly encoded character */ - *target++=b; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else /* AMPERSAND */ { - /* switch to Unicode mode */ - nextSourceIndex=++sourceIndex; - inDirectMode=FALSE; - byteIndex=0; - bits=0; - base64Counter=-1; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - /* - * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. - * The base64 sequence ends with any character that is not in the base64 alphabet. - * A terminating minus sign is consumed. - * US-ASCII must not be base64-ed. - * - * In Unicode Mode, the sourceIndex has the index to the start of the current - * base64 bytes, while nextSourceIndex is precisely parallel to source, - * keeping the index to the following byte. - * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. 
- */ - while(source0x7e) { - /* illegal - test other illegal US-ASCII values by base64Value==-3 */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if((base64Value=FROM_BASE64_IMAP(b))>=0) { - /* collect base64 bytes into UChars */ - switch(base64Counter) { - case -1: /* -1 is immediately after the & */ - case 0: - bits=base64Value; - base64Counter=1; - break; - case 1: - case 3: - case 4: - case 6: - bits=(uint16_t)((bits<<6)|base64Value); - ++base64Counter; - break; - case 2: - c=(UChar)((bits<<4)|(base64Value>>2)); - if(isLegalIMAP(c)) { - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&3); - base64Counter=3; - break; - case 5: - c=(UChar)((bits<<2)|(base64Value>>4)); - if(isLegalIMAP(c)) { - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&15); - base64Counter=6; - break; - case 7: - c=(UChar)((bits<<6)|base64Value); - if(isLegalIMAP(c)) { - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex; - } - byteIndex=0; - bits=0; - base64Counter=0; - break; - default: - /* will never occur */ - break; - } - } else if(base64Value==-2) { - /* minus sign terminates the base64 sequence */ - inDirectMode=TRUE; - if(base64Counter==-1) { - /* &- i.e. 
a minus immediately following an ampersand */ - *target++=AMPERSAND; - if(offsets!=NULL) { - *offsets++=sourceIndex-1; - } - } else { - /* absorb the minus and leave the Unicode Mode */ - if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) { - /* bits are illegally left over, a UChar is incomplete */ - /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } - sourceIndex=nextSourceIndex; - goto directMode; - } else { - if(base64Counter==-1) { - /* illegal: & immediately followed by something other than base64 or minus sign */ - /* include the ampersand in the reported sequence */ - --sourceIndex; - bytes[0]=AMPERSAND; - bytes[1]=b; - byteIndex=2; - } - /* base64Value==-1 for characters that are illegal only in Unicode mode */ - /* base64Value==-3 for illegal characters */ - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } -endloop: - - /* - * the end of the input stream and detection of truncated input - * are handled by the framework, but here we must check if we are in Unicode - * mode and byteIndex==0 because we must end in direct mode - * - * conditions: - * successful - * in Unicode mode and byteIndex==0 - * end of input and no truncated input - */ - if( U_SUCCESS(*pErrorCode) && - !inDirectMode && byteIndex==0 && - pArgs->flush && source>=sourceLimit - ) { - if(base64Counter==-1) { - /* & at the very end of the input */ - /* make the ampersand the reported sequence */ - bytes[0]=AMPERSAND; - byteIndex=1; - } - /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */ - - inDirectMode=TRUE; /* avoid looping */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - - /* set the converter state back into UConverter */ - 
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -static void -_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target, *targetLimit; - int32_t *offsets; - - int32_t length, targetCapacity, sourceIndex; - UChar c; - uint8_t b; - - /* UTF-7 state */ - uint8_t bits; - int8_t base64Counter; - UBool inDirectMode; - - /* set up the local pointers */ - cnv=pArgs->converter; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetLimit=(uint8_t *)pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the state machine state */ - { - uint32_t status=cnv->fromUnicodeStatus; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint8_t)status; - } - - /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ - sourceIndex=0; - - if(inDirectMode) { -directMode: - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - c=*source++; - /* encode 0x20..0x7e except '&' directly */ - if(inSetDIMAP(c)) { - /* encode directly */ - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else if(c==AMPERSAND) { - /* output &- for & */ - *target++=AMPERSAND; - if(targetcharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } else { - /* un-read this character and switch to Unicode Mode */ - --source; - *target++=AMPERSAND; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - inDirectMode=FALSE; - base64Counter=0; - goto 
unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - while(sourcecharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - goto directMode; - } else { - /* - * base64 this character: - * Output 2 or 3 base64 bytes for the remaining bits of the previous character - * and the bits of this character, each implicitly in UTF-16BE. - * - * Here, bits is an 8-bit variable because only 6 bits need to be kept from one - * character to the next. The actual 2 or 4 bits are shifted to the left edge - * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. - */ - switch(base64Counter) { - case 0: - b=(uint8_t)(c>>10); - *target++=TO_BASE64_IMAP(b); - if(target>4)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>4)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&15)<<2); - base64Counter=1; - break; - case 1: - b=(uint8_t)(bits|(c>>14)); - *target++=TO_BASE64_IMAP(b); - if(target>8)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(target>2)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>2)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>8)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - b=(uint8_t)((c>>2)&0x3f); - cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=2; - 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&3)<<4); - base64Counter=2; - break; - case 2: - b=(uint8_t)(bits|(c>>12)); - *target++=TO_BASE64_IMAP(b); - if(target>6)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(targetcharErrorBuffer[0]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>6)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - b=(uint8_t)(c&0x3f); - cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=0; - base64Counter=0; - break; - default: - /* will never occur */ - break; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - - if(pArgs->flush && source>=sourceLimit) { - /* flush remaining bits to the target */ - if(!inDirectMode) { - if(base64Counter!=0) { - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits); - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* need to terminate with a minus */ - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=MINUS; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* reset the state for the next conversion */ - cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ - } else { - /* set the converter state back into UConverter */ - cnv->fromUnicodeStatus= - (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ - ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - return; -} - -static const UConverterImpl _IMAPImpl={ - UCNV_IMAP_MAILBOX, - - NULL, - NULL, - - _UTF7Open, - NULL, - _UTF7Reset, - - _IMAPToUnicodeWithOffsets, - _IMAPToUnicodeWithOffsets, - _IMAPFromUnicodeWithOffsets, - _IMAPFromUnicodeWithOffsets, - NULL, - - 
NULL, - NULL, - NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ - NULL, - ucnv_getCompleteUnicodeSet -}; - -static const UConverterStaticData _IMAPStaticData={ - sizeof(UConverterStaticData), - "IMAP-mailbox-name", - 0, /* TODO CCSID for IMAP-mailbox-name */ - UCNV_IBM, UCNV_IMAP_MAILBOX, - 1, 4, - { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _IMAPData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl); - -#endif diff --git a/deps/icu-small/source/common/ucnv_u7.cpp b/deps/icu-small/source/common/ucnv_u7.cpp new file mode 100644 index 0000000000..ec7befe9fc --- /dev/null +++ b/deps/icu-small/source/common/ucnv_u7.cpp @@ -0,0 +1,1491 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u7.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-7 converter implementation. Used to be in ucnv_utf.c. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "cmemory.h" +#include "unicode/ucnv.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "uassert.h" + +/* UTF-7 -------------------------------------------------------------------- */ + +/* + * UTF-7 is a stateful encoding of Unicode. + * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt) + * It was intended for use in Internet email systems, using in its bytewise + * encoding only a subset of 7-bit US-ASCII. 
+ * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still + * occasionally used. + * + * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII + * characters directly or in base64. Especially, the characters in set O + * as defined in the RFC (see below) may be encoded directly but are not + * allowed in, e.g., email headers. + * By default, the ICU UTF-7 converter encodes set O directly. + * By choosing the option "version=1", set O will be escaped instead. + * For example: + * utf7Converter=ucnv_open("UTF-7,version=1"); + * + * For details about email headers see RFC 2047. + */ + +/* + * Tests for US-ASCII characters belonging to character classes + * defined in UTF-7. + * + * Set D (directly encoded characters) consists of the following + * characters: the upper and lower case letters A through Z + * and a through z, the 10 digits 0-9, and the following nine special + * characters (note that "+" and "=" are omitted): + * '(),-./:? + * + * Set O (optional direct characters) consists of the following + * characters (note that "\" and "~" are omitted): + * !"#$%&*;<=>@[]^_`{|} + * + * According to the rules in RFC 2152, the byte values for the following + * US-ASCII characters are not used in UTF-7 and are therefore illegal: + * - all C0 control codes except for CR LF TAB + * - BACKSLASH + * - TILDE + * - DEL + * - all codes beyond US-ASCII, i.e. all >127 + */ +#define inSetD(c) \ + ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \ + (uint8_t)((c)-48)<10 || /* digits */ \ + (uint8_t)((c)-39)<3 || /* '() */ \ + (uint8_t)((c)-44)<4 || /* ,-./ */ \ + (c)==58 || (c)==63 /* :? 
*/ \ + ) + +#define inSetO(c) \ + ((uint8_t)((c)-33)<6 || /* !"#$%& */ \ + (uint8_t)((c)-59)<4 || /* ;<=> */ \ + (uint8_t)((c)-93)<4 || /* ]^_` */ \ + (uint8_t)((c)-123)<3 || /* {|} */ \ + (c)==42 || (c)==64 || (c)==91 /* *@[ */ \ + ) + +#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) +#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) + +#define PLUS 43 +#define MINUS 45 +#define BACKSLASH 92 +#define TILDE 126 + +/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */ +#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c)) + +/* encode directly sets D and O and CR LF SP TAB */ +static const UBool encodeDirectlyMaximum[128]={ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 +}; + +/* encode directly set D and CR LF SP TAB but not set O */ +static const UBool encodeDirectlyRestricted[128]={ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 +}; + +static const uint8_t +toBase64[64]={ + /* A-Z */ + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + /* a-z */ + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 
115, 116, 117, 118, 119, 120, 121, 122, + /* 0-9 */ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* +/ */ + 43, 47 +}; + +static const int8_t +fromBase64[128]={ + /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ + -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + + /* general punctuation with + and / and a special value (-2) for - */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63, + /* digits */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, + + /* A-Z */ + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, + + /* a-z */ + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 +}; + +/* + * converter status values: + * + * toUnicodeStatus: + * 24 inDirectMode (boolean) + * 23..16 base64Counter (-1..7) + * 15..0 bits (up to 14 bits incoming base64) + * + * fromUnicodeStatus: + * 31..28 version (0: set O direct 1: set O escaped) + * 24 inDirectMode (boolean) + * 23..16 base64Counter (0..2) + * 7..0 bits (6 bits outgoing base64) + * + */ + +U_CDECL_BEGIN +static void U_CALLCONV +_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode */ + cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */ + cnv->toULength=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode */ + cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ + } +} + +static void U_CALLCONV +_UTF7Open(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + if(UCNV_GET_VERSION(cnv)<=1) { + /* TODO(markus): Should just use cnv->options rather than copying the version number. 
*/ + cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28; + _UTF7Reset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static void U_CALLCONV +_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + uint8_t *bytes; + uint8_t byteIndex; + + int32_t length, targetCapacity; + + /* UTF-7 state */ + uint16_t bits; + int8_t base64Counter; + UBool inDirectMode; + + int8_t base64Value; + + int32_t sourceIndex, nextSourceIndex; + + uint8_t b; + /* set up the local pointers */ + cnv=pArgs->converter; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + /* get the state machine state */ + { + uint32_t status=cnv->toUnicodeStatus; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint16_t)status; + } + bytes=cnv->toUBytes; + byteIndex=cnv->toULength; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + if(inDirectMode) { +directMode: + /* + * In Direct Mode, most US-ASCII characters are encoded directly, i.e., + * with their US-ASCII byte values. + * Backslash and Tilde and most control characters are not allowed in UTF-7. + * A plus sign starts Unicode (or "escape") Mode. + * + * In Direct Mode, only the sourceIndex is used. 
+ */ + byteIndex=0; + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + b=*source++; + if(!isLegalUTF7(b)) { + /* illegal */ + bytes[0]=b; + byteIndex=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if(b!=PLUS) { + /* write directly encoded character */ + *target++=b; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else /* PLUS */ { + /* switch to Unicode mode */ + nextSourceIndex=++sourceIndex; + inDirectMode=FALSE; + byteIndex=0; + bits=0; + base64Counter=-1; + goto unicodeMode; + } + --length; + } + if(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + /* + * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. + * The base64 sequence ends with any character that is not in the base64 alphabet. + * A terminating minus sign is consumed. + * + * In Unicode Mode, the sourceIndex has the index to the start of the current + * base64 bytes, while nextSourceIndex is precisely parallel to source, + * keeping the index to the following byte. + * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. + */ + while(source=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) { + /* either + * base64Value==-1 for any legal character except base64 and minus sign, or + * base64Value==-3 for illegal characters: + * 1. In either case, leave Unicode mode. + * 2.1. If we ended with an incomplete UChar or none after the +, then + * generate an error for the preceding erroneous sequence and deal with + * the current (possibly illegal) character next time through. + * 2.2. Else the current char comes after a complete UChar, which was already + * pushed to the output buf, so: + * 2.2.1. If the current char is legal, just save it for processing next time. + * It may be for example, a plus which we need to deal with in direct mode. + * 2.2.2. 
Else if the current char is illegal, we might as well deal with it here. + */ + inDirectMode=TRUE; + if(base64Counter==-1) { + /* illegal: + immediately followed by something other than base64 or minus sign */ + /* include the plus sign in the reported sequence, but not the subsequent char */ + --source; + bytes[0]=PLUS; + byteIndex=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if(bits!=0) { + /* bits are illegally left over, a UChar is incomplete */ + /* don't include current char (legal or illegal) in error seq */ + --source; + --byteIndex; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else { + /* previous UChar was complete */ + if(base64Value==-3) { + /* current character is illegal, deal with it here */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else { + /* un-read the current character in case it is a plus sign */ + --source; + sourceIndex=nextSourceIndex-1; + goto directMode; + } + } + } else if(base64Value>=0) { + /* collect base64 bytes into UChars */ + switch(base64Counter) { + case -1: /* -1 is immediately after the + */ + case 0: + bits=base64Value; + base64Counter=1; + break; + case 1: + case 3: + case 4: + case 6: + bits=(uint16_t)((bits<<6)|base64Value); + ++base64Counter; + break; + case 2: + *target++=(UChar)((bits<<4)|(base64Value>>2)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&3); + base64Counter=3; + break; + case 5: + *target++=(UChar)((bits<<2)|(base64Value>>4)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&15); + base64Counter=6; + break; + case 7: + *target++=(UChar)((bits<<6)|base64Value); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex; + } + byteIndex=0; + bits=0; + base64Counter=0; + break; + default: + 
/* will never occur */ + break; + } + } else /*base64Value==-2*/ { + /* minus sign terminates the base64 sequence */ + inDirectMode=TRUE; + if(base64Counter==-1) { + /* +- i.e. a minus immediately following a plus */ + *target++=PLUS; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + /* absorb the minus and leave the Unicode Mode */ + if(bits!=0) { + /* bits are illegally left over, a UChar is incomplete */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + sourceIndex=nextSourceIndex; + goto directMode; + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + + if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) { + /* + * if we are in Unicode mode, then the byteIndex might not be 0, + * but that is ok if bits==0 + * -> we set byteIndex=0 at the end of the stream to avoid a truncated error + * (not true for IMAP-mailbox-name where we must end in direct mode) + */ + byteIndex=0; + } + + /* set the converter state back into UConverter */ + cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +static void U_CALLCONV +_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target, *targetLimit; + int32_t *offsets; + + int32_t length, targetCapacity, sourceIndex; + UChar c; + + /* UTF-7 state */ + const UBool *encodeDirectly; + uint8_t bits; + int8_t base64Counter; + UBool inDirectMode; + + /* set up the local pointers */ + cnv=pArgs->converter; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetLimit=(uint8_t *)pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the 
state machine state */ + { + uint32_t status=cnv->fromUnicodeStatus; + encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint8_t)status; + U_ASSERT(bits<=UPRV_LENGTHOF(toBase64)); + } + + /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ + sourceIndex=0; + + if(inDirectMode) { +directMode: + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + c=*source++; + /* currently always encode CR LF SP TAB directly */ + if(c<=127 && encodeDirectly[c]) { + /* encode directly */ + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else if(c==PLUS) { + /* output +- for + */ + *target++=PLUS; + if(targetcharErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } else { + /* un-read this character and switch to Unicode Mode */ + --source; + *target++=PLUS; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + inDirectMode=FALSE; + base64Counter=0; + goto unicodeMode; + } + --length; + } + if(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + while(sourcecharErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + goto directMode; + } else { + /* + * base64 this character: + * Output 2 or 3 base64 bytes for the remaining bits of the previous character + * and the bits of this character, each implicitly in UTF-16BE. + * + * Here, bits is an 8-bit variable because only 6 bits need to be kept from one + * character to the next. The actual 2 or 4 bits are shifted to the left edge + * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. 
+ */ + switch(base64Counter) { + case 0: + *target++=toBase64[c>>10]; + if(target>4)&0x3f]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&15)<<2); + base64Counter=1; + break; + case 1: + *target++=toBase64[bits|(c>>14)]; + if(target>8)&0x3f]; + if(target>2)&0x3f]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f]; + cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f]; + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&3)<<4); + base64Counter=2; + break; + case 2: + *target++=toBase64[bits|(c>>12)]; + if(target>6)&0x3f]; + if(targetcharErrorBuffer[0]=toBase64[c&0x3f]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f]; + cnv->charErrorBuffer[1]=toBase64[c&0x3f]; + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + + if(pArgs->flush && source>=sourceLimit) { + /* flush remaining bits to the target */ + if(!inDirectMode) { + if (base64Counter!=0) { + if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits]; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* Add 
final MINUS to terminate unicodeMode */ + if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=MINUS; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* reset the state for the next conversion */ + cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ + } else { + /* set the converter state back into UConverter */ + cnv->fromUnicodeStatus= + (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ + ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; + return; +} + +static const char * U_CALLCONV +_UTF7GetName(const UConverter *cnv) { + switch(cnv->fromUnicodeStatus>>28) { + case 1: + return "UTF-7,version=1"; + default: + return "UTF-7"; + } +} +U_CDECL_END + +static const UConverterImpl _UTF7Impl={ + UCNV_UTF7, + + NULL, + NULL, + + _UTF7Open, + NULL, + _UTF7Reset, + + _UTF7ToUnicodeWithOffsets, + _UTF7ToUnicodeWithOffsets, + _UTF7FromUnicodeWithOffsets, + _UTF7FromUnicodeWithOffsets, + NULL, + + NULL, + _UTF7GetName, + NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ + NULL, + ucnv_getCompleteUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF7StaticData={ + sizeof(UConverterStaticData), + "UTF-7", + 0, /* TODO CCSID for UTF-7 */ + UCNV_IBM, UCNV_UTF7, + 1, 4, + { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF7Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl); + +/* IMAP mailbox name encoding ----------------------------------------------- */ + +/* + * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 + * http://www.ietf.org/rfc/rfc2060.txt + * + * 5.1.3. 
Mailbox International Naming Convention + * + * By convention, international mailbox names are specified using a + * modified version of the UTF-7 encoding described in [UTF-7]. The + * purpose of these modifications is to correct the following problems + * with UTF-7: + * + * 1) UTF-7 uses the "+" character for shifting; this conflicts with + * the common use of "+" in mailbox names, in particular USENET + * newsgroup names. + * + * 2) UTF-7's encoding is BASE64 which uses the "/" character; this + * conflicts with the use of "/" as a popular hierarchy delimiter. + * + * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with + * the use of "\" as a popular hierarchy delimiter. + * + * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with + * the use of "~" in some servers as a home directory indicator. + * + * 5) UTF-7 permits multiple alternate forms to represent the same + * string; in particular, printable US-ASCII chararacters can be + * represented in encoded form. + * + * In modified UTF-7, printable US-ASCII characters except for "&" + * represent themselves; that is, characters with octet values 0x20-0x25 + * and 0x27-0x7e. The character "&" (0x26) is represented by the two- + * octet sequence "&-". + * + * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all + * Unicode 16-bit octets) are represented in modified BASE64, with a + * further modification from [UTF-7] that "," is used instead of "/". + * Modified BASE64 MUST NOT be used to represent any printing US-ASCII + * character which can represent itself. + * + * "&" is used to shift to modified BASE64 and "-" to shift back to US- + * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that + * is, a name that ends with a Unicode 16-bit octet MUST end with a "- + * "). 
+ * + * For example, here is a mailbox name which mixes English, Japanese, + * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- + */ + +/* + * Tests for US-ASCII characters belonging to character classes + * defined in UTF-7. + * + * Set D (directly encoded characters) consists of the following + * characters: the upper and lower case letters A through Z + * and a through z, the 10 digits 0-9, and the following nine special + * characters (note that "+" and "=" are omitted): + * '(),-./:? + * + * Set O (optional direct characters) consists of the following + * characters (note that "\" and "~" are omitted): + * !"#$%&*;<=>@[]^_`{|} + * + * According to the rules in RFC 2152, the byte values for the following + * US-ASCII characters are not used in UTF-7 and are therefore illegal: + * - all C0 control codes except for CR LF TAB + * - BACKSLASH + * - TILDE + * - DEL + * - all codes beyond US-ASCII, i.e. all >127 + */ + +/* uses '&' not '+' to start a base64 sequence */ +#define AMPERSAND 0x26 +#define COMMA 0x2c +#define SLASH 0x2f + +/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */ +#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e) + +/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */ +#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND) + +#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) +#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? 
-1 : fromBase64[c]) + +/* + * converter status values: + * + * toUnicodeStatus: + * 24 inDirectMode (boolean) + * 23..16 base64Counter (-1..7) + * 15..0 bits (up to 14 bits incoming base64) + * + * fromUnicodeStatus: + * 24 inDirectMode (boolean) + * 23..16 base64Counter (0..2) + * 7..0 bits (6 bits outgoing base64) + * + * ignore bits 31..25 + */ + +U_CDECL_BEGIN +static void U_CALLCONV +_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + uint8_t *bytes; + uint8_t byteIndex; + + int32_t length, targetCapacity; + + /* UTF-7 state */ + uint16_t bits; + int8_t base64Counter; + UBool inDirectMode; + + int8_t base64Value; + + int32_t sourceIndex, nextSourceIndex; + + UChar c; + uint8_t b; + + /* set up the local pointers */ + cnv=pArgs->converter; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + /* get the state machine state */ + { + uint32_t status=cnv->toUnicodeStatus; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint16_t)status; + } + bytes=cnv->toUBytes; + byteIndex=cnv->toULength; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + if(inDirectMode) { +directMode: + /* + * In Direct Mode, US-ASCII characters are encoded directly, i.e., + * with their US-ASCII byte values. + * An ampersand starts Unicode (or "escape") Mode. + * + * In Direct Mode, only the sourceIndex is used. 
+ */ + byteIndex=0; + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + b=*source++; + if(!isLegalIMAP(b)) { + /* illegal */ + bytes[0]=b; + byteIndex=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if(b!=AMPERSAND) { + /* write directly encoded character */ + *target++=b; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else /* AMPERSAND */ { + /* switch to Unicode mode */ + nextSourceIndex=++sourceIndex; + inDirectMode=FALSE; + byteIndex=0; + bits=0; + base64Counter=-1; + goto unicodeMode; + } + --length; + } + if(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + /* + * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. + * The base64 sequence ends with any character that is not in the base64 alphabet. + * A terminating minus sign is consumed. + * US-ASCII must not be base64-ed. + * + * In Unicode Mode, the sourceIndex has the index to the start of the current + * base64 bytes, while nextSourceIndex is precisely parallel to source, + * keeping the index to the following byte. + * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. 
+ */ + while(source0x7e) { + /* illegal - test other illegal US-ASCII values by base64Value==-3 */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if((base64Value=FROM_BASE64_IMAP(b))>=0) { + /* collect base64 bytes into UChars */ + switch(base64Counter) { + case -1: /* -1 is immediately after the & */ + case 0: + bits=base64Value; + base64Counter=1; + break; + case 1: + case 3: + case 4: + case 6: + bits=(uint16_t)((bits<<6)|base64Value); + ++base64Counter; + break; + case 2: + c=(UChar)((bits<<4)|(base64Value>>2)); + if(isLegalIMAP(c)) { + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&3); + base64Counter=3; + break; + case 5: + c=(UChar)((bits<<2)|(base64Value>>4)); + if(isLegalIMAP(c)) { + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&15); + base64Counter=6; + break; + case 7: + c=(UChar)((bits<<6)|base64Value); + if(isLegalIMAP(c)) { + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex; + } + byteIndex=0; + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } else if(base64Value==-2) { + /* minus sign terminates the base64 sequence */ + inDirectMode=TRUE; + if(base64Counter==-1) { + /* &- i.e. 
a minus immediately following an ampersand */ + *target++=AMPERSAND; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + /* absorb the minus and leave the Unicode Mode */ + if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) { + /* bits are illegally left over, a UChar is incomplete */ + /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + sourceIndex=nextSourceIndex; + goto directMode; + } else { + if(base64Counter==-1) { + /* illegal: & immediately followed by something other than base64 or minus sign */ + /* include the ampersand in the reported sequence */ + --sourceIndex; + bytes[0]=AMPERSAND; + bytes[1]=b; + byteIndex=2; + } + /* base64Value==-1 for characters that are illegal only in Unicode mode */ + /* base64Value==-3 for illegal characters */ + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } +endloop: + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but here we must check if we are in Unicode + * mode and byteIndex==0 because we must end in direct mode + * + * conditions: + * successful + * in Unicode mode and byteIndex==0 + * end of input and no truncated input + */ + if( U_SUCCESS(*pErrorCode) && + !inDirectMode && byteIndex==0 && + pArgs->flush && source>=sourceLimit + ) { + if(base64Counter==-1) { + /* & at the very end of the input */ + /* make the ampersand the reported sequence */ + bytes[0]=AMPERSAND; + byteIndex=1; + } + /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */ + + inDirectMode=TRUE; /* avoid looping */ + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } + + /* set the converter state back into UConverter */ + 
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +static void U_CALLCONV +_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target, *targetLimit; + int32_t *offsets; + + int32_t length, targetCapacity, sourceIndex; + UChar c; + uint8_t b; + + /* UTF-7 state */ + uint8_t bits; + int8_t base64Counter; + UBool inDirectMode; + + /* set up the local pointers */ + cnv=pArgs->converter; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetLimit=(uint8_t *)pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the state machine state */ + { + uint32_t status=cnv->fromUnicodeStatus; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint8_t)status; + } + + /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ + sourceIndex=0; + + if(inDirectMode) { +directMode: + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + c=*source++; + /* encode 0x20..0x7e except '&' directly */ + if(inSetDIMAP(c)) { + /* encode directly */ + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else if(c==AMPERSAND) { + /* output &- for & */ + *target++=AMPERSAND; + if(targetcharErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } else { + /* un-read this character and switch to Unicode Mode */ + --source; + *target++=AMPERSAND; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + inDirectMode=FALSE; + 
base64Counter=0; + goto unicodeMode; + } + --length; + } + if(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + while(sourcecharErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + goto directMode; + } else { + /* + * base64 this character: + * Output 2 or 3 base64 bytes for the remaining bits of the previous character + * and the bits of this character, each implicitly in UTF-16BE. + * + * Here, bits is an 8-bit variable because only 6 bits need to be kept from one + * character to the next. The actual 2 or 4 bits are shifted to the left edge + * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. + */ + switch(base64Counter) { + case 0: + b=(uint8_t)(c>>10); + *target++=TO_BASE64_IMAP(b); + if(target>4)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>4)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&15)<<2); + base64Counter=1; + break; + case 1: + b=(uint8_t)(bits|(c>>14)); + *target++=TO_BASE64_IMAP(b); + if(target>8)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(target>2)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>2)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>8)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + b=(uint8_t)((c>>2)&0x3f); + cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); + 
cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&3)<<4); + base64Counter=2; + break; + case 2: + b=(uint8_t)(bits|(c>>12)); + *target++=TO_BASE64_IMAP(b); + if(target>6)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(targetcharErrorBuffer[0]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>6)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + b=(uint8_t)(c&0x3f); + cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + + if(pArgs->flush && source>=sourceLimit) { + /* flush remaining bits to the target */ + if(!inDirectMode) { + if(base64Counter!=0) { + if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits); + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* need to terminate with a minus */ + if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=MINUS; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* reset the state for the next conversion */ + cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ + } else { + /* set the converter state back into UConverter */ + cnv->fromUnicodeStatus= + (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ + ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; + return; +} +U_CDECL_END + +static const UConverterImpl _IMAPImpl={ + UCNV_IMAP_MAILBOX, + + NULL, + NULL, + + _UTF7Open, + NULL, + _UTF7Reset, + + _IMAPToUnicodeWithOffsets, + _IMAPToUnicodeWithOffsets, + _IMAPFromUnicodeWithOffsets, 
+ _IMAPFromUnicodeWithOffsets, + NULL, + + NULL, + NULL, + NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ + NULL, + ucnv_getCompleteUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _IMAPStaticData={ + sizeof(UConverterStaticData), + "IMAP-mailbox-name", + 0, /* TODO CCSID for IMAP-mailbox-name */ + UCNV_IBM, UCNV_IMAP_MAILBOX, + 1, 4, + { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _IMAPData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl); + +#endif diff --git a/deps/icu-small/source/common/ucnv_u8.c b/deps/icu-small/source/common/ucnv_u8.c deleted file mode 100644 index ff73993bd6..0000000000 --- a/deps/icu-small/source/common/ucnv_u8.c +++ /dev/null @@ -1,1098 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_u8.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-8 converter implementation. Used to be in ucnv_utf.c. -* -* Also, CESU-8 implementation, see UTR 26. -* The CESU-8 converter uses all the same functions as the -* UTF-8 converter, with a branch for converting supplementary code points. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -/* Prototypes --------------------------------------------------------------- */ - -/* Keep these here to make finicky compilers happy */ - -U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args, - UErrorCode *err); -U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args, - UErrorCode *err); - - -/* UTF-8 -------------------------------------------------------------------- */ - -/* UTF-8 Conversion DATA - * for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9 - */ -/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/ -#define MAXIMUM_UCS2 0x0000FFFF -#define MAXIMUM_UTF 0x0010FFFF -#define MAXIMUM_UCS4 0x7FFFFFFF -#define HALF_SHIFT 10 -#define HALF_BASE 0x0010000 -#define HALF_MASK 0x3FF -#define SURROGATE_HIGH_START 0xD800 -#define SURROGATE_HIGH_END 0xDBFF -#define SURROGATE_LOW_START 0xDC00 -#define SURROGATE_LOW_END 0xDFFF - -/* -SURROGATE_LOW_START + HALF_BASE */ -#define SURROGATE_LOW_BASE 9216 - -static const uint32_t offsetsFromUTF8[7] = {0, - (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, - (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 -}; - -/* END OF UTF-8 Conversion DATA */ - -static const int8_t bytesFromUTF8[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 -}; - -/* - * Starting with Unicode 3.0.1: - * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; - * byte sequences with more than 4 bytes are illegal in UTF-8, - * which is tested with impossible values for them - */ -static const uint32_t -utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; - -static UBool hasCESU8Data(const UConverter *cnv) -{ -#if UCONFIG_ONLY_HTML_CONVERSION - return FALSE; -#else - return (UBool)(cnv->sharedData == &_CESU8Data); -#endif -} - -static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = cnv->toUBytes; - UBool isCESU8 = hasCESU8Data(cnv); - uint32_t ch, ch2 = 0; - int32_t i, inBytes; - - /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) - { - inBytes = cnv->mode; /* restore # of bytes to consume */ - i = cnv->toULength; /* restore # of bytes consumed */ - cnv->toULength = 0; - - ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ - cnv->toUnicodeStatus = 0; - goto morebytes; - } - - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - if (ch < 0x80) /* Simple case */ - { - *(myTarget++) = (UChar) ch; - } - else - { - /* store the first char */ - toUBytes[0] = (char)ch; - inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ - i = 1; - 
-morebytes: - while (i < inBytes) - { - if (mySource < sourceLimit) - { - toUBytes[i] = (char) (ch2 = *mySource); - if (!U8_IS_TRAIL(ch2)) - { - break; /* i < inBytes */ - } - ch = (ch << 6) + ch2; - ++mySource; - i++; - } - else - { - /* stores a partially calculated target*/ - cnv->toUnicodeStatus = ch; - cnv->mode = inBytes; - cnv->toULength = (int8_t) i; - goto donefornow; - } - } - - /* Remove the accumulated high bits */ - ch -= offsetsFromUTF8[inBytes]; - - /* - * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: - * - use only trail bytes after a lead byte (checked above) - * - use the right number of trail bytes for a given lead byte - * - encode a code point <= U+10ffff - * - use the fewest possible number of bytes for their code points - * - use at most 4 bytes (for i>=5 it is 0x10ffff= utf8_minChar32[i] && - (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch))) - { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - } - else - { - /* write out the surrogates */ - ch -= HALF_BASE; - *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); - ch = (ch & HALF_MASK) + SURROGATE_LOW_START; - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - } - else - { - /* Put in overflow buffer (not handled here) */ - cnv->UCharErrorBuffer[0] = (UChar) ch; - cnv->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else - { - cnv->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; -} - -static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const unsigned char *mySource = 
(unsigned char *) args->source; - UChar *myTarget = args->target; - int32_t *myOffsets = args->offsets; - int32_t offsetNum = 0; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = cnv->toUBytes; - UBool isCESU8 = hasCESU8Data(cnv); - uint32_t ch, ch2 = 0; - int32_t i, inBytes; - - /* Restore size of current sequence */ - if (cnv->toUnicodeStatus && myTarget < targetLimit) - { - inBytes = cnv->mode; /* restore # of bytes to consume */ - i = cnv->toULength; /* restore # of bytes consumed */ - cnv->toULength = 0; - - ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ - cnv->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - if (ch < 0x80) /* Simple case */ - { - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum++; - } - else - { - toUBytes[0] = (char)ch; - inBytes = bytesFromUTF8[ch]; - i = 1; - -morebytes: - while (i < inBytes) - { - if (mySource < sourceLimit) - { - toUBytes[i] = (char) (ch2 = *mySource); - if (!U8_IS_TRAIL(ch2)) - { - break; /* i < inBytes */ - } - ch = (ch << 6) + ch2; - ++mySource; - i++; - } - else - { - cnv->toUnicodeStatus = ch; - cnv->mode = inBytes; - cnv->toULength = (int8_t)i; - goto donefornow; - } - } - - /* Remove the accumulated high bits */ - ch -= offsetsFromUTF8[inBytes]; - - /* - * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: - * - use only trail bytes after a lead byte (checked above) - * - use the right number of trail bytes for a given lead byte - * - encode a code point <= U+10ffff - * - use the fewest possible number of bytes for their code points - * - use at most 4 bytes (for i>=5 it is 0x10ffff= utf8_minChar32[i] && - (isCESU8 ? 
i <= 3 : !U_IS_SURROGATE(ch))) - { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum; - } - else - { - /* write out the surrogates */ - ch -= HALF_BASE; - *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); - *(myOffsets++) = offsetNum; - ch = (ch & HALF_MASK) + SURROGATE_LOW_START; - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - *(myOffsets++) = offsetNum; - } - else - { - cnv->UCharErrorBuffer[0] = (UChar) ch; - cnv->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - offsetNum += i; - } - else - { - cnv->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; - args->offsets = myOffsets; -} - -U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const UChar *mySource = args->source; - const UChar *sourceLimit = args->sourceLimit; - uint8_t *myTarget = (uint8_t *) args->target; - const uint8_t *targetLimit = (uint8_t *) args->targetLimit; - uint8_t *tempPtr; - UChar32 ch; - uint8_t tempBuf[4]; - int32_t indexToWrite; - UBool isNotCESU8 = !hasCESU8Data(cnv); - - if (cnv->fromUChar32 && myTarget < targetLimit) - { - ch = cnv->fromUChar32; - cnv->fromUChar32 = 0; - goto lowsurrogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (ch < 0x80) /* Single byte */ - { - *(myTarget++) = (uint8_t) ch; - } - else if (ch < 0x800) /* Double byte */ - { - *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); - if (myTarget < targetLimit) - { - *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); - } - else - { - 
cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); - cnv->charErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - else { - /* Check for surrogates */ - if(U16_IS_SURROGATE(ch) && isNotCESU8) { -lowsurrogate: - if (mySource < sourceLimit) { - /* test both code units */ - if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { - /* convert and consume this supplementary code point */ - ch=U16_GET_SUPPLEMENTARY(ch, *mySource); - ++mySource; - /* exit this condition tree */ - } - else { - /* this is an unpaired trail or lead code unit */ - /* callback(illegal) */ - cnv->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* no more input */ - cnv->fromUChar32 = ch; - break; - } - } - - /* Do we write the buffer directly for speed, - or do we have to be careful about target buffer space? */ - tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); - - if (ch <= MAXIMUM_UCS2) { - indexToWrite = 2; - tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); - } - else { - indexToWrite = 3; - tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); - tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); - } - tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); - tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); - - if (tempPtr == myTarget) { - /* There was enough space to write the codepoint directly. */ - myTarget += (indexToWrite + 1); - } - else { - /* We might run out of room soon. Write it slowly. 
*/ - for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { - if (myTarget < targetLimit) { - *(myTarget++) = *tempPtr; - } - else { - cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; -} - -U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const UChar *mySource = args->source; - int32_t *myOffsets = args->offsets; - const UChar *sourceLimit = args->sourceLimit; - uint8_t *myTarget = (uint8_t *) args->target; - const uint8_t *targetLimit = (uint8_t *) args->targetLimit; - uint8_t *tempPtr; - UChar32 ch; - int32_t offsetNum, nextSourceIndex; - int32_t indexToWrite; - uint8_t tempBuf[4]; - UBool isNotCESU8 = !hasCESU8Data(cnv); - - if (cnv->fromUChar32 && myTarget < targetLimit) - { - ch = cnv->fromUChar32; - cnv->fromUChar32 = 0; - offsetNum = -1; - nextSourceIndex = 0; - goto lowsurrogate; - } else { - offsetNum = 0; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (ch < 0x80) /* Single byte */ - { - *(myOffsets++) = offsetNum++; - *(myTarget++) = (char) ch; - } - else if (ch < 0x800) /* Double byte */ - { - *(myOffsets++) = offsetNum; - *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); - if (myTarget < targetLimit) - { - *(myOffsets++) = offsetNum++; - *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); - } - else - { - cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); - cnv->charErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - else - /* Check for surrogates */ - { - nextSourceIndex = offsetNum + 1; - - if(U16_IS_SURROGATE(ch) && isNotCESU8) { -lowsurrogate: - if (mySource < sourceLimit) { - /* test both code units */ - if(U16_IS_SURROGATE_LEAD(ch) && 
U16_IS_TRAIL(*mySource)) { - /* convert and consume this supplementary code point */ - ch=U16_GET_SUPPLEMENTARY(ch, *mySource); - ++mySource; - ++nextSourceIndex; - /* exit this condition tree */ - } - else { - /* this is an unpaired trail or lead code unit */ - /* callback(illegal) */ - cnv->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* no more input */ - cnv->fromUChar32 = ch; - break; - } - } - - /* Do we write the buffer directly for speed, - or do we have to be careful about target buffer space? */ - tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); - - if (ch <= MAXIMUM_UCS2) { - indexToWrite = 2; - tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); - } - else { - indexToWrite = 3; - tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); - tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); - } - tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); - tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); - - if (tempPtr == myTarget) { - /* There was enough space to write the codepoint directly. */ - myTarget += (indexToWrite + 1); - myOffsets[0] = offsetNum; - myOffsets[1] = offsetNum; - myOffsets[2] = offsetNum; - if (indexToWrite >= 3) { - myOffsets[3] = offsetNum; - } - myOffsets += (indexToWrite + 1); - } - else { - /* We might run out of room soon. Write it slowly. 
*/ - for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { - if (myTarget < targetLimit) - { - *(myOffsets++) = offsetNum; - *(myTarget++) = *tempPtr; - } - else - { - cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - offsetNum = nextSourceIndex; - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; - args->offsets = myOffsets; -} - -static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, - UErrorCode *err) { - UConverter *cnv; - const uint8_t *sourceInitial; - const uint8_t *source; - uint16_t extraBytesToWrite; - uint8_t myByte; - UChar32 ch; - int8_t i, isLegalSequence; - - /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ - - cnv = args->converter; - sourceInitial = source = (const uint8_t *)args->source; - if (source >= (const uint8_t *)args->sourceLimit) - { - /* no input */ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - myByte = (uint8_t)*(source++); - if (myByte < 0x80) - { - args->source = (const char *)source; - return (UChar32)myByte; - } - - extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; - if (extraBytesToWrite == 0) { - cnv->toUBytes[0] = myByte; - cnv->toULength = 1; - *err = U_ILLEGAL_CHAR_FOUND; - args->source = (const char *)source; - return 0xffff; - } - - /*The byte sequence is longer than the buffer area passed*/ - if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit) - { - /* check if all of the remaining bytes are trail bytes */ - cnv->toUBytes[0] = myByte; - i = 1; - *err = U_TRUNCATED_CHAR_FOUND; - while(source < (const uint8_t *)args->sourceLimit) { - if(U8_IS_TRAIL(myByte = *source)) { - cnv->toUBytes[i++] = myByte; - ++source; - } else { - /* error even before we run out of input */ - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - cnv->toULength = i; - 
args->source = (const char *)source; - return 0xffff; - } - - isLegalSequence = 1; - ch = myByte << 6; - switch(extraBytesToWrite) - { - /* note: code falls through cases! (sic)*/ - case 6: - ch += (myByte = *source); - ch <<= 6; - if (!U8_IS_TRAIL(myByte)) - { - isLegalSequence = 0; - break; - } - ++source; - U_FALLTHROUGH; - case 5: - ch += (myByte = *source); - ch <<= 6; - if (!U8_IS_TRAIL(myByte)) - { - isLegalSequence = 0; - break; - } - ++source; - U_FALLTHROUGH; - case 4: - ch += (myByte = *source); - ch <<= 6; - if (!U8_IS_TRAIL(myByte)) - { - isLegalSequence = 0; - break; - } - ++source; - U_FALLTHROUGH; - case 3: - ch += (myByte = *source); - ch <<= 6; - if (!U8_IS_TRAIL(myByte)) - { - isLegalSequence = 0; - break; - } - ++source; - U_FALLTHROUGH; - case 2: - ch += (myByte = *source); - if (!U8_IS_TRAIL(myByte)) - { - isLegalSequence = 0; - break; - } - ++source; - }; - ch -= offsetsFromUTF8[extraBytesToWrite]; - args->source = (const char *)source; - - /* - * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: - * - use only trail bytes after a lead byte (checked above) - * - use the right number of trail bytes for a given lead byte - * - encode a code point <= U+10ffff - * - use the fewest possible number of bytes for their code points - * - use at most 4 bytes (for i>=5 it is 0x10ffff= utf8_minChar32[extraBytesToWrite] && - !U_IS_SURROGATE(ch) - ) { - return ch; /* return the code point */ - } - - for(i = 0; sourceInitial < source; ++i) { - cnv->toUBytes[i] = *sourceInitial++; - } - cnv->toULength = i; - *err = U_ILLEGAL_CHAR_FOUND; - return 0xffff; -} - -/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ - -/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */ -static const UChar32 -utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 }; - -/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... 
*/ -static const UChar32 -utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; - -/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ -static void -ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - UConverter *utf8; - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t count; - - int8_t oldToULength, toULength, toULimit; - - UChar32 c; - uint8_t b, t1, t2; - - /* set up the local pointers */ - utf8=pToUArgs->converter; - source=(uint8_t *)pToUArgs->source; - sourceLimit=(uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0) { - toULength=oldToULength=utf8->toULength; - toULimit=(int8_t)utf8->mode; - } else { - toULength=oldToULength=toULimit=0; - } - - count=(int32_t)(sourceLimit-source)+oldToULength; - if(counttargetCapacity) { - count=targetCapacity; - } - - i=0; - while(i<3 && i<(count-toULimit)) { - b=source[count-oldToULength-i-1]; - if(U8_IS_TRAIL(b)) { - ++i; - } else { - if(itoUnicodeStatus=0; - utf8->toULength=0; - goto moreBytes; - /* See note in ucnv_SBCSFromUTF8() about this goto. 
*/ - } - - /* conversion loop */ - while(count>0) { - b=*source++; - if((int8_t)b>=0) { - /* convert ASCII */ - *target++=b; - --count; - continue; - } else { - if(b>0xe0) { - if( /* handle U+1000..U+D7FF inline */ - (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) || - (b==0xed && (t1 <= 0x9f))) && - (t2=source[1]) >= 0x80 && t2 <= 0xbf - ) { - source+=2; - *target++=b; - *target++=t1; - *target++=t2; - count-=3; - continue; - } - } else if(b<0xe0) { - if( /* handle U+0080..U+07FF inline */ - b>=0xc2 && - (t1=*source) >= 0x80 && t1 <= 0xbf - ) { - ++source; - *target++=b; - *target++=t1; - count-=2; - continue; - } - } else if(b==0xe0) { - if( /* handle U+0800..U+0FFF inline */ - (t1=source[0]) >= 0xa0 && t1 <= 0xbf && - (t2=source[1]) >= 0x80 && t2 <= 0xbf - ) { - source+=2; - *target++=b; - *target++=t1; - *target++=t2; - count-=3; - continue; - } - } - - /* handle "complicated" and error cases, and continuing partial characters */ - oldToULength=0; - toULength=1; - toULimit=U8_COUNT_TRAIL_BYTES(b)+1; - c=b; -moreBytes: - while(toULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - return; - } - } - - if( toULength==toULimit && /* consumed all trail bytes */ - (toULength==3 || toULength==2) && /* BMP */ - (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && - (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ - ) { - /* legal byte sequence for BMP code point */ - } else if( - toULength==toULimit && toULength==4 && - (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) - ) { - /* legal byte sequence for supplementary code point */ - } else { - /* error handling: illegal UTF-8 byte sequence */ - source-=(toULength-oldToULength); - while(oldToULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toULength=toULength; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
- return; - } - - /* copy the legal byte sequence to the target */ - { - int8_t i; - - for(i=0; itoUBytes[i]; - } - source-=(toULength-oldToULength); - for(; itargetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - b=*source; - toULimit=U8_COUNT_TRAIL_BYTES(b)+1; - if(toULimit>(sourceLimit-source)) { - /* collect a truncated byte sequence */ - toULength=0; - c=b; - for(;;) { - utf8->toUBytes[toULength++]=b; - if(++source==sourceLimit) { - /* partial byte sequence at end of source */ - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - break; - } else if(!U8_IS_TRAIL(b=*source)) { - /* lead byte in trail byte position */ - utf8->toULength=toULength; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - c=(c<<6)+b; - } - } else { - /* partial-sequence target overflow: fall back to the pivoting implementation */ - *pErrorCode=U_USING_DEFAULT_WARNING; - } - } - } - - /* write back the updated pointers */ - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; -} - -/* UTF-8 converter data ----------------------------------------------------- */ - -static const UConverterImpl _UTF8Impl={ - UCNV_UTF8, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - ucnv_toUnicode_UTF8, - ucnv_toUnicode_UTF8_OFFSETS_LOGIC, - ucnv_fromUnicode_UTF8, - ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, - ucnv_getNextUChar_UTF8, - - NULL, - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - ucnv_UTF8FromUTF8, - ucnv_UTF8FromUTF8 -}; - -/* The 1208 CCSID refers to any version of Unicode of UTF-8 */ -static const UConverterStaticData _UTF8StaticData={ - sizeof(UConverterStaticData), - "UTF-8", - 1208, UCNV_IBM, UCNV_UTF8, - 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ - { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF8Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl); - -/* CESU-8 converter 
data ---------------------------------------------------- */ - -static const UConverterImpl _CESU8Impl={ - UCNV_CESU8, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - ucnv_toUnicode_UTF8, - ucnv_toUnicode_UTF8_OFFSETS_LOGIC, - ucnv_fromUnicode_UTF8, - ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, - NULL, - - NULL, - NULL, - NULL, - NULL, - ucnv_getCompleteUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _CESU8StaticData={ - sizeof(UConverterStaticData), - "CESU-8", - 9400, /* CCSID for CESU-8 */ - UCNV_UNKNOWN, UCNV_CESU8, 1, 3, - { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _CESU8Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl); - -#endif diff --git a/deps/icu-small/source/common/ucnv_u8.cpp b/deps/icu-small/source/common/ucnv_u8.cpp new file mode 100644 index 0000000000..b2d26f9c3b --- /dev/null +++ b/deps/icu-small/source/common/ucnv_u8.cpp @@ -0,0 +1,1104 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u8.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-8 converter implementation. Used to be in ucnv_utf.c. +* +* Also, CESU-8 implementation, see UTR 26. +* The CESU-8 converter uses all the same functions as the +* UTF-8 converter, with a branch for converting supplementary code points. 
+*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +/* Prototypes --------------------------------------------------------------- */ + +/* Keep these here to make finicky compilers happy */ + +U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args, + UErrorCode *err); +U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args, + UErrorCode *err); + + +/* UTF-8 -------------------------------------------------------------------- */ + +/* UTF-8 Conversion DATA + * for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9 + */ +/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/ +#define MAXIMUM_UCS2 0x0000FFFF +#define MAXIMUM_UTF 0x0010FFFF +#define MAXIMUM_UCS4 0x7FFFFFFF +#define HALF_SHIFT 10 +#define HALF_BASE 0x0010000 +#define HALF_MASK 0x3FF +#define SURROGATE_HIGH_START 0xD800 +#define SURROGATE_HIGH_END 0xDBFF +#define SURROGATE_LOW_START 0xDC00 +#define SURROGATE_LOW_END 0xDFFF + +/* -SURROGATE_LOW_START + HALF_BASE */ +#define SURROGATE_LOW_BASE 9216 + +static const uint32_t offsetsFromUTF8[7] = {0, + (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, + (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 +}; + +/* END OF UTF-8 Conversion DATA */ + +static const int8_t bytesFromUTF8[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 +}; + +/* + * Starting with Unicode 3.0.1: + * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; + * byte sequences with more than 4 bytes are illegal in UTF-8, + * which is tested with impossible values for them + */ +static const uint32_t +utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; + +static UBool hasCESU8Data(const UConverter *cnv) +{ +#if UCONFIG_ONLY_HTML_CONVERSION + return FALSE; +#else + return (UBool)(cnv->sharedData == &_CESU8Data); +#endif +} +U_CDECL_BEGIN +static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = cnv->toUBytes; + UBool isCESU8 = hasCESU8Data(cnv); + uint32_t ch, ch2 = 0; + int32_t i, inBytes; + + /* Restore size of current sequence */ + if (cnv->toUnicodeStatus && myTarget < targetLimit) + { + inBytes = cnv->mode; /* restore # of bytes to consume */ + i = cnv->toULength; /* restore # of bytes consumed */ + cnv->toULength = 0; + + ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ + cnv->toUnicodeStatus = 0; + goto morebytes; + } + + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + if (ch < 0x80) /* Simple case */ + { + *(myTarget++) = (UChar) ch; + } + else + { + /* store the first char */ + toUBytes[0] = (char)ch; + inBytes = bytesFromUTF8[ch]; /* lookup current sequence 
length */ + i = 1; + +morebytes: + while (i < inBytes) + { + if (mySource < sourceLimit) + { + toUBytes[i] = (char) (ch2 = *mySource); + if (!U8_IS_TRAIL(ch2)) + { + break; /* i < inBytes */ + } + ch = (ch << 6) + ch2; + ++mySource; + i++; + } + else + { + /* stores a partially calculated target*/ + cnv->toUnicodeStatus = ch; + cnv->mode = inBytes; + cnv->toULength = (int8_t) i; + goto donefornow; + } + } + + /* Remove the accumulated high bits */ + ch -= offsetsFromUTF8[inBytes]; + + /* + * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: + * - use only trail bytes after a lead byte (checked above) + * - use the right number of trail bytes for a given lead byte + * - encode a code point <= U+10ffff + * - use the fewest possible number of bytes for their code points + * - use at most 4 bytes (for i>=5 it is 0x10ffff= utf8_minChar32[i] && + (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch))) + { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + } + else + { + /* write out the surrogates */ + ch -= HALF_BASE; + *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); + ch = (ch & HALF_MASK) + SURROGATE_LOW_START; + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + } + else + { + /* Put in overflow buffer (not handled here) */ + cnv->UCharErrorBuffer[0] = (UChar) ch; + cnv->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else + { + cnv->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; +} + +static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + 
const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + int32_t *myOffsets = args->offsets; + int32_t offsetNum = 0; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = cnv->toUBytes; + UBool isCESU8 = hasCESU8Data(cnv); + uint32_t ch, ch2 = 0; + int32_t i, inBytes; + + /* Restore size of current sequence */ + if (cnv->toUnicodeStatus && myTarget < targetLimit) + { + inBytes = cnv->mode; /* restore # of bytes to consume */ + i = cnv->toULength; /* restore # of bytes consumed */ + cnv->toULength = 0; + + ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ + cnv->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + if (ch < 0x80) /* Simple case */ + { + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum++; + } + else + { + toUBytes[0] = (char)ch; + inBytes = bytesFromUTF8[ch]; + i = 1; + +morebytes: + while (i < inBytes) + { + if (mySource < sourceLimit) + { + toUBytes[i] = (char) (ch2 = *mySource); + if (!U8_IS_TRAIL(ch2)) + { + break; /* i < inBytes */ + } + ch = (ch << 6) + ch2; + ++mySource; + i++; + } + else + { + cnv->toUnicodeStatus = ch; + cnv->mode = inBytes; + cnv->toULength = (int8_t)i; + goto donefornow; + } + } + + /* Remove the accumulated high bits */ + ch -= offsetsFromUTF8[inBytes]; + + /* + * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: + * - use only trail bytes after a lead byte (checked above) + * - use the right number of trail bytes for a given lead byte + * - encode a code point <= U+10ffff + * - use the fewest possible number of bytes for their code points + * - use at most 4 bytes (for i>=5 it is 0x10ffff= utf8_minChar32[i] && + (isCESU8 ? 
i <= 3 : !U_IS_SURROGATE(ch))) + { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum; + } + else + { + /* write out the surrogates */ + ch -= HALF_BASE; + *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); + *(myOffsets++) = offsetNum; + ch = (ch & HALF_MASK) + SURROGATE_LOW_START; + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + *(myOffsets++) = offsetNum; + } + else + { + cnv->UCharErrorBuffer[0] = (UChar) ch; + cnv->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + offsetNum += i; + } + else + { + cnv->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; + args->offsets = myOffsets; +} +U_CDECL_END + +U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const UChar *mySource = args->source; + const UChar *sourceLimit = args->sourceLimit; + uint8_t *myTarget = (uint8_t *) args->target; + const uint8_t *targetLimit = (uint8_t *) args->targetLimit; + uint8_t *tempPtr; + UChar32 ch; + uint8_t tempBuf[4]; + int32_t indexToWrite; + UBool isNotCESU8 = !hasCESU8Data(cnv); + + if (cnv->fromUChar32 && myTarget < targetLimit) + { + ch = cnv->fromUChar32; + cnv->fromUChar32 = 0; + goto lowsurrogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (ch < 0x80) /* Single byte */ + { + *(myTarget++) = (uint8_t) ch; + } + else if (ch < 0x800) /* Double byte */ + { + *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); + if (myTarget < targetLimit) + { + *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); 
+ } + else + { + cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); + cnv->charErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + else { + /* Check for surrogates */ + if(U16_IS_SURROGATE(ch) && isNotCESU8) { +lowsurrogate: + if (mySource < sourceLimit) { + /* test both code units */ + if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { + /* convert and consume this supplementary code point */ + ch=U16_GET_SUPPLEMENTARY(ch, *mySource); + ++mySource; + /* exit this condition tree */ + } + else { + /* this is an unpaired trail or lead code unit */ + /* callback(illegal) */ + cnv->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* no more input */ + cnv->fromUChar32 = ch; + break; + } + } + + /* Do we write the buffer directly for speed, + or do we have to be careful about target buffer space? */ + tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); + + if (ch <= MAXIMUM_UCS2) { + indexToWrite = 2; + tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); + } + else { + indexToWrite = 3; + tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); + tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); + } + tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); + tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); + + if (tempPtr == myTarget) { + /* There was enough space to write the codepoint directly. */ + myTarget += (indexToWrite + 1); + } + else { + /* We might run out of room soon. Write it slowly. 
*/ + for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { + if (myTarget < targetLimit) { + *(myTarget++) = *tempPtr; + } + else { + cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; +} + +U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const UChar *mySource = args->source; + int32_t *myOffsets = args->offsets; + const UChar *sourceLimit = args->sourceLimit; + uint8_t *myTarget = (uint8_t *) args->target; + const uint8_t *targetLimit = (uint8_t *) args->targetLimit; + uint8_t *tempPtr; + UChar32 ch; + int32_t offsetNum, nextSourceIndex; + int32_t indexToWrite; + uint8_t tempBuf[4]; + UBool isNotCESU8 = !hasCESU8Data(cnv); + + if (cnv->fromUChar32 && myTarget < targetLimit) + { + ch = cnv->fromUChar32; + cnv->fromUChar32 = 0; + offsetNum = -1; + nextSourceIndex = 0; + goto lowsurrogate; + } else { + offsetNum = 0; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (ch < 0x80) /* Single byte */ + { + *(myOffsets++) = offsetNum++; + *(myTarget++) = (char) ch; + } + else if (ch < 0x800) /* Double byte */ + { + *(myOffsets++) = offsetNum; + *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); + if (myTarget < targetLimit) + { + *(myOffsets++) = offsetNum++; + *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); + } + else + { + cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); + cnv->charErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + else + /* Check for surrogates */ + { + nextSourceIndex = offsetNum + 1; + + if(U16_IS_SURROGATE(ch) && isNotCESU8) { +lowsurrogate: + if (mySource < sourceLimit) { + /* test both code units */ + 
if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { + /* convert and consume this supplementary code point */ + ch=U16_GET_SUPPLEMENTARY(ch, *mySource); + ++mySource; + ++nextSourceIndex; + /* exit this condition tree */ + } + else { + /* this is an unpaired trail or lead code unit */ + /* callback(illegal) */ + cnv->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* no more input */ + cnv->fromUChar32 = ch; + break; + } + } + + /* Do we write the buffer directly for speed, + or do we have to be careful about target buffer space? */ + tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); + + if (ch <= MAXIMUM_UCS2) { + indexToWrite = 2; + tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); + } + else { + indexToWrite = 3; + tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); + tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); + } + tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); + tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); + + if (tempPtr == myTarget) { + /* There was enough space to write the codepoint directly. */ + myTarget += (indexToWrite + 1); + myOffsets[0] = offsetNum; + myOffsets[1] = offsetNum; + myOffsets[2] = offsetNum; + if (indexToWrite >= 3) { + myOffsets[3] = offsetNum; + } + myOffsets += (indexToWrite + 1); + } + else { + /* We might run out of room soon. Write it slowly. 
*/ + for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { + if (myTarget < targetLimit) + { + *(myOffsets++) = offsetNum; + *(myTarget++) = *tempPtr; + } + else + { + cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + offsetNum = nextSourceIndex; + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; + args->offsets = myOffsets; +} + +U_CDECL_BEGIN +static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, + UErrorCode *err) { + UConverter *cnv; + const uint8_t *sourceInitial; + const uint8_t *source; + uint16_t extraBytesToWrite; + uint8_t myByte; + UChar32 ch; + int8_t i, isLegalSequence; + + /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ + + cnv = args->converter; + sourceInitial = source = (const uint8_t *)args->source; + if (source >= (const uint8_t *)args->sourceLimit) + { + /* no input */ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + myByte = (uint8_t)*(source++); + if (myByte < 0x80) + { + args->source = (const char *)source; + return (UChar32)myByte; + } + + extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; + if (extraBytesToWrite == 0) { + cnv->toUBytes[0] = myByte; + cnv->toULength = 1; + *err = U_ILLEGAL_CHAR_FOUND; + args->source = (const char *)source; + return 0xffff; + } + + /*The byte sequence is longer than the buffer area passed*/ + if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit) + { + /* check if all of the remaining bytes are trail bytes */ + cnv->toUBytes[0] = myByte; + i = 1; + *err = U_TRUNCATED_CHAR_FOUND; + while(source < (const uint8_t *)args->sourceLimit) { + if(U8_IS_TRAIL(myByte = *source)) { + cnv->toUBytes[i++] = myByte; + ++source; + } else { + /* error even before we run out of input */ + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + 
cnv->toULength = i; + args->source = (const char *)source; + return 0xffff; + } + + isLegalSequence = 1; + ch = myByte << 6; + switch(extraBytesToWrite) + { + /* note: code falls through cases! (sic)*/ + case 6: + ch += (myByte = *source); + ch <<= 6; + if (!U8_IS_TRAIL(myByte)) + { + isLegalSequence = 0; + break; + } + ++source; + U_FALLTHROUGH; + case 5: + ch += (myByte = *source); + ch <<= 6; + if (!U8_IS_TRAIL(myByte)) + { + isLegalSequence = 0; + break; + } + ++source; + U_FALLTHROUGH; + case 4: + ch += (myByte = *source); + ch <<= 6; + if (!U8_IS_TRAIL(myByte)) + { + isLegalSequence = 0; + break; + } + ++source; + U_FALLTHROUGH; + case 3: + ch += (myByte = *source); + ch <<= 6; + if (!U8_IS_TRAIL(myByte)) + { + isLegalSequence = 0; + break; + } + ++source; + U_FALLTHROUGH; + case 2: + ch += (myByte = *source); + if (!U8_IS_TRAIL(myByte)) + { + isLegalSequence = 0; + break; + } + ++source; + }; + ch -= offsetsFromUTF8[extraBytesToWrite]; + args->source = (const char *)source; + + /* + * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: + * - use only trail bytes after a lead byte (checked above) + * - use the right number of trail bytes for a given lead byte + * - encode a code point <= U+10ffff + * - use the fewest possible number of bytes for their code points + * - use at most 4 bytes (for i>=5 it is 0x10ffff= utf8_minChar32[extraBytesToWrite] && + !U_IS_SURROGATE(ch) + ) { + return ch; /* return the code point */ + } + + for(i = 0; sourceInitial < source; ++i) { + cnv->toUBytes[i] = *sourceInitial++; + } + cnv->toULength = i; + *err = U_ILLEGAL_CHAR_FOUND; + return 0xffff; +} +U_CDECL_END + +/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ + +/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */ +static const UChar32 +utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 }; + +/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... 
*/ +static const UChar32 +utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; + +U_CDECL_BEGIN +/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ +static void U_CALLCONV +ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + UConverter *utf8; + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t count; + + int8_t oldToULength, toULength, toULimit; + + UChar32 c; + uint8_t b, t1, t2; + + /* set up the local pointers */ + utf8=pToUArgs->converter; + source=(uint8_t *)pToUArgs->source; + sourceLimit=(uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + /* get the converter state from the UTF-8 UConverter */ + c=(UChar32)utf8->toUnicodeStatus; + if(c!=0) { + toULength=oldToULength=utf8->toULength; + toULimit=(int8_t)utf8->mode; + } else { + toULength=oldToULength=toULimit=0; + } + + count=(int32_t)(sourceLimit-source)+oldToULength; + if(counttargetCapacity) { + count=targetCapacity; + } + + i=0; + while(i<3 && i<(count-toULimit)) { + b=source[count-oldToULength-i-1]; + if(U8_IS_TRAIL(b)) { + ++i; + } else { + if(itoUnicodeStatus=0; + utf8->toULength=0; + goto moreBytes; + /* See note in ucnv_SBCSFromUTF8() about this goto. 
*/ + } + + /* conversion loop */ + while(count>0) { + b=*source++; + if((int8_t)b>=0) { + /* convert ASCII */ + *target++=b; + --count; + continue; + } else { + if(b>0xe0) { + if( /* handle U+1000..U+D7FF inline */ + (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) || + (b==0xed && (t1 <= 0x9f))) && + (t2=source[1]) >= 0x80 && t2 <= 0xbf + ) { + source+=2; + *target++=b; + *target++=t1; + *target++=t2; + count-=3; + continue; + } + } else if(b<0xe0) { + if( /* handle U+0080..U+07FF inline */ + b>=0xc2 && + (t1=*source) >= 0x80 && t1 <= 0xbf + ) { + ++source; + *target++=b; + *target++=t1; + count-=2; + continue; + } + } else if(b==0xe0) { + if( /* handle U+0800..U+0FFF inline */ + (t1=source[0]) >= 0xa0 && t1 <= 0xbf && + (t2=source[1]) >= 0x80 && t2 <= 0xbf + ) { + source+=2; + *target++=b; + *target++=t1; + *target++=t2; + count-=3; + continue; + } + } + + /* handle "complicated" and error cases, and continuing partial characters */ + oldToULength=0; + toULength=1; + toULimit=U8_COUNT_TRAIL_BYTES(b)+1; + c=b; +moreBytes: + while(toULengthtoUBytes[oldToULength++]=*source++; + } + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + return; + } + } + + if( toULength==toULimit && /* consumed all trail bytes */ + (toULength==3 || toULength==2) && /* BMP */ + (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && + (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ + ) { + /* legal byte sequence for BMP code point */ + } else if( + toULength==toULimit && toULength==4 && + (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) + ) { + /* legal byte sequence for supplementary code point */ + } else { + /* error handling: illegal UTF-8 byte sequence */ + source-=(toULength-oldToULength); + while(oldToULengthtoUBytes[oldToULength++]=*source++; + } + utf8->toULength=toULength; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
+ return; + } + + /* copy the legal byte sequence to the target */ + { + int8_t i; + + for(i=0; itoUBytes[i]; + } + source-=(toULength-oldToULength); + for(; itargetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + b=*source; + toULimit=U8_COUNT_TRAIL_BYTES(b)+1; + if(toULimit>(sourceLimit-source)) { + /* collect a truncated byte sequence */ + toULength=0; + c=b; + for(;;) { + utf8->toUBytes[toULength++]=b; + if(++source==sourceLimit) { + /* partial byte sequence at end of source */ + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + break; + } else if(!U8_IS_TRAIL(b=*source)) { + /* lead byte in trail byte position */ + utf8->toULength=toULength; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + c=(c<<6)+b; + } + } else { + /* partial-sequence target overflow: fall back to the pivoting implementation */ + *pErrorCode=U_USING_DEFAULT_WARNING; + } + } + } + + /* write back the updated pointers */ + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; +} + +U_CDECL_END + +/* UTF-8 converter data ----------------------------------------------------- */ + +static const UConverterImpl _UTF8Impl={ + UCNV_UTF8, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + ucnv_toUnicode_UTF8, + ucnv_toUnicode_UTF8_OFFSETS_LOGIC, + ucnv_fromUnicode_UTF8, + ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, + ucnv_getNextUChar_UTF8, + + NULL, + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + ucnv_UTF8FromUTF8, + ucnv_UTF8FromUTF8 +}; + +/* The 1208 CCSID refers to any version of Unicode of UTF-8 */ +static const UConverterStaticData _UTF8StaticData={ + sizeof(UConverterStaticData), + "UTF-8", + 1208, UCNV_IBM, UCNV_UTF8, + 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ + { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF8Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl); + +/* 
CESU-8 converter data ---------------------------------------------------- */ + +static const UConverterImpl _CESU8Impl={ + UCNV_CESU8, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + ucnv_toUnicode_UTF8, + ucnv_toUnicode_UTF8_OFFSETS_LOGIC, + ucnv_fromUnicode_UTF8, + ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, + NULL, + + NULL, + NULL, + NULL, + NULL, + ucnv_getCompleteUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _CESU8StaticData={ + sizeof(UConverterStaticData), + "CESU-8", + 9400, /* CCSID for CESU-8 */ + UCNV_UNKNOWN, UCNV_CESU8, 1, 3, + { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _CESU8Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl); + +#endif diff --git a/deps/icu-small/source/common/ucnvbocu.cpp b/deps/icu-small/source/common/ucnvbocu.cpp index 69763ca321..5b66c5059a 100644 --- a/deps/icu-small/source/common/ucnvbocu.cpp +++ b/deps/icu-small/source/common/ucnvbocu.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucnvbocu.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucnvdisp.c b/deps/icu-small/source/common/ucnvdisp.c deleted file mode 100644 index e30f665f4a..0000000000 --- a/deps/icu-small/source/common/ucnvdisp.c +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2004, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* ucnvdisp.c: -* Implements APIs for the ICU's codeset conversion library display names. -* -* Modification History: -* -* Date Name Description -* 04/04/99 helena Fixed internal header inclusion. -* 05/09/00 helena Added implementation to handle fallback mappings. -* 06/20/2000 helena OS/400 port changes; mostly typecast. -* 09/08/2004 grhoten split from ucnv.c -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ustring.h" -#include "unicode/ures.h" -#include "unicode/ucnv.h" -#include "cstring.h" -#include "ustr_imp.h" -#include "ucnv_imp.h" -#include "putilimp.h" - -U_CAPI int32_t U_EXPORT2 -ucnv_getDisplayName(const UConverter *cnv, - const char *displayLocale, - UChar *displayName, int32_t displayNameCapacity, - UErrorCode *pErrorCode) { - UResourceBundle *rb; - const UChar *name; - int32_t length; - UErrorCode localStatus = U_ZERO_ERROR; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* open the resource bundle and get the display name string */ - rb=ures_open(NULL, displayLocale, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* use the internal name as the key */ - name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus); - ures_close(rb); - - if(U_SUCCESS(localStatus)) { - /* copy the string */ - if (*pErrorCode == U_ZERO_ERROR) { - *pErrorCode = localStatus; - } - u_memcpy(displayName, name, uprv_min(length, 
displayNameCapacity)*U_SIZEOF_UCHAR); - } else { - /* convert the internal name into a Unicode string */ - length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name); - u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity)); - } - return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode); -} - -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/icu-small/source/common/ucnvdisp.cpp b/deps/icu-small/source/common/ucnvdisp.cpp new file mode 100644 index 0000000000..ac86b98597 --- /dev/null +++ b/deps/icu-small/source/common/ucnvdisp.cpp @@ -0,0 +1,88 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* ucnvdisp.c: +* Implements APIs for the ICU's codeset conversion library display names. +* +* Modification History: +* +* Date Name Description +* 04/04/99 helena Fixed internal header inclusion. +* 05/09/00 helena Added implementation to handle fallback mappings. +* 06/20/2000 helena OS/400 port changes; mostly typecast. 
+* 09/08/2004 grhoten split from ucnv.c +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ustring.h" +#include "unicode/ures.h" +#include "unicode/ucnv.h" +#include "cstring.h" +#include "ustr_imp.h" +#include "ucnv_imp.h" +#include "putilimp.h" + +U_CAPI int32_t U_EXPORT2 +ucnv_getDisplayName(const UConverter *cnv, + const char *displayLocale, + UChar *displayName, int32_t displayNameCapacity, + UErrorCode *pErrorCode) { + UResourceBundle *rb; + const UChar *name; + int32_t length; + UErrorCode localStatus = U_ZERO_ERROR; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* open the resource bundle and get the display name string */ + rb=ures_open(NULL, displayLocale, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* use the internal name as the key */ + name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus); + ures_close(rb); + + if(U_SUCCESS(localStatus)) { + /* copy the string */ + if (*pErrorCode == U_ZERO_ERROR) { + *pErrorCode = localStatus; + } + u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR); + } else { + /* convert the internal name into a Unicode string */ + length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name); + u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity)); + } + return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode); +} + +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/deps/icu-small/source/common/ucnvhz.c b/deps/icu-small/source/common/ucnvhz.c deleted file mode 100644 index dc5785b2ad..0000000000 --- a/deps/icu-small/source/common/ucnvhz.c +++ /dev/null @@ 
-1,631 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnvhz.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000oct16 -* created by: Ram Viswanadha -* 10/31/2000 Ram Implemented offsets logic function -* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "cmemory.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/uset.h" -#include "unicode/utf16.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ucnv_imp.h" - -#define UCNV_TILDE 0x7E /* ~ */ -#define UCNV_OPEN_BRACE 0x7B /* { */ -#define UCNV_CLOSE_BRACE 0x7D /* } */ -#define SB_ESCAPE "\x7E\x7D" -#define DB_ESCAPE "\x7E\x7B" -#define TILDE_ESCAPE "\x7E\x7E" -#define ESC_LEN 2 - - -#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \ - while(len-->0){ \ - if(targetIndex < targetLength){ \ - args->target[targetIndex] = (unsigned char) *strToAppend; \ - if(args->offsets!=NULL){ \ - *(offsets++) = sourceIndex-1; \ - } \ - targetIndex++; \ - } \ - else{ \ - args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \ - *err =U_BUFFER_OVERFLOW_ERROR; \ - } \ - strToAppend++; \ - } \ -} - - -typedef struct{ - UConverter* gbConverter; - int32_t targetIndex; - int32_t sourceIndex; - UBool isEscapeAppended; - UBool isStateDBCS; - UBool isTargetUCharDBCS; - UBool isEmptySegment; -}UConverterDataHZ; - - - -static void -_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ - UConverter 
*gbConverter; - if(pArgs->onlyTestIsLoadable) { - ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ - return; - } - gbConverter = ucnv_open("GBK", errorCode); - if(U_FAILURE(*errorCode)) { - return; - } - cnv->toUnicodeStatus = 0; - cnv->fromUnicodeStatus= 0; - cnv->mode=0; - cnv->fromUChar32=0x0000; - cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); - if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; - } - else { - ucnv_close(gbConverter); - *errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } -} - -static void -_HZClose(UConverter *cnv){ - if(cnv->extraInfo != NULL) { - ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); - if(!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo = NULL; - } -} - -static void -_HZReset(UConverter *cnv, UConverterResetChoice choice){ - if(choice<=UCNV_RESET_TO_UNICODE) { - cnv->toUnicodeStatus = 0; - cnv->mode=0; - if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; - ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; - } - } - if(choice!=UCNV_RESET_TO_UNICODE) { - cnv->fromUnicodeStatus= 0; - cnv->fromUChar32=0x0000; - if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; - ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; - ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; - ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; - } - } -} - -/**************************************HZ Encoding************************************************* -* Rules for HZ encoding -* -* In ASCII mode, a byte is interpreted as an ASCII character, unless a -* '~' is encountered. The character '~' is an escape character. By -* convention, it must be immediately followed ONLY by '~', '{' or '\n' -* (), with the following special meaning. - -* 1. The escape sequence '~~' is interpreted as a '~'. -* 2. 
The escape-to-GB sequence '~{' switches the mode from ASCII to GB. -* 3. The escape sequence '~\n' is a line-continuation marker to be -* consumed with no output produced. -* In GB mode, characters are interpreted two bytes at a time as (pure) -* GB codes until the escape-from-GB code '~}' is read. This code -* switches the mode from GB back to ASCII. (Note that the escape- -* from-GB code '~}' ($7E7D) is outside the defined GB range.) -* -* Source: RFC 1842 -* -* Note that the formal syntax in RFC 1842 is invalid. I assume that the -* intended definition of single-byte-segment is as follows (pedberg): -* single-byte-segment = single-byte-seq 1*single-byte-char -*/ - - -static void -UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, - UErrorCode* err){ - char tempBuf[2]; - const char *mySource = ( char *) args->source; - UChar *myTarget = args->target; - const char *mySourceLimit = args->sourceLimit; - UChar32 targetUniChar = 0x0000; - int32_t mySourceChar = 0x0000; - UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); - tempBuf[0]=0; - tempBuf[1]=0; - - /* Calling code already handles this situation. 
*/ - /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - }*/ - - while(mySource< mySourceLimit){ - - if(myTarget < args->targetLimit){ - - mySourceChar= (unsigned char) *mySource++; - - if(args->converter->mode == UCNV_TILDE) { - /* second byte after ~ */ - args->converter->mode=0; - switch(mySourceChar) { - case 0x0A: - /* no output for ~\n (line-continuation marker) */ - continue; - case UCNV_TILDE: - if(args->offsets) { - args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); - } - *(myTarget++)=(UChar)mySourceChar; - myData->isEmptySegment = FALSE; - continue; - case UCNV_OPEN_BRACE: - case UCNV_CLOSE_BRACE: - myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); - if (myData->isEmptySegment) { - myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toUBytes[0] = UCNV_TILDE; - args->converter->toUBytes[1] = mySourceChar; - args->converter->toULength = 2; - args->target = myTarget; - args->source = mySource; - return; - } - myData->isEmptySegment = TRUE; - continue; - default: - /* if the first byte is equal to TILDE and the trail byte - * is not a valid byte then it is an error condition - */ - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - */ - myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUBytes[0] = UCNV_TILDE; - if( myData->isStateDBCS ? 
- (0x21 <= mySourceChar && mySourceChar <= 0x7e) : - mySourceChar <= 0x7f - ) { - /* The current byte could be the start of a character: Back it out. */ - args->converter->toULength = 1; - --mySource; - } else { - /* Include the current byte in the illegal sequence. */ - args->converter->toUBytes[1] = mySourceChar; - args->converter->toULength = 2; - } - args->target = myTarget; - args->source = mySource; - return; - } - } else if(myData->isStateDBCS) { - if(args->converter->toUnicodeStatus == 0x00){ - /* lead byte */ - if(mySourceChar == UCNV_TILDE) { - args->converter->mode = UCNV_TILDE; - } else { - /* add another bit to distinguish a 0 byte from not having seen a lead byte */ - args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); - myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ - } - continue; - } - else{ - /* trail byte */ - int leadIsOk, trailIsOk; - uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; - targetUniChar = 0xffff; - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - * - * In HZ DBCS, if the second byte is in the 21..7e range, - * we report only the first byte as the illegal sequence. - * Otherwise we convert or report the pair of bytes. 
- */ - leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); - trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); - if (leadIsOk && trailIsOk) { - tempBuf[0] = (char) (leadByte+0x80) ; - tempBuf[1] = (char) (mySourceChar+0x80); - targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, - tempBuf, 2, args->converter->useFallback); - mySourceChar= (leadByte << 8) | mySourceChar; - } else if (trailIsOk) { - /* report a single illegal byte and continue with the following DBCS starter byte */ - --mySource; - mySourceChar = (int32_t)leadByte; - } else { - /* report a pair of illegal bytes if the second byte is not a DBCS starter */ - /* add another bit so that the code below writes 2 bytes in case of error */ - mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; - } - args->converter->toUnicodeStatus =0x00; - } - } - else{ - if(mySourceChar == UCNV_TILDE) { - args->converter->mode = UCNV_TILDE; - continue; - } else if(mySourceChar <= 0x7f) { - targetUniChar = (UChar)mySourceChar; /* ASCII */ - myData->isEmptySegment = FALSE; /* the segment has something valid */ - } else { - targetUniChar = 0xffff; - myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ - } - } - if(targetUniChar < 0xfffe){ - if(args->offsets) { - args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); - } - - *(myTarget++)=(UChar)targetUniChar; - } - else /* targetUniChar>=0xfffe */ { - if(targetUniChar == 0xfffe){ - *err = U_INVALID_CHAR_FOUND; - } - else{ - *err = U_ILLEGAL_CHAR_FOUND; - } - if(mySourceChar > 0xff){ - args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); - args->converter->toUBytes[1] = (uint8_t)mySourceChar; - args->converter->toULength=2; - } - else{ - args->converter->toUBytes[0] = (uint8_t)mySourceChar; - args->converter->toULength=1; - } - break; - } - } - else{ - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - args->target = myTarget; - 
args->source = mySource; -} - - -static void -UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, - UErrorCode * err){ - const UChar *mySource = args->source; - char *myTarget = args->target; - int32_t* offsets = args->offsets; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = (int32_t)(args->targetLimit - myTarget); - int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); - uint32_t targetUniChar = 0x0000; - UChar32 mySourceChar = 0x0000; - UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; - UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; - UBool oldIsTargetUCharDBCS; - int len =0; - const char* escSeq=NULL; - - /* Calling code already handles this situation. */ - /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - }*/ - if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { - goto getTrail; - } - /*writing the char to the output stream */ - while (mySourceIndex < mySourceLength){ - targetUniChar = missingCharMarker; - if (myTargetIndex < targetLength){ - - mySourceChar = (UChar) mySource[mySourceIndex++]; - - - oldIsTargetUCharDBCS = isTargetUCharDBCS; - if(mySourceChar ==UCNV_TILDE){ - /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ - len = ESC_LEN; - escSeq = TILDE_ESCAPE; - CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - continue; - } else if(mySourceChar <= 0x7f) { - targetUniChar = mySourceChar; - } else { - int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, - mySourceChar,&targetUniChar,args->converter->useFallback); - /* we can only use lead bytes 21..7D and trail bytes 21..7E */ - if( length == 2 && - (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && - (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) - ) { 
- targetUniChar -= 0x8080; - } else { - targetUniChar = missingCharMarker; - } - } - if (targetUniChar != missingCharMarker){ - myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); - if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ - /*Shifting from a double byte to single byte mode*/ - if(!isTargetUCharDBCS){ - len =ESC_LEN; - escSeq = SB_ESCAPE; - CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - myConverterData->isEscapeAppended = TRUE; - } - else{ /* Shifting from a single byte to double byte mode*/ - len =ESC_LEN; - escSeq = DB_ESCAPE; - CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - myConverterData->isEscapeAppended = TRUE; - - } - } - - if(isTargetUCharDBCS){ - if( myTargetIndex > 8); - if(offsets){ - *(offsets++) = mySourceIndex-1; - } - if(myTargetIndex < targetLength){ - myTarget[myTargetIndex++] =(char) targetUniChar; - if(offsets){ - *(offsets++) = mySourceIndex-1; - } - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; - *err = U_BUFFER_OVERFLOW_ERROR; - } - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; - *err = U_BUFFER_OVERFLOW_ERROR; - } - - }else{ - if( myTargetIndex converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - - } - else{ - /* oops.. 
the code point is unassigned */ - /*Handle surrogates */ - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(mySourceChar)) { - if(U16_IS_SURROGATE_LEAD(mySourceChar)) { - args->converter->fromUChar32=mySourceChar; -getTrail: - /*look ahead to find the trail surrogate*/ - if(mySourceIndex < mySourceLength) { - /* test the following code unit */ - UChar trail=(UChar) args->source[mySourceIndex]; - if(U16_IS_TRAIL(trail)) { - ++mySourceIndex; - mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); - args->converter->fromUChar32=0x00; - /* there are no surrogates in GB2312*/ - *err = U_INVALID_CHAR_FOUND; - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* no more input */ - *err = U_ZERO_ERROR; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* callback(unassigned) for a BMP code point */ - *err = U_INVALID_CHAR_FOUND; - } - - args->converter->fromUChar32=mySourceChar; - break; - } - } - else{ - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - targetUniChar=missingCharMarker; - } - - args->target += myTargetIndex; - args->source += mySourceIndex; - myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; -} - -static void -_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { - UConverter *cnv = args->converter; - UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; - char *p; - char buffer[4]; - p = buffer; - - if( convData->isTargetUCharDBCS){ - *p++= UCNV_TILDE; - *p++= UCNV_CLOSE_BRACE; - convData->isTargetUCharDBCS=FALSE; - } - *p++= (char)cnv->subChars[0]; - - ucnv_cbFromUWriteBytes(args, - buffer, (int32_t)(p - buffer), - offsetIndex, err); -} - -/* - * Structure for cloning an HZ converter into a single memory block. 
- * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, - * and then ucnv_safeClone() of the sub-converter may additionally align - * subCnv inside the cloneHZStruct, for which we need the deadSpace after - * subCnv. This is because UAlignedMemory may be larger than the actually - * necessary alignment size for the platform. - * The other cloneHZStruct fields will not be moved around, - * and are aligned properly with cloneHZStruct's alignment. - */ -struct cloneHZStruct -{ - UConverter cnv; - UConverter subCnv; - UAlignedMemory deadSpace; - UConverterDataHZ mydata; -}; - - -static UConverter * -_HZ_SafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneHZStruct * localClone; - int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); - - if (U_FAILURE(*status)){ - return 0; - } - - if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneHZStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); - localClone->cnv.extraInfo = &localClone->mydata; - localClone->cnv.isExtraLocal = TRUE; - - /* deep-clone the sub-converter */ - size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ - ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = - ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); - - return &localClone->cnv; -} - -static void -_HZ_GetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - /* HZ converts all of ASCII */ - sa->addRange(sa->set, 0, 0x7f); - - /* add all of the code points that the sub-converter handles */ - ucnv_MBCSGetFilteredUnicodeSetForUnicode( - 
((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, - sa, which, UCNV_SET_FILTER_HZ, - pErrorCode); -} - -static const UConverterImpl _HZImpl={ - - UCNV_HZ, - - NULL, - NULL, - - _HZOpen, - _HZClose, - _HZReset, - - UConverter_toUnicode_HZ_OFFSETS_LOGIC, - UConverter_toUnicode_HZ_OFFSETS_LOGIC, - UConverter_fromUnicode_HZ_OFFSETS_LOGIC, - UConverter_fromUnicode_HZ_OFFSETS_LOGIC, - NULL, - - NULL, - NULL, - _HZ_WriteSub, - _HZ_SafeClone, - _HZ_GetUnicodeSet -}; - -static const UConverterStaticData _HZStaticData={ - sizeof(UConverterStaticData), - "HZ", - 0, - UCNV_IBM, - UCNV_HZ, - 1, - 4, - { 0x1a, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ - -}; - -const UConverterSharedData _HZData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl); - -#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnvhz.cpp b/deps/icu-small/source/common/ucnvhz.cpp new file mode 100644 index 0000000000..5a24575f05 --- /dev/null +++ b/deps/icu-small/source/common/ucnvhz.cpp @@ -0,0 +1,633 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* file name: ucnvhz.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000oct16 +* created by: Ram Viswanadha +* 10/31/2000 Ram Implemented offsets logic function +* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "cmemory.h" +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/uset.h" +#include "unicode/utf16.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ucnv_imp.h" + +#define UCNV_TILDE 0x7E /* ~ */ +#define UCNV_OPEN_BRACE 0x7B /* { */ +#define UCNV_CLOSE_BRACE 0x7D /* } */ +#define SB_ESCAPE "\x7E\x7D" +#define DB_ESCAPE "\x7E\x7B" +#define TILDE_ESCAPE "\x7E\x7E" +#define ESC_LEN 2 + + +#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \ + while(len-->0){ \ + if(targetIndex < targetLength){ \ + args->target[targetIndex] = (unsigned char) *strToAppend; \ + if(args->offsets!=NULL){ \ + *(offsets++) = sourceIndex-1; \ + } \ + targetIndex++; \ + } \ + else{ \ + args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \ + *err =U_BUFFER_OVERFLOW_ERROR; \ + } \ + strToAppend++; \ + } \ +} + + +typedef struct{ + UConverter* gbConverter; + int32_t targetIndex; + int32_t sourceIndex; + UBool isEscapeAppended; + UBool isStateDBCS; + UBool isTargetUCharDBCS; + UBool isEmptySegment; +}UConverterDataHZ; + + +U_CDECL_BEGIN +static void U_CALLCONV +_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ + UConverter *gbConverter; + if(pArgs->onlyTestIsLoadable) { + ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ + return; + } + gbConverter = ucnv_open("GBK", errorCode); + if(U_FAILURE(*errorCode)) { + return; + } + cnv->toUnicodeStatus = 0; + cnv->fromUnicodeStatus= 0; + cnv->mode=0; + 
cnv->fromUChar32=0x0000; + cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; + } + else { + ucnv_close(gbConverter); + *errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } +} + +static void U_CALLCONV +_HZClose(UConverter *cnv){ + if(cnv->extraInfo != NULL) { + ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); + if(!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo = NULL; + } +} + +static void U_CALLCONV +_HZReset(UConverter *cnv, UConverterResetChoice choice){ + if(choice<=UCNV_RESET_TO_UNICODE) { + cnv->toUnicodeStatus = 0; + cnv->mode=0; + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; + ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; + } + } + if(choice!=UCNV_RESET_TO_UNICODE) { + cnv->fromUnicodeStatus= 0; + cnv->fromUChar32=0x0000; + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; + ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; + ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; + ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; + } + } +} + +/**************************************HZ Encoding************************************************* +* Rules for HZ encoding +* +* In ASCII mode, a byte is interpreted as an ASCII character, unless a +* '~' is encountered. The character '~' is an escape character. By +* convention, it must be immediately followed ONLY by '~', '{' or '\n' +* (), with the following special meaning. + +* 1. The escape sequence '~~' is interpreted as a '~'. +* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. +* 3. The escape sequence '~\n' is a line-continuation marker to be +* consumed with no output produced. +* In GB mode, characters are interpreted two bytes at a time as (pure) +* GB codes until the escape-from-GB code '~}' is read. 
This code +* switches the mode from GB back to ASCII. (Note that the escape- +* from-GB code '~}' ($7E7D) is outside the defined GB range.) +* +* Source: RFC 1842 +* +* Note that the formal syntax in RFC 1842 is invalid. I assume that the +* intended definition of single-byte-segment is as follows (pedberg): +* single-byte-segment = single-byte-seq 1*single-byte-char +*/ + + +static void U_CALLCONV +UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, + UErrorCode* err){ + char tempBuf[2]; + const char *mySource = ( char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + UChar32 targetUniChar = 0x0000; + int32_t mySourceChar = 0x0000; + UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); + tempBuf[0]=0; + tempBuf[1]=0; + + /* Calling code already handles this situation. */ + /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + }*/ + + while(mySource< mySourceLimit){ + + if(myTarget < args->targetLimit){ + + mySourceChar= (unsigned char) *mySource++; + + if(args->converter->mode == UCNV_TILDE) { + /* second byte after ~ */ + args->converter->mode=0; + switch(mySourceChar) { + case 0x0A: + /* no output for ~\n (line-continuation marker) */ + continue; + case UCNV_TILDE: + if(args->offsets) { + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); + } + *(myTarget++)=(UChar)mySourceChar; + myData->isEmptySegment = FALSE; + continue; + case UCNV_OPEN_BRACE: + case UCNV_CLOSE_BRACE: + myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); + if (myData->isEmptySegment) { + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toUBytes[0] = UCNV_TILDE; + args->converter->toUBytes[1] = mySourceChar; + 
args->converter->toULength = 2; + args->target = myTarget; + args->source = mySource; + return; + } + myData->isEmptySegment = TRUE; + continue; + default: + /* if the first byte is equal to TILDE and the trail byte + * is not a valid byte then it is an error condition + */ + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUBytes[0] = UCNV_TILDE; + if( myData->isStateDBCS ? + (0x21 <= mySourceChar && mySourceChar <= 0x7e) : + mySourceChar <= 0x7f + ) { + /* The current byte could be the start of a character: Back it out. */ + args->converter->toULength = 1; + --mySource; + } else { + /* Include the current byte in the illegal sequence. */ + args->converter->toUBytes[1] = mySourceChar; + args->converter->toULength = 2; + } + args->target = myTarget; + args->source = mySource; + return; + } + } else if(myData->isStateDBCS) { + if(args->converter->toUnicodeStatus == 0x00){ + /* lead byte */ + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + } else { + /* add another bit to distinguish a 0 byte from not having seen a lead byte */ + args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); + myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ + } + continue; + } + else{ + /* trail byte */ + int leadIsOk, trailIsOk; + uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; + targetUniChar = 0xffff; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. 
+ * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In HZ DBCS, if the second byte is in the 21..7e range, + * we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); + trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + tempBuf[0] = (char) (leadByte+0x80) ; + tempBuf[1] = (char) (mySourceChar+0x80); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, + tempBuf, 2, args->converter->useFallback); + mySourceChar= (leadByte << 8) | mySourceChar; + } else if (trailIsOk) { + /* report a single illegal byte and continue with the following DBCS starter byte */ + --mySource; + mySourceChar = (int32_t)leadByte; + } else { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + } + args->converter->toUnicodeStatus =0x00; + } + } + else{ + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = (UChar)mySourceChar; /* ASCII */ + myData->isEmptySegment = FALSE; /* the segment has something valid */ + } else { + targetUniChar = 0xffff; + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + } + } + if(targetUniChar < 0xfffe){ + if(args->offsets) { + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); + } + + *(myTarget++)=(UChar)targetUniChar; + } + else /* targetUniChar>=0xfffe */ { + if(targetUniChar == 0xfffe){ + *err = U_INVALID_CHAR_FOUND; + } + else{ + *err = U_ILLEGAL_CHAR_FOUND; + } + if(mySourceChar > 0xff){ + args->converter->toUBytes[0] = 
(uint8_t)(mySourceChar >> 8); + args->converter->toUBytes[1] = (uint8_t)mySourceChar; + args->converter->toULength=2; + } + else{ + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength=1; + } + break; + } + } + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + args->target = myTarget; + args->source = mySource; +} + + +static void U_CALLCONV +UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, + UErrorCode * err){ + const UChar *mySource = args->source; + char *myTarget = args->target; + int32_t* offsets = args->offsets; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = (int32_t)(args->targetLimit - myTarget); + int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); + uint32_t targetUniChar = 0x0000; + UChar32 mySourceChar = 0x0000; + UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; + UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; + UBool oldIsTargetUCharDBCS; + int len =0; + const char* escSeq=NULL; + + /* Calling code already handles this situation. 
*/ + /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + }*/ + if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { + goto getTrail; + } + /*writing the char to the output stream */ + while (mySourceIndex < mySourceLength){ + targetUniChar = missingCharMarker; + if (myTargetIndex < targetLength){ + + mySourceChar = (UChar) mySource[mySourceIndex++]; + + + oldIsTargetUCharDBCS = isTargetUCharDBCS; + if(mySourceChar ==UCNV_TILDE){ + /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ + len = ESC_LEN; + escSeq = TILDE_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = mySourceChar; + } else { + int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, + mySourceChar,&targetUniChar,args->converter->useFallback); + /* we can only use lead bytes 21..7D and trail bytes 21..7E */ + if( length == 2 && + (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && + (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) + ) { + targetUniChar -= 0x8080; + } else { + targetUniChar = missingCharMarker; + } + } + if (targetUniChar != missingCharMarker){ + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); + if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ + /*Shifting from a double byte to single byte mode*/ + if(!isTargetUCharDBCS){ + len =ESC_LEN; + escSeq = SB_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + myConverterData->isEscapeAppended = TRUE; + } + else{ /* Shifting from a single byte to double byte mode*/ + len =ESC_LEN; + escSeq = DB_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + myConverterData->isEscapeAppended = TRUE; + + 
} + } + + if(isTargetUCharDBCS){ + if( myTargetIndex > 8); + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + if(myTargetIndex < targetLength){ + myTarget[myTargetIndex++] =(char) targetUniChar; + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + + }else{ + if( myTargetIndex converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + + } + else{ + /* oops.. the code point is unassigned */ + /*Handle surrogates */ + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(mySourceChar)) { + if(U16_IS_SURROGATE_LEAD(mySourceChar)) { + args->converter->fromUChar32=mySourceChar; +getTrail: + /*look ahead to find the trail surrogate*/ + if(mySourceIndex < mySourceLength) { + /* test the following code unit */ + UChar trail=(UChar) args->source[mySourceIndex]; + if(U16_IS_TRAIL(trail)) { + ++mySourceIndex; + mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); + args->converter->fromUChar32=0x00; + /* there are no surrogates in GB2312*/ + *err = U_INVALID_CHAR_FOUND; + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* no more input */ + *err = U_ZERO_ERROR; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; + } + + args->converter->fromUChar32=mySourceChar; + break; + 
} + } + else{ + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + targetUniChar=missingCharMarker; + } + + args->target += myTargetIndex; + args->source += mySourceIndex; + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; +} + +static void U_CALLCONV +_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { + UConverter *cnv = args->converter; + UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; + char *p; + char buffer[4]; + p = buffer; + + if( convData->isTargetUCharDBCS){ + *p++= UCNV_TILDE; + *p++= UCNV_CLOSE_BRACE; + convData->isTargetUCharDBCS=FALSE; + } + *p++= (char)cnv->subChars[0]; + + ucnv_cbFromUWriteBytes(args, + buffer, (int32_t)(p - buffer), + offsetIndex, err); +} + +/* + * Structure for cloning an HZ converter into a single memory block. + * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, + * and then ucnv_safeClone() of the sub-converter may additionally align + * subCnv inside the cloneHZStruct, for which we need the deadSpace after + * subCnv. This is because UAlignedMemory may be larger than the actually + * necessary alignment size for the platform. + * The other cloneHZStruct fields will not be moved around, + * and are aligned properly with cloneHZStruct's alignment. 
+ */ +struct cloneHZStruct +{ + UConverter cnv; + UConverter subCnv; + UAlignedMemory deadSpace; + UConverterDataHZ mydata; +}; + + +static UConverter * U_CALLCONV +_HZ_SafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneHZStruct * localClone; + int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); + + if (U_FAILURE(*status)){ + return 0; + } + + if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return 0; + } + + localClone = (struct cloneHZStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); + localClone->cnv.extraInfo = &localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + /* deep-clone the sub-converter */ + size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ + ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = + ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); + + return &localClone->cnv; +} + +static void U_CALLCONV +_HZ_GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + /* HZ converts all of ASCII */ + sa->addRange(sa->set, 0, 0x7f); + + /* add all of the code points that the sub-converter handles */ + ucnv_MBCSGetFilteredUnicodeSetForUnicode( + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, + sa, which, UCNV_SET_FILTER_HZ, + pErrorCode); +} +U_CDECL_END +static const UConverterImpl _HZImpl={ + + UCNV_HZ, + + NULL, + NULL, + + _HZOpen, + _HZClose, + _HZReset, + + UConverter_toUnicode_HZ_OFFSETS_LOGIC, + UConverter_toUnicode_HZ_OFFSETS_LOGIC, + UConverter_fromUnicode_HZ_OFFSETS_LOGIC, + UConverter_fromUnicode_HZ_OFFSETS_LOGIC, + NULL, + + NULL, + NULL, + _HZ_WriteSub, + _HZ_SafeClone, + 
_HZ_GetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _HZStaticData={ + sizeof(UConverterStaticData), + "HZ", + 0, + UCNV_IBM, + UCNV_HZ, + 1, + 4, + { 0x1a, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ + +}; + +const UConverterSharedData _HZData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl); + +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnvisci.c b/deps/icu-small/source/common/ucnvisci.c deleted file mode 100644 index 8b50915351..0000000000 --- a/deps/icu-small/source/common/ucnvisci.c +++ /dev/null @@ -1,1625 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnvisci.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001JUN26 -* created by: Ram Viswanadha -* -* Date Name Description -* 24/7/2001 Ram Added support for EXT character handling -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cstring.h" -#include "uassert.h" - -#define UCNV_OPTIONS_VERSION_MASK 0xf -#define NUKTA 0x093c -#define HALANT 0x094d -#define ZWNJ 0x200c /* Zero Width Non Joiner */ -#define ZWJ 0x200d /* Zero width Joiner */ -#define INVALID_CHAR 0xffff -#define ATR 0xEF /* Attribute code */ -#define EXT 0xF0 /* Extension code */ -#define DANDA 0x0964 -#define DOUBLE_DANDA 0x0965 -#define ISCII_NUKTA 0xE9 -#define ISCII_HALANT 0xE8 -#define ISCII_DANDA 0xEA -#define ISCII_INV 0xD9 -#define ISCII_VOWEL_SIGN_E 0xE0 -#define INDIC_BLOCK_BEGIN 0x0900 -#define INDIC_BLOCK_END 0x0D7F -#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) -#define VOCALLIC_RR 0x0931 -#define LF 0x0A -#define ASCII_END 0xA0 -#define NO_CHAR_MARKER 0xFFFE -#define TELUGU_DELTA DELTA * TELUGU -#define DEV_ABBR_SIGN 0x0970 -#define DEV_ANUDATTA 0x0952 -#define EXT_RANGE_BEGIN 0xA1 -#define EXT_RANGE_END 0xEE - -#define PNJ_DELTA 0x0100 -#define PNJ_BINDI 0x0A02 -#define PNJ_TIPPI 0x0A70 -#define PNJ_SIGN_VIRAMA 0x0A4D -#define PNJ_ADHAK 0x0A71 -#define PNJ_HA 0x0A39 -#define PNJ_RRA 0x0A5C - -typedef enum { - DEVANAGARI =0, - BENGALI, - GURMUKHI, - GUJARATI, - ORIYA, - TAMIL, - TELUGU, - KANNADA, - MALAYALAM, - DELTA=0x80 -}UniLang; - -/** - * Enumeration for switching code pages if + - * is encountered - */ -typedef enum { - DEF = 0x40, - RMN = 0x41, - DEV = 0x42, - BNG = 0x43, - TML = 0x44, - TLG = 0x45, - 
ASM = 0x46, - ORI = 0x47, - KND = 0x48, - MLM = 0x49, - GJR = 0x4A, - PNJ = 0x4B, - ARB = 0x71, - PES = 0x72, - URD = 0x73, - SND = 0x74, - KSM = 0x75, - PST = 0x76 -}ISCIILang; - -typedef enum { - DEV_MASK =0x80, - PNJ_MASK =0x40, - GJR_MASK =0x20, - ORI_MASK =0x10, - BNG_MASK =0x08, - KND_MASK =0x04, - MLM_MASK =0x02, - TML_MASK =0x01, - ZERO =0x00 -}MaskEnum; - -#define ISCII_CNV_PREFIX "ISCII,version=" - -typedef struct { - UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ - UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ - uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ - uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ - uint16_t currentDeltaToUnicode; /* current delta in Indic block */ - MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ - MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ - MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ - UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ - UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ - char name[sizeof(ISCII_CNV_PREFIX) + 1]; - UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. 
*/ -} UConverterDataISCII; - -typedef struct LookupDataStruct { - UniLang uniLang; - MaskEnum maskEnum; - ISCIILang isciiLang; -} LookupDataStruct; - -static const LookupDataStruct lookupInitialData[]={ - { DEVANAGARI, DEV_MASK, DEV }, - { BENGALI, BNG_MASK, BNG }, - { GURMUKHI, PNJ_MASK, PNJ }, - { GUJARATI, GJR_MASK, GJR }, - { ORIYA, ORI_MASK, ORI }, - { TAMIL, TML_MASK, TML }, - { TELUGU, KND_MASK, TLG }, - { KANNADA, KND_MASK, KND }, - { MALAYALAM, MLM_MASK, MLM } -}; - -/* - * For special handling of certain Gurmukhi characters. - * Bit 0 (value 1): PNJ consonant - * Bit 1 (value 2): PNJ Bindi Tippi - */ -static const uint8_t pnjMap[80] = { - /* 0A00..0A0F */ - 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0A10..0A1F */ - 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - /* 0A20..0A2F */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, - /* 0A30..0A3F */ - 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, - /* 0A40..0A4F */ - 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static UBool -isPNJConsonant(UChar32 c) { - if (c < 0xa00 || 0xa50 <= c) { - return FALSE; - } else { - return (UBool)(pnjMap[c - 0xa00] & 1); - } -} - -static UBool -isPNJBindiTippi(UChar32 c) { - if (c < 0xa00 || 0xa50 <= c) { - return FALSE; - } else { - return (UBool)(pnjMap[c - 0xa00] >> 1); - } -} - -static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { - if(pArgs->onlyTestIsLoadable) { - return; - } - - cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); - - if (cnv->extraInfo != NULL) { - int32_t len=0; - UConverterDataISCII *converterData= - (UConverterDataISCII *) cnv->extraInfo; - converterData->contextCharToUnicode=NO_CHAR_MARKER; - cnv->toUnicodeStatus = missingCharMarker; - converterData->contextCharFromUnicode=0x0000; - converterData->resetToDefaultToUnicode=FALSE; - /* check if the version requested is supported */ - if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { - /* initialize state variables */ - 
converterData->currentDeltaFromUnicode - = converterData->currentDeltaToUnicode - = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); - - converterData->currentMaskFromUnicode - = converterData->currentMaskToUnicode - = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; - - converterData->isFirstBuffer=TRUE; - (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); - len = (int32_t)uprv_strlen(converterData->name); - converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); - converterData->name[len+1]=0; - - converterData->prevToUnicodeStatus = 0x0000; - } else { - uprv_free(cnv->extraInfo); - cnv->extraInfo = NULL; - *errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - - } else { - *errorCode =U_MEMORY_ALLOCATION_ERROR; - } -} - -static void _ISCIIClose(UConverter *cnv) { - if (cnv->extraInfo!=NULL) { - if (!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo=NULL; - } -} - -static const char* _ISCIIgetName(const UConverter* cnv) { - if (cnv->extraInfo) { - UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; - return myData->name; - } - return NULL; -} - -static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { - UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); - if (choice<=UCNV_RESET_TO_UNICODE) { - cnv->toUnicodeStatus = missingCharMarker; - cnv->mode=0; - data->currentDeltaToUnicode=data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - data->contextCharToUnicode=NO_CHAR_MARKER; - data->prevToUnicodeStatus = 0x0000; - } - if (choice!=UCNV_RESET_TO_UNICODE) { - cnv->fromUChar32=0x0000; - data->contextCharFromUnicode=0x00; - data->currentMaskFromUnicode=data->defMaskToUnicode; - data->currentDeltaFromUnicode=data->defDeltaToUnicode; - data->isFirstBuffer=TRUE; - data->resetToDefaultToUnicode=FALSE; - } -} - -/** - * 
The values in validity table are indexed by the lower bits of Unicode - * range 0x0900 - 0x09ff. The values have a structure like: - * --------------------------------------------------------------- - * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | - * | | | | | ASM | KND | | | - * --------------------------------------------------------------- - * If a code point is valid in a particular script - * then that bit is turned on - * - * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for - * to represent these languages - * - * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case - * and combine and use 1 bit to represent these languages. - * - * TODO: It is probably easier to understand and maintain to change this - * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. - */ - -static const uint8_t validityTable[128] = { -/* This state table is tool generated please do not edit unless you know exactly what you are doing */ -/* Note: This table was edited to mirror the Windows XP implementation */ -/*ISCII:Valid:Unicode */ -/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + 
GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb9 : 
0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , -/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xcb : 0xfe: 0x92d */ DEV_MASK + 
PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , -/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , -/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK 
+ ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , -/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + 
ZERO + ZERO , -/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , -/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , -/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + 
TML_MASK , -/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/* - * The length of the array is 128 to provide values for 0x900..0x97f. - * The last 15 entries for 0x971..0x97f of the validity table are all zero - * because no Indic script uses such Unicode code points. - */ -/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO -}; - -static const uint16_t fromUnicodeTable[128]={ - 0x00a0 ,/* 0x0900 */ - 0x00a1 ,/* 0x0901 */ - 0x00a2 ,/* 0x0902 */ - 0x00a3 ,/* 0x0903 */ - 0xa4e0 ,/* 0x0904 */ - 0x00a4 ,/* 0x0905 */ - 0x00a5 ,/* 0x0906 */ - 0x00a6 ,/* 0x0907 */ - 0x00a7 ,/* 0x0908 */ - 0x00a8 ,/* 0x0909 */ - 0x00a9 ,/* 0x090a */ - 0x00aa ,/* 0x090b */ - 0xA6E9 ,/* 0x090c */ - 0x00ae ,/* 0x090d */ - 0x00ab ,/* 0x090e */ - 0x00ac ,/* 0x090f */ - 0x00ad ,/* 0x0910 */ - 0x00b2 ,/* 0x0911 */ - 0x00af ,/* 0x0912 */ - 0x00b0 ,/* 0x0913 */ - 0x00b1 ,/* 0x0914 */ - 0x00b3 ,/* 0x0915 */ - 0x00b4 ,/* 0x0916 */ - 0x00b5 ,/* 0x0917 */ - 0x00b6 ,/* 0x0918 */ - 0x00b7 ,/* 0x0919 */ - 0x00b8 ,/* 0x091a */ - 0x00b9 ,/* 0x091b */ - 0x00ba ,/* 0x091c */ - 0x00bb ,/* 0x091d */ - 0x00bc ,/* 0x091e */ - 0x00bd ,/* 0x091f */ - 0x00be ,/* 0x0920 */ - 0x00bf ,/* 0x0921 */ - 0x00c0 ,/* 0x0922 */ - 0x00c1 ,/* 0x0923 */ - 0x00c2 ,/* 0x0924 */ - 0x00c3 ,/* 0x0925 */ 
- 0x00c4 ,/* 0x0926 */ - 0x00c5 ,/* 0x0927 */ - 0x00c6 ,/* 0x0928 */ - 0x00c7 ,/* 0x0929 */ - 0x00c8 ,/* 0x092a */ - 0x00c9 ,/* 0x092b */ - 0x00ca ,/* 0x092c */ - 0x00cb ,/* 0x092d */ - 0x00cc ,/* 0x092e */ - 0x00cd ,/* 0x092f */ - 0x00cf ,/* 0x0930 */ - 0x00d0 ,/* 0x0931 */ - 0x00d1 ,/* 0x0932 */ - 0x00d2 ,/* 0x0933 */ - 0x00d3 ,/* 0x0934 */ - 0x00d4 ,/* 0x0935 */ - 0x00d5 ,/* 0x0936 */ - 0x00d6 ,/* 0x0937 */ - 0x00d7 ,/* 0x0938 */ - 0x00d8 ,/* 0x0939 */ - 0xFFFF ,/* 0x093A */ - 0xFFFF ,/* 0x093B */ - 0x00e9 ,/* 0x093c */ - 0xEAE9 ,/* 0x093d */ - 0x00da ,/* 0x093e */ - 0x00db ,/* 0x093f */ - 0x00dc ,/* 0x0940 */ - 0x00dd ,/* 0x0941 */ - 0x00de ,/* 0x0942 */ - 0x00df ,/* 0x0943 */ - 0xDFE9 ,/* 0x0944 */ - 0x00e3 ,/* 0x0945 */ - 0x00e0 ,/* 0x0946 */ - 0x00e1 ,/* 0x0947 */ - 0x00e2 ,/* 0x0948 */ - 0x00e7 ,/* 0x0949 */ - 0x00e4 ,/* 0x094a */ - 0x00e5 ,/* 0x094b */ - 0x00e6 ,/* 0x094c */ - 0x00e8 ,/* 0x094d */ - 0x00ec ,/* 0x094e */ - 0x00ed ,/* 0x094f */ - 0xA1E9 ,/* 0x0950 */ /* OM Symbol */ - 0xFFFF ,/* 0x0951 */ - 0xF0B8 ,/* 0x0952 */ - 0xFFFF ,/* 0x0953 */ - 0xFFFF ,/* 0x0954 */ - 0xFFFF ,/* 0x0955 */ - 0xFFFF ,/* 0x0956 */ - 0xFFFF ,/* 0x0957 */ - 0xb3e9 ,/* 0x0958 */ - 0xb4e9 ,/* 0x0959 */ - 0xb5e9 ,/* 0x095a */ - 0xbae9 ,/* 0x095b */ - 0xbfe9 ,/* 0x095c */ - 0xC0E9 ,/* 0x095d */ - 0xc9e9 ,/* 0x095e */ - 0x00ce ,/* 0x095f */ - 0xAAe9 ,/* 0x0960 */ - 0xA7E9 ,/* 0x0961 */ - 0xDBE9 ,/* 0x0962 */ - 0xDCE9 ,/* 0x0963 */ - 0x00ea ,/* 0x0964 */ - 0xeaea ,/* 0x0965 */ - 0x00f1 ,/* 0x0966 */ - 0x00f2 ,/* 0x0967 */ - 0x00f3 ,/* 0x0968 */ - 0x00f4 ,/* 0x0969 */ - 0x00f5 ,/* 0x096a */ - 0x00f6 ,/* 0x096b */ - 0x00f7 ,/* 0x096c */ - 0x00f8 ,/* 0x096d */ - 0x00f9 ,/* 0x096e */ - 0x00fa ,/* 0x096f */ - 0xF0BF ,/* 0x0970 */ - 0xFFFF ,/* 0x0971 */ - 0xFFFF ,/* 0x0972 */ - 0xFFFF ,/* 0x0973 */ - 0xFFFF ,/* 0x0974 */ - 0xFFFF ,/* 0x0975 */ - 0xFFFF ,/* 0x0976 */ - 0xFFFF ,/* 0x0977 */ - 0xFFFF ,/* 0x0978 */ - 0xFFFF ,/* 0x0979 */ - 0xFFFF ,/* 0x097a */ - 0xFFFF ,/* 0x097b */ - 
0xFFFF ,/* 0x097c */ - 0xFFFF ,/* 0x097d */ - 0xFFFF ,/* 0x097e */ - 0xFFFF ,/* 0x097f */ -}; -static const uint16_t toUnicodeTable[256]={ - 0x0000,/* 0x00 */ - 0x0001,/* 0x01 */ - 0x0002,/* 0x02 */ - 0x0003,/* 0x03 */ - 0x0004,/* 0x04 */ - 0x0005,/* 0x05 */ - 0x0006,/* 0x06 */ - 0x0007,/* 0x07 */ - 0x0008,/* 0x08 */ - 0x0009,/* 0x09 */ - 0x000a,/* 0x0a */ - 0x000b,/* 0x0b */ - 0x000c,/* 0x0c */ - 0x000d,/* 0x0d */ - 0x000e,/* 0x0e */ - 0x000f,/* 0x0f */ - 0x0010,/* 0x10 */ - 0x0011,/* 0x11 */ - 0x0012,/* 0x12 */ - 0x0013,/* 0x13 */ - 0x0014,/* 0x14 */ - 0x0015,/* 0x15 */ - 0x0016,/* 0x16 */ - 0x0017,/* 0x17 */ - 0x0018,/* 0x18 */ - 0x0019,/* 0x19 */ - 0x001a,/* 0x1a */ - 0x001b,/* 0x1b */ - 0x001c,/* 0x1c */ - 0x001d,/* 0x1d */ - 0x001e,/* 0x1e */ - 0x001f,/* 0x1f */ - 0x0020,/* 0x20 */ - 0x0021,/* 0x21 */ - 0x0022,/* 0x22 */ - 0x0023,/* 0x23 */ - 0x0024,/* 0x24 */ - 0x0025,/* 0x25 */ - 0x0026,/* 0x26 */ - 0x0027,/* 0x27 */ - 0x0028,/* 0x28 */ - 0x0029,/* 0x29 */ - 0x002a,/* 0x2a */ - 0x002b,/* 0x2b */ - 0x002c,/* 0x2c */ - 0x002d,/* 0x2d */ - 0x002e,/* 0x2e */ - 0x002f,/* 0x2f */ - 0x0030,/* 0x30 */ - 0x0031,/* 0x31 */ - 0x0032,/* 0x32 */ - 0x0033,/* 0x33 */ - 0x0034,/* 0x34 */ - 0x0035,/* 0x35 */ - 0x0036,/* 0x36 */ - 0x0037,/* 0x37 */ - 0x0038,/* 0x38 */ - 0x0039,/* 0x39 */ - 0x003A,/* 0x3A */ - 0x003B,/* 0x3B */ - 0x003c,/* 0x3c */ - 0x003d,/* 0x3d */ - 0x003e,/* 0x3e */ - 0x003f,/* 0x3f */ - 0x0040,/* 0x40 */ - 0x0041,/* 0x41 */ - 0x0042,/* 0x42 */ - 0x0043,/* 0x43 */ - 0x0044,/* 0x44 */ - 0x0045,/* 0x45 */ - 0x0046,/* 0x46 */ - 0x0047,/* 0x47 */ - 0x0048,/* 0x48 */ - 0x0049,/* 0x49 */ - 0x004a,/* 0x4a */ - 0x004b,/* 0x4b */ - 0x004c,/* 0x4c */ - 0x004d,/* 0x4d */ - 0x004e,/* 0x4e */ - 0x004f,/* 0x4f */ - 0x0050,/* 0x50 */ - 0x0051,/* 0x51 */ - 0x0052,/* 0x52 */ - 0x0053,/* 0x53 */ - 0x0054,/* 0x54 */ - 0x0055,/* 0x55 */ - 0x0056,/* 0x56 */ - 0x0057,/* 0x57 */ - 0x0058,/* 0x58 */ - 0x0059,/* 0x59 */ - 0x005a,/* 0x5a */ - 0x005b,/* 0x5b */ - 0x005c,/* 0x5c */ 
- 0x005d,/* 0x5d */ - 0x005e,/* 0x5e */ - 0x005f,/* 0x5f */ - 0x0060,/* 0x60 */ - 0x0061,/* 0x61 */ - 0x0062,/* 0x62 */ - 0x0063,/* 0x63 */ - 0x0064,/* 0x64 */ - 0x0065,/* 0x65 */ - 0x0066,/* 0x66 */ - 0x0067,/* 0x67 */ - 0x0068,/* 0x68 */ - 0x0069,/* 0x69 */ - 0x006a,/* 0x6a */ - 0x006b,/* 0x6b */ - 0x006c,/* 0x6c */ - 0x006d,/* 0x6d */ - 0x006e,/* 0x6e */ - 0x006f,/* 0x6f */ - 0x0070,/* 0x70 */ - 0x0071,/* 0x71 */ - 0x0072,/* 0x72 */ - 0x0073,/* 0x73 */ - 0x0074,/* 0x74 */ - 0x0075,/* 0x75 */ - 0x0076,/* 0x76 */ - 0x0077,/* 0x77 */ - 0x0078,/* 0x78 */ - 0x0079,/* 0x79 */ - 0x007a,/* 0x7a */ - 0x007b,/* 0x7b */ - 0x007c,/* 0x7c */ - 0x007d,/* 0x7d */ - 0x007e,/* 0x7e */ - 0x007f,/* 0x7f */ - 0x0080,/* 0x80 */ - 0x0081,/* 0x81 */ - 0x0082,/* 0x82 */ - 0x0083,/* 0x83 */ - 0x0084,/* 0x84 */ - 0x0085,/* 0x85 */ - 0x0086,/* 0x86 */ - 0x0087,/* 0x87 */ - 0x0088,/* 0x88 */ - 0x0089,/* 0x89 */ - 0x008a,/* 0x8a */ - 0x008b,/* 0x8b */ - 0x008c,/* 0x8c */ - 0x008d,/* 0x8d */ - 0x008e,/* 0x8e */ - 0x008f,/* 0x8f */ - 0x0090,/* 0x90 */ - 0x0091,/* 0x91 */ - 0x0092,/* 0x92 */ - 0x0093,/* 0x93 */ - 0x0094,/* 0x94 */ - 0x0095,/* 0x95 */ - 0x0096,/* 0x96 */ - 0x0097,/* 0x97 */ - 0x0098,/* 0x98 */ - 0x0099,/* 0x99 */ - 0x009a,/* 0x9a */ - 0x009b,/* 0x9b */ - 0x009c,/* 0x9c */ - 0x009d,/* 0x9d */ - 0x009e,/* 0x9e */ - 0x009f,/* 0x9f */ - 0x00A0,/* 0xa0 */ - 0x0901,/* 0xa1 */ - 0x0902,/* 0xa2 */ - 0x0903,/* 0xa3 */ - 0x0905,/* 0xa4 */ - 0x0906,/* 0xa5 */ - 0x0907,/* 0xa6 */ - 0x0908,/* 0xa7 */ - 0x0909,/* 0xa8 */ - 0x090a,/* 0xa9 */ - 0x090b,/* 0xaa */ - 0x090e,/* 0xab */ - 0x090f,/* 0xac */ - 0x0910,/* 0xad */ - 0x090d,/* 0xae */ - 0x0912,/* 0xaf */ - 0x0913,/* 0xb0 */ - 0x0914,/* 0xb1 */ - 0x0911,/* 0xb2 */ - 0x0915,/* 0xb3 */ - 0x0916,/* 0xb4 */ - 0x0917,/* 0xb5 */ - 0x0918,/* 0xb6 */ - 0x0919,/* 0xb7 */ - 0x091a,/* 0xb8 */ - 0x091b,/* 0xb9 */ - 0x091c,/* 0xba */ - 0x091d,/* 0xbb */ - 0x091e,/* 0xbc */ - 0x091f,/* 0xbd */ - 0x0920,/* 0xbe */ - 0x0921,/* 0xbf */ - 0x0922,/* 0xc0 */ 
- 0x0923,/* 0xc1 */ - 0x0924,/* 0xc2 */ - 0x0925,/* 0xc3 */ - 0x0926,/* 0xc4 */ - 0x0927,/* 0xc5 */ - 0x0928,/* 0xc6 */ - 0x0929,/* 0xc7 */ - 0x092a,/* 0xc8 */ - 0x092b,/* 0xc9 */ - 0x092c,/* 0xca */ - 0x092d,/* 0xcb */ - 0x092e,/* 0xcc */ - 0x092f,/* 0xcd */ - 0x095f,/* 0xce */ - 0x0930,/* 0xcf */ - 0x0931,/* 0xd0 */ - 0x0932,/* 0xd1 */ - 0x0933,/* 0xd2 */ - 0x0934,/* 0xd3 */ - 0x0935,/* 0xd4 */ - 0x0936,/* 0xd5 */ - 0x0937,/* 0xd6 */ - 0x0938,/* 0xd7 */ - 0x0939,/* 0xd8 */ - 0x200D,/* 0xd9 */ - 0x093e,/* 0xda */ - 0x093f,/* 0xdb */ - 0x0940,/* 0xdc */ - 0x0941,/* 0xdd */ - 0x0942,/* 0xde */ - 0x0943,/* 0xdf */ - 0x0946,/* 0xe0 */ - 0x0947,/* 0xe1 */ - 0x0948,/* 0xe2 */ - 0x0945,/* 0xe3 */ - 0x094a,/* 0xe4 */ - 0x094b,/* 0xe5 */ - 0x094c,/* 0xe6 */ - 0x0949,/* 0xe7 */ - 0x094d,/* 0xe8 */ - 0x093c,/* 0xe9 */ - 0x0964,/* 0xea */ - 0xFFFF,/* 0xeb */ - 0xFFFF,/* 0xec */ - 0xFFFF,/* 0xed */ - 0xFFFF,/* 0xee */ - 0xFFFF,/* 0xef */ - 0xFFFF,/* 0xf0 */ - 0x0966,/* 0xf1 */ - 0x0967,/* 0xf2 */ - 0x0968,/* 0xf3 */ - 0x0969,/* 0xf4 */ - 0x096a,/* 0xf5 */ - 0x096b,/* 0xf6 */ - 0x096c,/* 0xf7 */ - 0x096d,/* 0xf8 */ - 0x096e,/* 0xf9 */ - 0x096f,/* 0xfa */ - 0xFFFF,/* 0xfb */ - 0xFFFF,/* 0xfc */ - 0xFFFF,/* 0xfd */ - 0xFFFF,/* 0xfe */ - 0xFFFF /* 0xff */ -}; - -static const uint16_t vowelSignESpecialCases[][2]={ - { 2 /*length of array*/ , 0 }, - { 0xA4 , 0x0904 }, -}; - -static const uint16_t nuktaSpecialCases[][2]={ - { 16 /*length of array*/ , 0 }, - { 0xA6 , 0x090c }, - { 0xEA , 0x093D }, - { 0xDF , 0x0944 }, - { 0xA1 , 0x0950 }, - { 0xb3 , 0x0958 }, - { 0xb4 , 0x0959 }, - { 0xb5 , 0x095a }, - { 0xba , 0x095b }, - { 0xbf , 0x095c }, - { 0xC0 , 0x095d }, - { 0xc9 , 0x095e }, - { 0xAA , 0x0960 }, - { 0xA7 , 0x0961 }, - { 0xDB , 0x0962 }, - { 0xDC , 0x0963 }, -}; - - -#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \ - int32_t offset = (int32_t)(source - args->source-1); \ - /* write the targetUniChar to target */ \ - if(target < 
targetLimit){ \ - if(targetByteUnit <= 0xFF){ \ - *(target)++ = (uint8_t)(targetByteUnit); \ - if(offsets){ \ - *(offsets++) = offset; \ - } \ - }else{ \ - if (targetByteUnit > 0xFFFF) { \ - *(target)++ = (uint8_t)(targetByteUnit>>16); \ - if (offsets) { \ - --offset; \ - *(offsets++) = offset; \ - } \ - } \ - if (!(target < targetLimit)) { \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t)(targetByteUnit >> 8); \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t)targetByteUnit; \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } else { \ - *(target)++ = (uint8_t)(targetByteUnit>>8); \ - if(offsets){ \ - *(offsets++) = offset; \ - } \ - if(target < targetLimit){ \ - *(target)++ = (uint8_t) targetByteUnit; \ - if(offsets){ \ - *(offsets++) = offset ; \ - } \ - }else{ \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ - (uint8_t) (targetByteUnit); \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ - } \ - } \ - }else{ \ - if (targetByteUnit & 0xFF0000) { \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit >>16); \ - } \ - if(targetByteUnit & 0xFF00){ \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit >>8); \ - } \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit); \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ -} - -/* Rules: - * Explicit Halant : - * + - * Soft Halant : - * + - */ - -static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( - UConverterFromUnicodeArgs * args, UErrorCode * err) { - const UChar *source = args->source; - const UChar *sourceLimit = args->sourceLimit; - unsigned char *target = (unsigned char *) args->target; - unsigned char *targetLimit = (unsigned char *) args->targetLimit; - int32_t* offsets = args->offsets; - uint32_t targetByteUnit = 0x0000; - UChar32 sourceChar = 
0x0000; - UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ - UConverterDataISCII *converterData; - uint16_t newDelta=0; - uint16_t range = 0; - UBool deltaChanged = FALSE; - - if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - /* initialize data */ - converterData=(UConverterDataISCII*)args->converter->extraInfo; - newDelta=converterData->currentDeltaFromUnicode; - range = (uint16_t)(newDelta/DELTA); - - if ((sourceChar = args->converter->fromUChar32)!=0) { - goto getTrail; - } - - /*writing the char to the output stream */ - while (source < sourceLimit) { - /* Write the language code following LF only if LF is not the last character. */ - if (args->converter->fromUnicodeStatus == LF) { - targetByteUnit = ATR<<8; - targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; - args->converter->fromUnicodeStatus = 0x0000; - /* now append ATR and language code */ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } - - sourceChar = *source++; - tempContextFromUnicode = converterData->contextCharFromUnicode; - - targetByteUnit = missingCharMarker; - - /*check if input is in ASCII and C0 control codes range*/ - if (sourceChar <= ASCII_END) { - args->converter->fromUnicodeStatus = sourceChar; - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); - if (U_FAILURE(*err)) { - break; - } - continue; - } - switch (sourceChar) { - case ZWNJ: - /* contextChar has HALANT */ - if (converterData->contextCharFromUnicode) { - converterData->contextCharFromUnicode = 0x00; - targetByteUnit = ISCII_HALANT; - } else { - /* consume ZWNJ and continue */ - converterData->contextCharFromUnicode = 0x00; - continue; - } - break; - case ZWJ: - /* contextChar has HALANT */ - if (converterData->contextCharFromUnicode) { - targetByteUnit = 
ISCII_NUKTA; - } else { - targetByteUnit =ISCII_INV; - } - converterData->contextCharFromUnicode = 0x00; - break; - default: - /* is the sourceChar in the INDIC_RANGE? */ - if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { - /* Danda and Double Danda are valid in Northern scripts.. since Unicode - * does not include these codepoints in all Northern scrips we need to - * filter them out - */ - if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { - /* find out to which block the souceChar belongs*/ - range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); - newDelta =(uint16_t)(range*DELTA); - - /* Now are we in the same block as the previous? */ - if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { - converterData->currentDeltaFromUnicode = newDelta; - converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; - deltaChanged =TRUE; - converterData->isFirstBuffer=FALSE; - } - - if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { - if (sourceChar == PNJ_TIPPI) { - /* Make sure Tippi is converterd to Bindi. */ - sourceChar = PNJ_BINDI; - } else if (sourceChar == PNJ_ADHAK) { - /* This is for consonant cluster handling. */ - converterData->contextCharFromUnicode = PNJ_ADHAK; - } - - } - /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ - /* now subtract the new delta from sourceChar*/ - sourceChar -= converterData->currentDeltaFromUnicode; - } - - /* get the target byte unit */ - targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; - - /* is the code point valid in current script? 
*/ - if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { - /* Vocallic RR is assigned in ISCII Telugu and Unicode */ - if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { - targetByteUnit=missingCharMarker; - } - } - - if (deltaChanged) { - /* we are in a script block which is different than - * previous sourceChar's script block write ATR and language codes - */ - uint32_t temp=0; - temp =(uint16_t)(ATR<<8); - temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); - /* reset */ - deltaChanged=FALSE; - /* now append ATR and language code */ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); - if (U_FAILURE(*err)) { - break; - } - } - - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { - continue; - } - } - /* reset context char */ - converterData->contextCharFromUnicode = 0x00; - break; - } - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { - /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ - /* reset context char */ - converterData->contextCharFromUnicode = 0x0000; - targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; - /* write targetByteUnit to target */ - WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } else if (targetByteUnit != missingCharMarker) { - if (targetByteUnit==ISCII_HALANT) { - converterData->contextCharFromUnicode = (UChar)targetByteUnit; - } - /* write targetByteUnit to target*/ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } else { - /* oops.. 
the code point is unassigned */ - /*check if the char is a First surrogate*/ - if (U16_IS_SURROGATE(sourceChar)) { - if (U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if (source < sourceLimit) { - /* test the following code unit */ - UChar trail= (*source); - if (U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - *err =U_INVALID_CHAR_FOUND; - /* convert this surrogate code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* no more input */ - *err = U_ZERO_ERROR; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* callback(unassigned) for a BMP code point */ - *err = U_INVALID_CHAR_FOUND; - } - - args->converter->fromUChar32=sourceChar; - break; - } - }/* end while(mySourceIndexsource = source; - args->target = (char*)target; -} - -static const uint16_t lookupTable[][2]={ - { ZERO, ZERO }, /*DEFALT*/ - { ZERO, ZERO }, /*ROMAN*/ - { DEVANAGARI, DEV_MASK }, - { BENGALI, BNG_MASK }, - { TAMIL, TML_MASK }, - { TELUGU, KND_MASK }, - { BENGALI, BNG_MASK }, - { ORIYA, ORI_MASK }, - { KANNADA, KND_MASK }, - { MALAYALAM, MLM_MASK }, - { GUJARATI, GJR_MASK }, - { GURMUKHI, PNJ_MASK } -}; - -#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\ - /* add offset to current Indic Block */ \ - if(targetUniChar>ASCII_END && \ - targetUniChar != ZWJ && \ - targetUniChar != ZWNJ && \ - targetUniChar != DANDA && \ - targetUniChar != DOUBLE_DANDA){ \ - \ - targetUniChar+=(uint16_t)(delta); \ - } \ - /* now write the targetUniChar */ \ - if(targettargetLimit){ \ - *(target)++ = (UChar)targetUniChar; \ - if(offsets){ \ - *(offsets)++ = (int32_t)(offset); \ - } \ - }else{ \ - 
args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ - (UChar)targetUniChar; \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ -} - -#define GET_MAPPING(sourceChar,targetUniChar,data){ \ - targetUniChar = toUnicodeTable[(sourceChar)] ; \ - /* is the code point valid in current script? */ \ - if(sourceChar> ASCII_END && \ - (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ - /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ - if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ - targetUniChar!=VOCALLIC_RR){ \ - targetUniChar=missingCharMarker; \ - } \ - } \ -} - -/*********** - * Rules for ISCII to Unicode converter - * ISCII is stateful encoding. To convert ISCII bytes to Unicode, - * which has both precomposed and decomposed forms characters - * pre-context and post-context need to be considered. - * - * Post context - * i) ATR : Attribute code is used to declare the font and script switching. - * Currently we only switch scripts and font codes consumed without generating an error - * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, - * obsolete characters - * Pre context - * i) Halant: if preceeded by a halant then it is a explicit halant - * ii) Nukta : - * a) if preceeded by a halant then it is a soft halant - * b) if preceeded by specific consonants and the ligatures have pre-composed - * characters in Unicode then convert to pre-composed characters - * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda - * - */ - -static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { - const char *source = ( char *) args->source; - UChar *target = args->target; - const char *sourceLimit = args->sourceLimit; - const UChar* targetLimit = args->targetLimit; - uint32_t targetUniChar = 0x0000; - uint8_t sourceChar = 0x0000; - UConverterDataISCII* data; - UChar32* toUnicodeStatus=NULL; - UChar32 tempTargetUniChar = 
0x0000; - UChar* contextCharToUnicode= NULL; - UBool found; - int i; - int offset = 0; - - if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - data = (UConverterDataISCII*)(args->converter->extraInfo); - contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ - toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ - - while (U_SUCCESS(*err) && sourcecurrentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); - data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; - } else if (sourceChar==DEF) { - /* switch back to default */ - data->currentDeltaToUnicode = data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - } else { - if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { - /* these are display codes consume and continue */ - } else { - *err =U_ILLEGAL_CHAR_FOUND; - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - goto CALLBACK; - } - } - - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - - continue; - - } else if (*contextCharToUnicode==EXT) { - /* check if sourceChar is in 0xA1-0xEE range */ - if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { - /* We currently support only Anudatta and Devanagari abbreviation sign */ - if (sourceChar==0xBF || sourceChar == 0xB8) { - targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; - - /* find out if the mapping is valid in this state */ - if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { - *contextCharToUnicode= NO_CHAR_MARKER; - - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* write to target */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - - continue; - } - } - /* byte unit is unassigned */ - targetUniChar = missingCharMarker; - *err= U_INVALID_CHAR_FOUND; - } else { - /* only 0xA1 - 0xEE are legal after EXT char */ - *contextCharToUnicode= NO_CHAR_MARKER; - *err = U_ILLEGAL_CHAR_FOUND; - } - goto CALLBACK; - } else if (*contextCharToUnicode==ISCII_INV) { - if (sourceChar==ISCII_HALANT) { - targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ - } else { - targetUniChar = ZWJ; - } - - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* write to target */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - } - - /* look at the pre-context and perform special processing */ - switch (sourceChar) { - case ISCII_INV: - case EXT: - case ATR: - *contextCharToUnicode = (UChar)sourceChar; - - if (*toUnicodeStatus != missingCharMarker) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); - *toUnicodeStatus = missingCharMarker; - } - continue; - case ISCII_DANDA: - /* handle double danda*/ - if (*contextCharToUnicode== ISCII_DANDA) { - targetUniChar = DOUBLE_DANDA; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - } else { - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - } - break; - case ISCII_HALANT: - /* handle explicit halant */ - if (*contextCharToUnicode == ISCII_HALANT) { - targetUniChar = ZWNJ; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - } else { - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - } - break; - case 0x0A: - case 0x0D: - data->resetToDefaultToUnicode = TRUE; - GET_MAPPING(sourceChar,targetUniChar,data) - ; - *contextCharToUnicode = sourceChar; - break; - - case ISCII_VOWEL_SIGN_E: - i=1; - found=FALSE; - for (; icurrentMaskToUnicode) { - /*targetUniChar += data->currentDeltaToUnicode ;*/ - *contextCharToUnicode= NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - break; - } - } - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - break; - - case ISCII_NUKTA: - /* handle soft halant */ - if (*contextCharToUnicode == ISCII_HALANT) { - targetUniChar = ZWJ; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - break; - } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. - * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). - */ - targetUniChar = PNJ_RRA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - if (U_SUCCESS(*err)) { - targetUniChar = PNJ_SIGN_VIRAMA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - if (U_SUCCESS(*err)) { - targetUniChar = PNJ_HA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - } else { - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; - } - } else { - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; - } - *toUnicodeStatus = missingCharMarker; - data->contextCharToUnicode = NO_CHAR_MARKER; - continue; - } else { - /* try to handle + ISCII_NUKTA special mappings */ - i=1; - found =FALSE; - for (; icurrentMaskToUnicode) { - /*targetUniChar += data->currentDeltaToUnicode ;*/ - *contextCharToUnicode= NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - if (data->currentDeltaToUnicode == PNJ_DELTA) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - continue; - } - break; - } - /* else fall through to default */ - } - /* else fall through to default */ - U_FALLTHROUGH; - } - default:GET_MAPPING(sourceChar,targetUniChar,data) - ; - *contextCharToUnicode = sourceChar; - break; - } - - if (*toUnicodeStatus != missingCharMarker) { - /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */ - if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && - (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) { - /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ - offset = (int)(source-args->source - 3); - tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ - *toUnicodeStatus = missingCharMarker; - continue; - } else { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 
- * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. - */ - if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { - targetUniChar = PNJ_TIPPI - PNJ_DELTA; - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); - } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { - /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ - data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; - } else { - /* write the previously mapped codepoint */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); - } - } - *toUnicodeStatus = missingCharMarker; - } - - if (targetUniChar != missingCharMarker) { - /* now save the targetUniChar for delayed write */ - *toUnicodeStatus = (UChar) targetUniChar; - if (data->resetToDefaultToUnicode==TRUE) { - data->currentDeltaToUnicode = data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - data->resetToDefaultToUnicode=FALSE; - } - } else { - - /* we reach here only if targetUniChar == missingCharMarker - * so assign codes to reason and err - */ - *err = U_INVALID_CHAR_FOUND; -CALLBACK: - args->converter->toUBytes[0] = (uint8_t) sourceChar; - args->converter->toULength = 1; - break; - } - - } else { - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { - /* end of the input stream */ - UConverter *cnv = args->converter; - - if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { - /* set toUBytes[] */ - cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; - cnv->toULength = 
1; - - /* avoid looping on truncated sequences */ - *contextCharToUnicode = NO_CHAR_MARKER; - } else { - cnv->toULength = 0; - } - - if (*toUnicodeStatus != missingCharMarker) { - /* output a remaining target character */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); - *toUnicodeStatus = missingCharMarker; - } - } - - args->target = target; - args->source = source; -} - -/* structure for SafeClone calculations */ -struct cloneISCIIStruct { - UConverter cnv; - UConverterDataISCII mydata; -}; - -static UConverter * -_ISCII_SafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneISCIIStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); - - if (U_FAILURE(*status)) { - return 0; - } - - if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneISCIIStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); - localClone->cnv.extraInfo = &localClone->mydata; - localClone->cnv.isExtraLocal = TRUE; - - return &localClone->cnv; -} - -static void -_ISCIIGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) -{ - int32_t idx, script; - uint8_t mask; - - /* Since all ISCII versions allow switching to other ISCII - scripts, we add all roundtrippable characters to this set. 
*/ - sa->addRange(sa->set, 0, ASCII_END); - for (script = DEVANAGARI; script <= MALAYALAM; script++) { - mask = (uint8_t)(lookupInitialData[script].maskEnum); - for (idx = 0; idx < DELTA; idx++) { - /* added check for TELUGU character */ - if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { - sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); - } - } - } - sa->add(sa->set, DANDA); - sa->add(sa->set, DOUBLE_DANDA); - sa->add(sa->set, ZWNJ); - sa->add(sa->set, ZWJ); -} - -static const UConverterImpl _ISCIIImpl={ - - UCNV_ISCII, - - NULL, - NULL, - - _ISCIIOpen, - _ISCIIClose, - _ISCIIReset, - - UConverter_toUnicode_ISCII_OFFSETS_LOGIC, - UConverter_toUnicode_ISCII_OFFSETS_LOGIC, - UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, - UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, - NULL, - - NULL, - _ISCIIgetName, - NULL, - _ISCII_SafeClone, - _ISCIIGetUnicodeSet -}; - -static const UConverterStaticData _ISCIIStaticData={ - sizeof(UConverterStaticData), - "ISCII", - 0, - UCNV_IBM, - UCNV_ISCII, - 1, - 4, - { 0x1a, 0, 0, 0 }, - 0x1, - FALSE, - FALSE, - 0x0, - 0x0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ - -}; - -const UConverterSharedData _ISCIIData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl); - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnvisci.cpp b/deps/icu-small/source/common/ucnvisci.cpp new file mode 100644 index 0000000000..d0c07f2b27 --- /dev/null +++ b/deps/icu-small/source/common/ucnvisci.cpp @@ -0,0 +1,1635 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* file name: ucnvisci.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001JUN26 +* created by: Ram Viswanadha +* +* Date Name Description +* 24/7/2001 Ram Added support for EXT character handling +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cstring.h" +#include "uassert.h" + +#define UCNV_OPTIONS_VERSION_MASK 0xf +#define NUKTA 0x093c +#define HALANT 0x094d +#define ZWNJ 0x200c /* Zero Width Non Joiner */ +#define ZWJ 0x200d /* Zero width Joiner */ +#define INVALID_CHAR 0xffff +#define ATR 0xEF /* Attribute code */ +#define EXT 0xF0 /* Extension code */ +#define DANDA 0x0964 +#define DOUBLE_DANDA 0x0965 +#define ISCII_NUKTA 0xE9 +#define ISCII_HALANT 0xE8 +#define ISCII_DANDA 0xEA +#define ISCII_INV 0xD9 +#define ISCII_VOWEL_SIGN_E 0xE0 +#define INDIC_BLOCK_BEGIN 0x0900 +#define INDIC_BLOCK_END 0x0D7F +#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) +#define VOCALLIC_RR 0x0931 +#define LF 0x0A +#define ASCII_END 0xA0 +#define NO_CHAR_MARKER 0xFFFE +#define TELUGU_DELTA DELTA * TELUGU +#define DEV_ABBR_SIGN 0x0970 +#define DEV_ANUDATTA 0x0952 +#define EXT_RANGE_BEGIN 0xA1 +#define EXT_RANGE_END 0xEE + +#define PNJ_DELTA 0x0100 +#define PNJ_BINDI 0x0A02 +#define PNJ_TIPPI 0x0A70 +#define PNJ_SIGN_VIRAMA 0x0A4D +#define PNJ_ADHAK 0x0A71 +#define PNJ_HA 0x0A39 +#define PNJ_RRA 0x0A5C + +typedef enum { + DEVANAGARI =0, + BENGALI, + GURMUKHI, + GUJARATI, + ORIYA, + TAMIL, + TELUGU, + KANNADA, + MALAYALAM, + DELTA=0x80 +}UniLang; + +/** + * Enumeration for switching code pages if + + * is encountered + */ +typedef enum { + DEF = 0x40, + RMN = 0x41, + DEV = 0x42, + BNG = 0x43, + TML = 0x44, + TLG = 0x45, + ASM 
= 0x46, + ORI = 0x47, + KND = 0x48, + MLM = 0x49, + GJR = 0x4A, + PNJ = 0x4B, + ARB = 0x71, + PES = 0x72, + URD = 0x73, + SND = 0x74, + KSM = 0x75, + PST = 0x76 +}ISCIILang; + +typedef enum { + DEV_MASK =0x80, + PNJ_MASK =0x40, + GJR_MASK =0x20, + ORI_MASK =0x10, + BNG_MASK =0x08, + KND_MASK =0x04, + MLM_MASK =0x02, + TML_MASK =0x01, + ZERO =0x00 +}MaskEnum; + +#define ISCII_CNV_PREFIX "ISCII,version=" + +typedef struct { + UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ + UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ + uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ + uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ + uint16_t currentDeltaToUnicode; /* current delta in Indic block */ + MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ + MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ + MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ + UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ + UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ + char name[sizeof(ISCII_CNV_PREFIX) + 1]; + UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. 
*/ +} UConverterDataISCII; + +typedef struct LookupDataStruct { + UniLang uniLang; + MaskEnum maskEnum; + ISCIILang isciiLang; +} LookupDataStruct; + +static const LookupDataStruct lookupInitialData[]={ + { DEVANAGARI, DEV_MASK, DEV }, + { BENGALI, BNG_MASK, BNG }, + { GURMUKHI, PNJ_MASK, PNJ }, + { GUJARATI, GJR_MASK, GJR }, + { ORIYA, ORI_MASK, ORI }, + { TAMIL, TML_MASK, TML }, + { TELUGU, KND_MASK, TLG }, + { KANNADA, KND_MASK, KND }, + { MALAYALAM, MLM_MASK, MLM } +}; + +/* + * For special handling of certain Gurmukhi characters. + * Bit 0 (value 1): PNJ consonant + * Bit 1 (value 2): PNJ Bindi Tippi + */ +static const uint8_t pnjMap[80] = { + /* 0A00..0A0F */ + 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0A10..0A1F */ + 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 0A20..0A2F */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, + /* 0A30..0A3F */ + 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, + /* 0A40..0A4F */ + 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static UBool +isPNJConsonant(UChar32 c) { + if (c < 0xa00 || 0xa50 <= c) { + return FALSE; + } else { + return (UBool)(pnjMap[c - 0xa00] & 1); + } +} + +static UBool +isPNJBindiTippi(UChar32 c) { + if (c < 0xa00 || 0xa50 <= c) { + return FALSE; + } else { + return (UBool)(pnjMap[c - 0xa00] >> 1); + } +} +U_CDECL_BEGIN +static void U_CALLCONV +_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { + if(pArgs->onlyTestIsLoadable) { + return; + } + + cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); + + if (cnv->extraInfo != NULL) { + int32_t len=0; + UConverterDataISCII *converterData= + (UConverterDataISCII *) cnv->extraInfo; + converterData->contextCharToUnicode=NO_CHAR_MARKER; + cnv->toUnicodeStatus = missingCharMarker; + converterData->contextCharFromUnicode=0x0000; + converterData->resetToDefaultToUnicode=FALSE; + /* check if the version requested is supported */ + if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { + /* initialize state 
variables */ + converterData->currentDeltaFromUnicode + = converterData->currentDeltaToUnicode + = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); + + converterData->currentMaskFromUnicode + = converterData->currentMaskToUnicode + = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; + + converterData->isFirstBuffer=TRUE; + (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); + len = (int32_t)uprv_strlen(converterData->name); + converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); + converterData->name[len+1]=0; + + converterData->prevToUnicodeStatus = 0x0000; + } else { + uprv_free(cnv->extraInfo); + cnv->extraInfo = NULL; + *errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + + } else { + *errorCode =U_MEMORY_ALLOCATION_ERROR; + } +} + +static void U_CALLCONV +_ISCIIClose(UConverter *cnv) { + if (cnv->extraInfo!=NULL) { + if (!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo=NULL; + } +} + +static const char* U_CALLCONV +_ISCIIgetName(const UConverter* cnv) { + if (cnv->extraInfo) { + UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; + return myData->name; + } + return NULL; +} + +static void U_CALLCONV +_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { + UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); + if (choice<=UCNV_RESET_TO_UNICODE) { + cnv->toUnicodeStatus = missingCharMarker; + cnv->mode=0; + data->currentDeltaToUnicode=data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + data->contextCharToUnicode=NO_CHAR_MARKER; + data->prevToUnicodeStatus = 0x0000; + } + if (choice!=UCNV_RESET_TO_UNICODE) { + cnv->fromUChar32=0x0000; + data->contextCharFromUnicode=0x00; + data->currentMaskFromUnicode=data->defMaskToUnicode; + data->currentDeltaFromUnicode=data->defDeltaToUnicode; + data->isFirstBuffer=TRUE; + 
data->resetToDefaultToUnicode=FALSE; + } +} + +/** + * The values in validity table are indexed by the lower bits of Unicode + * range 0x0900 - 0x09ff. The values have a structure like: + * --------------------------------------------------------------- + * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | + * | | | | | ASM | KND | | | + * --------------------------------------------------------------- + * If a code point is valid in a particular script + * then that bit is turned on + * + * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for + * to represent these languages + * + * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case + * and combine and use 1 bit to represent these languages. + * + * TODO: It is probably easier to understand and maintain to change this + * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. + */ + +static const uint8_t validityTable[128] = { +/* This state table is tool generated please do not edit unless you know exactly what you are doing */ +/* Note: This table was edited to mirror the Windows XP implementation */ +/*ISCII:Valid:Unicode */ +/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + 
TML_MASK , +/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK 
+ BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , +/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + 
MLM_MASK + ZERO , +/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , +/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , +/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 
+/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , +/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 
0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , +/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , +/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + 
GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/* + * The length of the array is 128 to provide values for 0x900..0x97f. + * The last 15 entries for 0x971..0x97f of the validity table are all zero + * because no Indic script uses such Unicode code points. 
+ */ +/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO +}; + +static const uint16_t fromUnicodeTable[128]={ + 0x00a0 ,/* 0x0900 */ + 0x00a1 ,/* 0x0901 */ + 0x00a2 ,/* 0x0902 */ + 0x00a3 ,/* 0x0903 */ + 0xa4e0 ,/* 0x0904 */ + 0x00a4 ,/* 0x0905 */ + 0x00a5 ,/* 0x0906 */ + 0x00a6 ,/* 0x0907 */ + 0x00a7 ,/* 0x0908 */ + 0x00a8 ,/* 0x0909 */ + 0x00a9 ,/* 0x090a */ + 0x00aa ,/* 0x090b */ + 0xA6E9 ,/* 0x090c */ + 0x00ae ,/* 0x090d */ + 0x00ab ,/* 0x090e */ + 0x00ac ,/* 0x090f */ + 0x00ad ,/* 0x0910 */ + 0x00b2 ,/* 0x0911 */ + 0x00af ,/* 0x0912 */ + 0x00b0 ,/* 0x0913 */ + 0x00b1 ,/* 0x0914 */ + 0x00b3 ,/* 0x0915 */ + 0x00b4 ,/* 0x0916 */ + 0x00b5 ,/* 0x0917 */ + 0x00b6 ,/* 0x0918 */ + 0x00b7 ,/* 0x0919 */ + 0x00b8 ,/* 0x091a */ + 0x00b9 ,/* 0x091b */ + 0x00ba ,/* 0x091c */ + 0x00bb ,/* 0x091d */ + 0x00bc ,/* 0x091e */ + 0x00bd ,/* 0x091f */ + 0x00be ,/* 0x0920 */ + 0x00bf ,/* 0x0921 */ + 0x00c0 ,/* 0x0922 */ + 0x00c1 ,/* 0x0923 */ + 0x00c2 ,/* 0x0924 */ + 0x00c3 ,/* 0x0925 */ + 0x00c4 ,/* 0x0926 */ + 0x00c5 ,/* 0x0927 */ + 0x00c6 ,/* 0x0928 */ + 0x00c7 ,/* 0x0929 */ + 0x00c8 ,/* 0x092a */ + 0x00c9 ,/* 0x092b */ + 0x00ca ,/* 0x092c */ + 0x00cb ,/* 0x092d */ + 0x00cc ,/* 0x092e */ + 0x00cd ,/* 0x092f */ + 0x00cf ,/* 0x0930 */ + 0x00d0 ,/* 0x0931 */ + 0x00d1 ,/* 0x0932 */ + 0x00d2 ,/* 0x0933 */ + 0x00d3 ,/* 0x0934 */ + 0x00d4 ,/* 0x0935 */ + 0x00d5 ,/* 0x0936 */ + 0x00d6 ,/* 0x0937 */ + 0x00d7 ,/* 0x0938 */ + 0x00d8 ,/* 0x0939 */ + 0xFFFF ,/* 0x093A */ + 0xFFFF ,/* 0x093B */ + 0x00e9 ,/* 0x093c */ + 0xEAE9 ,/* 0x093d */ + 0x00da ,/* 0x093e */ + 0x00db ,/* 0x093f */ + 0x00dc ,/* 0x0940 */ + 0x00dd ,/* 0x0941 */ + 0x00de ,/* 0x0942 */ + 0x00df ,/* 0x0943 */ + 0xDFE9 ,/* 0x0944 */ + 0x00e3 ,/* 0x0945 */ + 0x00e0 ,/* 0x0946 */ + 0x00e1 ,/* 0x0947 */ + 0x00e2 ,/* 0x0948 */ + 0x00e7 ,/* 0x0949 */ + 0x00e4 ,/* 0x094a */ + 0x00e5 ,/* 0x094b */ + 0x00e6 ,/* 0x094c */ + 0x00e8 ,/* 0x094d */ + 0x00ec ,/* 0x094e */ + 0x00ed ,/* 0x094f */ + 0xA1E9 ,/* 0x0950 */ 
/* OM Symbol */ + 0xFFFF ,/* 0x0951 */ + 0xF0B8 ,/* 0x0952 */ + 0xFFFF ,/* 0x0953 */ + 0xFFFF ,/* 0x0954 */ + 0xFFFF ,/* 0x0955 */ + 0xFFFF ,/* 0x0956 */ + 0xFFFF ,/* 0x0957 */ + 0xb3e9 ,/* 0x0958 */ + 0xb4e9 ,/* 0x0959 */ + 0xb5e9 ,/* 0x095a */ + 0xbae9 ,/* 0x095b */ + 0xbfe9 ,/* 0x095c */ + 0xC0E9 ,/* 0x095d */ + 0xc9e9 ,/* 0x095e */ + 0x00ce ,/* 0x095f */ + 0xAAe9 ,/* 0x0960 */ + 0xA7E9 ,/* 0x0961 */ + 0xDBE9 ,/* 0x0962 */ + 0xDCE9 ,/* 0x0963 */ + 0x00ea ,/* 0x0964 */ + 0xeaea ,/* 0x0965 */ + 0x00f1 ,/* 0x0966 */ + 0x00f2 ,/* 0x0967 */ + 0x00f3 ,/* 0x0968 */ + 0x00f4 ,/* 0x0969 */ + 0x00f5 ,/* 0x096a */ + 0x00f6 ,/* 0x096b */ + 0x00f7 ,/* 0x096c */ + 0x00f8 ,/* 0x096d */ + 0x00f9 ,/* 0x096e */ + 0x00fa ,/* 0x096f */ + 0xF0BF ,/* 0x0970 */ + 0xFFFF ,/* 0x0971 */ + 0xFFFF ,/* 0x0972 */ + 0xFFFF ,/* 0x0973 */ + 0xFFFF ,/* 0x0974 */ + 0xFFFF ,/* 0x0975 */ + 0xFFFF ,/* 0x0976 */ + 0xFFFF ,/* 0x0977 */ + 0xFFFF ,/* 0x0978 */ + 0xFFFF ,/* 0x0979 */ + 0xFFFF ,/* 0x097a */ + 0xFFFF ,/* 0x097b */ + 0xFFFF ,/* 0x097c */ + 0xFFFF ,/* 0x097d */ + 0xFFFF ,/* 0x097e */ + 0xFFFF ,/* 0x097f */ +}; +static const uint16_t toUnicodeTable[256]={ + 0x0000,/* 0x00 */ + 0x0001,/* 0x01 */ + 0x0002,/* 0x02 */ + 0x0003,/* 0x03 */ + 0x0004,/* 0x04 */ + 0x0005,/* 0x05 */ + 0x0006,/* 0x06 */ + 0x0007,/* 0x07 */ + 0x0008,/* 0x08 */ + 0x0009,/* 0x09 */ + 0x000a,/* 0x0a */ + 0x000b,/* 0x0b */ + 0x000c,/* 0x0c */ + 0x000d,/* 0x0d */ + 0x000e,/* 0x0e */ + 0x000f,/* 0x0f */ + 0x0010,/* 0x10 */ + 0x0011,/* 0x11 */ + 0x0012,/* 0x12 */ + 0x0013,/* 0x13 */ + 0x0014,/* 0x14 */ + 0x0015,/* 0x15 */ + 0x0016,/* 0x16 */ + 0x0017,/* 0x17 */ + 0x0018,/* 0x18 */ + 0x0019,/* 0x19 */ + 0x001a,/* 0x1a */ + 0x001b,/* 0x1b */ + 0x001c,/* 0x1c */ + 0x001d,/* 0x1d */ + 0x001e,/* 0x1e */ + 0x001f,/* 0x1f */ + 0x0020,/* 0x20 */ + 0x0021,/* 0x21 */ + 0x0022,/* 0x22 */ + 0x0023,/* 0x23 */ + 0x0024,/* 0x24 */ + 0x0025,/* 0x25 */ + 0x0026,/* 0x26 */ + 0x0027,/* 0x27 */ + 0x0028,/* 0x28 */ + 0x0029,/* 0x29 */ + 0x002a,/* 
0x2a */ + 0x002b,/* 0x2b */ + 0x002c,/* 0x2c */ + 0x002d,/* 0x2d */ + 0x002e,/* 0x2e */ + 0x002f,/* 0x2f */ + 0x0030,/* 0x30 */ + 0x0031,/* 0x31 */ + 0x0032,/* 0x32 */ + 0x0033,/* 0x33 */ + 0x0034,/* 0x34 */ + 0x0035,/* 0x35 */ + 0x0036,/* 0x36 */ + 0x0037,/* 0x37 */ + 0x0038,/* 0x38 */ + 0x0039,/* 0x39 */ + 0x003A,/* 0x3A */ + 0x003B,/* 0x3B */ + 0x003c,/* 0x3c */ + 0x003d,/* 0x3d */ + 0x003e,/* 0x3e */ + 0x003f,/* 0x3f */ + 0x0040,/* 0x40 */ + 0x0041,/* 0x41 */ + 0x0042,/* 0x42 */ + 0x0043,/* 0x43 */ + 0x0044,/* 0x44 */ + 0x0045,/* 0x45 */ + 0x0046,/* 0x46 */ + 0x0047,/* 0x47 */ + 0x0048,/* 0x48 */ + 0x0049,/* 0x49 */ + 0x004a,/* 0x4a */ + 0x004b,/* 0x4b */ + 0x004c,/* 0x4c */ + 0x004d,/* 0x4d */ + 0x004e,/* 0x4e */ + 0x004f,/* 0x4f */ + 0x0050,/* 0x50 */ + 0x0051,/* 0x51 */ + 0x0052,/* 0x52 */ + 0x0053,/* 0x53 */ + 0x0054,/* 0x54 */ + 0x0055,/* 0x55 */ + 0x0056,/* 0x56 */ + 0x0057,/* 0x57 */ + 0x0058,/* 0x58 */ + 0x0059,/* 0x59 */ + 0x005a,/* 0x5a */ + 0x005b,/* 0x5b */ + 0x005c,/* 0x5c */ + 0x005d,/* 0x5d */ + 0x005e,/* 0x5e */ + 0x005f,/* 0x5f */ + 0x0060,/* 0x60 */ + 0x0061,/* 0x61 */ + 0x0062,/* 0x62 */ + 0x0063,/* 0x63 */ + 0x0064,/* 0x64 */ + 0x0065,/* 0x65 */ + 0x0066,/* 0x66 */ + 0x0067,/* 0x67 */ + 0x0068,/* 0x68 */ + 0x0069,/* 0x69 */ + 0x006a,/* 0x6a */ + 0x006b,/* 0x6b */ + 0x006c,/* 0x6c */ + 0x006d,/* 0x6d */ + 0x006e,/* 0x6e */ + 0x006f,/* 0x6f */ + 0x0070,/* 0x70 */ + 0x0071,/* 0x71 */ + 0x0072,/* 0x72 */ + 0x0073,/* 0x73 */ + 0x0074,/* 0x74 */ + 0x0075,/* 0x75 */ + 0x0076,/* 0x76 */ + 0x0077,/* 0x77 */ + 0x0078,/* 0x78 */ + 0x0079,/* 0x79 */ + 0x007a,/* 0x7a */ + 0x007b,/* 0x7b */ + 0x007c,/* 0x7c */ + 0x007d,/* 0x7d */ + 0x007e,/* 0x7e */ + 0x007f,/* 0x7f */ + 0x0080,/* 0x80 */ + 0x0081,/* 0x81 */ + 0x0082,/* 0x82 */ + 0x0083,/* 0x83 */ + 0x0084,/* 0x84 */ + 0x0085,/* 0x85 */ + 0x0086,/* 0x86 */ + 0x0087,/* 0x87 */ + 0x0088,/* 0x88 */ + 0x0089,/* 0x89 */ + 0x008a,/* 0x8a */ + 0x008b,/* 0x8b */ + 0x008c,/* 0x8c */ + 0x008d,/* 0x8d */ + 0x008e,/* 
0x8e */ + 0x008f,/* 0x8f */ + 0x0090,/* 0x90 */ + 0x0091,/* 0x91 */ + 0x0092,/* 0x92 */ + 0x0093,/* 0x93 */ + 0x0094,/* 0x94 */ + 0x0095,/* 0x95 */ + 0x0096,/* 0x96 */ + 0x0097,/* 0x97 */ + 0x0098,/* 0x98 */ + 0x0099,/* 0x99 */ + 0x009a,/* 0x9a */ + 0x009b,/* 0x9b */ + 0x009c,/* 0x9c */ + 0x009d,/* 0x9d */ + 0x009e,/* 0x9e */ + 0x009f,/* 0x9f */ + 0x00A0,/* 0xa0 */ + 0x0901,/* 0xa1 */ + 0x0902,/* 0xa2 */ + 0x0903,/* 0xa3 */ + 0x0905,/* 0xa4 */ + 0x0906,/* 0xa5 */ + 0x0907,/* 0xa6 */ + 0x0908,/* 0xa7 */ + 0x0909,/* 0xa8 */ + 0x090a,/* 0xa9 */ + 0x090b,/* 0xaa */ + 0x090e,/* 0xab */ + 0x090f,/* 0xac */ + 0x0910,/* 0xad */ + 0x090d,/* 0xae */ + 0x0912,/* 0xaf */ + 0x0913,/* 0xb0 */ + 0x0914,/* 0xb1 */ + 0x0911,/* 0xb2 */ + 0x0915,/* 0xb3 */ + 0x0916,/* 0xb4 */ + 0x0917,/* 0xb5 */ + 0x0918,/* 0xb6 */ + 0x0919,/* 0xb7 */ + 0x091a,/* 0xb8 */ + 0x091b,/* 0xb9 */ + 0x091c,/* 0xba */ + 0x091d,/* 0xbb */ + 0x091e,/* 0xbc */ + 0x091f,/* 0xbd */ + 0x0920,/* 0xbe */ + 0x0921,/* 0xbf */ + 0x0922,/* 0xc0 */ + 0x0923,/* 0xc1 */ + 0x0924,/* 0xc2 */ + 0x0925,/* 0xc3 */ + 0x0926,/* 0xc4 */ + 0x0927,/* 0xc5 */ + 0x0928,/* 0xc6 */ + 0x0929,/* 0xc7 */ + 0x092a,/* 0xc8 */ + 0x092b,/* 0xc9 */ + 0x092c,/* 0xca */ + 0x092d,/* 0xcb */ + 0x092e,/* 0xcc */ + 0x092f,/* 0xcd */ + 0x095f,/* 0xce */ + 0x0930,/* 0xcf */ + 0x0931,/* 0xd0 */ + 0x0932,/* 0xd1 */ + 0x0933,/* 0xd2 */ + 0x0934,/* 0xd3 */ + 0x0935,/* 0xd4 */ + 0x0936,/* 0xd5 */ + 0x0937,/* 0xd6 */ + 0x0938,/* 0xd7 */ + 0x0939,/* 0xd8 */ + 0x200D,/* 0xd9 */ + 0x093e,/* 0xda */ + 0x093f,/* 0xdb */ + 0x0940,/* 0xdc */ + 0x0941,/* 0xdd */ + 0x0942,/* 0xde */ + 0x0943,/* 0xdf */ + 0x0946,/* 0xe0 */ + 0x0947,/* 0xe1 */ + 0x0948,/* 0xe2 */ + 0x0945,/* 0xe3 */ + 0x094a,/* 0xe4 */ + 0x094b,/* 0xe5 */ + 0x094c,/* 0xe6 */ + 0x0949,/* 0xe7 */ + 0x094d,/* 0xe8 */ + 0x093c,/* 0xe9 */ + 0x0964,/* 0xea */ + 0xFFFF,/* 0xeb */ + 0xFFFF,/* 0xec */ + 0xFFFF,/* 0xed */ + 0xFFFF,/* 0xee */ + 0xFFFF,/* 0xef */ + 0xFFFF,/* 0xf0 */ + 0x0966,/* 0xf1 */ + 0x0967,/* 
0xf2 */ + 0x0968,/* 0xf3 */ + 0x0969,/* 0xf4 */ + 0x096a,/* 0xf5 */ + 0x096b,/* 0xf6 */ + 0x096c,/* 0xf7 */ + 0x096d,/* 0xf8 */ + 0x096e,/* 0xf9 */ + 0x096f,/* 0xfa */ + 0xFFFF,/* 0xfb */ + 0xFFFF,/* 0xfc */ + 0xFFFF,/* 0xfd */ + 0xFFFF,/* 0xfe */ + 0xFFFF /* 0xff */ +}; + +static const uint16_t vowelSignESpecialCases[][2]={ + { 2 /*length of array*/ , 0 }, + { 0xA4 , 0x0904 }, +}; + +static const uint16_t nuktaSpecialCases[][2]={ + { 16 /*length of array*/ , 0 }, + { 0xA6 , 0x090c }, + { 0xEA , 0x093D }, + { 0xDF , 0x0944 }, + { 0xA1 , 0x0950 }, + { 0xb3 , 0x0958 }, + { 0xb4 , 0x0959 }, + { 0xb5 , 0x095a }, + { 0xba , 0x095b }, + { 0xbf , 0x095c }, + { 0xC0 , 0x095d }, + { 0xc9 , 0x095e }, + { 0xAA , 0x0960 }, + { 0xA7 , 0x0961 }, + { 0xDB , 0x0962 }, + { 0xDC , 0x0963 }, +}; + + +#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \ + int32_t offset = (int32_t)(source - args->source-1); \ + /* write the targetUniChar to target */ \ + if(target < targetLimit){ \ + if(targetByteUnit <= 0xFF){ \ + *(target)++ = (uint8_t)(targetByteUnit); \ + if(offsets){ \ + *(offsets++) = offset; \ + } \ + }else{ \ + if (targetByteUnit > 0xFFFF) { \ + *(target)++ = (uint8_t)(targetByteUnit>>16); \ + if (offsets) { \ + --offset; \ + *(offsets++) = offset; \ + } \ + } \ + if (!(target < targetLimit)) { \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t)(targetByteUnit >> 8); \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t)targetByteUnit; \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } else { \ + *(target)++ = (uint8_t)(targetByteUnit>>8); \ + if(offsets){ \ + *(offsets++) = offset; \ + } \ + if(target < targetLimit){ \ + *(target)++ = (uint8_t) targetByteUnit; \ + if(offsets){ \ + *(offsets++) = offset ; \ + } \ + }else{ \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ + (uint8_t) (targetByteUnit); \ + *err = 
U_BUFFER_OVERFLOW_ERROR; \ + } \ + } \ + } \ + }else{ \ + if (targetByteUnit & 0xFF0000) { \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit >>16); \ + } \ + if(targetByteUnit & 0xFF00){ \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit >>8); \ + } \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit); \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ +} + +/* Rules: + * Explicit Halant : + * + + * Soft Halant : + * + + */ +static void U_CALLCONV +UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( + UConverterFromUnicodeArgs * args, UErrorCode * err) { + const UChar *source = args->source; + const UChar *sourceLimit = args->sourceLimit; + unsigned char *target = (unsigned char *) args->target; + unsigned char *targetLimit = (unsigned char *) args->targetLimit; + int32_t* offsets = args->offsets; + uint32_t targetByteUnit = 0x0000; + UChar32 sourceChar = 0x0000; + UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ + UConverterDataISCII *converterData; + uint16_t newDelta=0; + uint16_t range = 0; + UBool deltaChanged = FALSE; + + if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + /* initialize data */ + converterData=(UConverterDataISCII*)args->converter->extraInfo; + newDelta=converterData->currentDeltaFromUnicode; + range = (uint16_t)(newDelta/DELTA); + + if ((sourceChar = args->converter->fromUChar32)!=0) { + goto getTrail; + } + + /*writing the char to the output stream */ + while (source < sourceLimit) { + /* Write the language code following LF only if LF is not the last character. 
*/ + if (args->converter->fromUnicodeStatus == LF) { + targetByteUnit = ATR<<8; + targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; + args->converter->fromUnicodeStatus = 0x0000; + /* now append ATR and language code */ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } + + sourceChar = *source++; + tempContextFromUnicode = converterData->contextCharFromUnicode; + + targetByteUnit = missingCharMarker; + + /*check if input is in ASCII and C0 control codes range*/ + if (sourceChar <= ASCII_END) { + args->converter->fromUnicodeStatus = sourceChar; + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); + if (U_FAILURE(*err)) { + break; + } + continue; + } + switch (sourceChar) { + case ZWNJ: + /* contextChar has HALANT */ + if (converterData->contextCharFromUnicode) { + converterData->contextCharFromUnicode = 0x00; + targetByteUnit = ISCII_HALANT; + } else { + /* consume ZWNJ and continue */ + converterData->contextCharFromUnicode = 0x00; + continue; + } + break; + case ZWJ: + /* contextChar has HALANT */ + if (converterData->contextCharFromUnicode) { + targetByteUnit = ISCII_NUKTA; + } else { + targetByteUnit =ISCII_INV; + } + converterData->contextCharFromUnicode = 0x00; + break; + default: + /* is the sourceChar in the INDIC_RANGE? */ + if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { + /* Danda and Double Danda are valid in Northern scripts.. since Unicode + * does not include these codepoints in all Northern scrips we need to + * filter them out + */ + if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { + /* find out to which block the souceChar belongs*/ + range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); + newDelta =(uint16_t)(range*DELTA); + + /* Now are we in the same block as the previous? 
*/ + if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { + converterData->currentDeltaFromUnicode = newDelta; + converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; + deltaChanged =TRUE; + converterData->isFirstBuffer=FALSE; + } + + if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { + if (sourceChar == PNJ_TIPPI) { + /* Make sure Tippi is converterd to Bindi. */ + sourceChar = PNJ_BINDI; + } else if (sourceChar == PNJ_ADHAK) { + /* This is for consonant cluster handling. */ + converterData->contextCharFromUnicode = PNJ_ADHAK; + } + + } + /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ + /* now subtract the new delta from sourceChar*/ + sourceChar -= converterData->currentDeltaFromUnicode; + } + + /* get the target byte unit */ + targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; + + /* is the code point valid in current script? */ + if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { + /* Vocallic RR is assigned in ISCII Telugu and Unicode */ + if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { + targetByteUnit=missingCharMarker; + } + } + + if (deltaChanged) { + /* we are in a script block which is different than + * previous sourceChar's script block write ATR and language codes + */ + uint32_t temp=0; + temp =(uint16_t)(ATR<<8); + temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); + /* reset */ + deltaChanged=FALSE; + /* now append ATR and language code */ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); + if (U_FAILURE(*err)) { + break; + } + } + + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { + continue; + } + } + /* reset context char */ + converterData->contextCharFromUnicode = 0x00; + break; + } + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && 
isPNJConsonant((sourceChar + PNJ_DELTA))) { + /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ + /* reset context char */ + converterData->contextCharFromUnicode = 0x0000; + targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; + /* write targetByteUnit to target */ + WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } else if (targetByteUnit != missingCharMarker) { + if (targetByteUnit==ISCII_HALANT) { + converterData->contextCharFromUnicode = (UChar)targetByteUnit; + } + /* write targetByteUnit to target*/ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } else { + /* oops.. the code point is unassigned */ + /*check if the char is a First surrogate*/ + if (U16_IS_SURROGATE(sourceChar)) { + if (U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if (source < sourceLimit) { + /* test the following code unit */ + UChar trail= (*source); + if (U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + *err =U_INVALID_CHAR_FOUND; + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* no more input */ + *err = U_ZERO_ERROR; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; + } + + args->converter->fromUChar32=sourceChar; + break; + } + }/* end while(mySourceIndexsource = source; + args->target = (char*)target; +} + +static const uint16_t lookupTable[][2]={ + { ZERO, ZERO }, /*DEFALT*/ + { ZERO, ZERO }, 
/*ROMAN*/ + { DEVANAGARI, DEV_MASK }, + { BENGALI, BNG_MASK }, + { TAMIL, TML_MASK }, + { TELUGU, KND_MASK }, + { BENGALI, BNG_MASK }, + { ORIYA, ORI_MASK }, + { KANNADA, KND_MASK }, + { MALAYALAM, MLM_MASK }, + { GUJARATI, GJR_MASK }, + { GURMUKHI, PNJ_MASK } +}; + +#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\ + /* add offset to current Indic Block */ \ + if(targetUniChar>ASCII_END && \ + targetUniChar != ZWJ && \ + targetUniChar != ZWNJ && \ + targetUniChar != DANDA && \ + targetUniChar != DOUBLE_DANDA){ \ + \ + targetUniChar+=(uint16_t)(delta); \ + } \ + /* now write the targetUniChar */ \ + if(targettargetLimit){ \ + *(target)++ = (UChar)targetUniChar; \ + if(offsets){ \ + *(offsets)++ = (int32_t)(offset); \ + } \ + }else{ \ + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ + (UChar)targetUniChar; \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ +} + +#define GET_MAPPING(sourceChar,targetUniChar,data){ \ + targetUniChar = toUnicodeTable[(sourceChar)] ; \ + /* is the code point valid in current script? */ \ + if(sourceChar> ASCII_END && \ + (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ + /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ + if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ + targetUniChar!=VOCALLIC_RR){ \ + targetUniChar=missingCharMarker; \ + } \ + } \ +} + +/*********** + * Rules for ISCII to Unicode converter + * ISCII is stateful encoding. To convert ISCII bytes to Unicode, + * which has both precomposed and decomposed forms characters + * pre-context and post-context need to be considered. + * + * Post context + * i) ATR : Attribute code is used to declare the font and script switching. 
+ * Currently we only switch scripts and font codes consumed without generating an error + * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, + * obsolete characters + * Pre context + * i) Halant: if preceeded by a halant then it is a explicit halant + * ii) Nukta : + * a) if preceeded by a halant then it is a soft halant + * b) if preceeded by specific consonants and the ligatures have pre-composed + * characters in Unicode then convert to pre-composed characters + * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda + * + */ + +static void U_CALLCONV +UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { + const char *source = ( char *) args->source; + UChar *target = args->target; + const char *sourceLimit = args->sourceLimit; + const UChar* targetLimit = args->targetLimit; + uint32_t targetUniChar = 0x0000; + uint8_t sourceChar = 0x0000; + UConverterDataISCII* data; + UChar32* toUnicodeStatus=NULL; + UChar32 tempTargetUniChar = 0x0000; + UChar* contextCharToUnicode= NULL; + UBool found; + int i; + int offset = 0; + + if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + data = (UConverterDataISCII*)(args->converter->extraInfo); + contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ + toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ + + while (U_SUCCESS(*err) && sourcecurrentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); + data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; + } else if (sourceChar==DEF) { + /* switch back to default */ + data->currentDeltaToUnicode = data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + } else { + if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { + /* these are 
display codes consume and continue */ + } else { + *err =U_ILLEGAL_CHAR_FOUND; + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + goto CALLBACK; + } + } + + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + + continue; + + } else if (*contextCharToUnicode==EXT) { + /* check if sourceChar is in 0xA1-0xEE range */ + if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { + /* We currently support only Anudatta and Devanagari abbreviation sign */ + if (sourceChar==0xBF || sourceChar == 0xB8) { + targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; + + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + *contextCharToUnicode= NO_CHAR_MARKER; + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* write to target */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + + continue; + } + } + /* byte unit is unassigned */ + targetUniChar = missingCharMarker; + *err= U_INVALID_CHAR_FOUND; + } else { + /* only 0xA1 - 0xEE are legal after EXT char */ + *contextCharToUnicode= NO_CHAR_MARKER; + *err = U_ILLEGAL_CHAR_FOUND; + } + goto CALLBACK; + } else if (*contextCharToUnicode==ISCII_INV) { + if (sourceChar==ISCII_HALANT) { + targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ + } else { + targetUniChar = ZWJ; + } + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* write to target */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + } + + /* look at the pre-context and perform special processing */ + switch (sourceChar) { + case ISCII_INV: + case EXT: + case ATR: + *contextCharToUnicode = (UChar)sourceChar; + + if (*toUnicodeStatus != missingCharMarker) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); + *toUnicodeStatus = missingCharMarker; + } + continue; + case ISCII_DANDA: + /* handle double danda*/ + if (*contextCharToUnicode== ISCII_DANDA) { + targetUniChar = DOUBLE_DANDA; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + } else { + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + } + break; + case ISCII_HALANT: + /* handle explicit halant */ + if (*contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWNJ; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + } else { + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + } + break; + case 0x0A: + case 0x0D: + data->resetToDefaultToUnicode = TRUE; + GET_MAPPING(sourceChar,targetUniChar,data) + ; + *contextCharToUnicode = sourceChar; + break; + + case ISCII_VOWEL_SIGN_E: + i=1; + found=FALSE; + for (; 
icurrentMaskToUnicode) { + /*targetUniChar += data->currentDeltaToUnicode ;*/ + *contextCharToUnicode= NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + break; + } + } + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + break; + + case ISCII_NUKTA: + /* handle soft halant */ + if (*contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWJ; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + break; + } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. + * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). 
+ */ + targetUniChar = PNJ_RRA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + if (U_SUCCESS(*err)) { + targetUniChar = PNJ_SIGN_VIRAMA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + if (U_SUCCESS(*err)) { + targetUniChar = PNJ_HA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + } else { + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; + } + } else { + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; + } + *toUnicodeStatus = missingCharMarker; + data->contextCharToUnicode = NO_CHAR_MARKER; + continue; + } else { + /* try to handle + ISCII_NUKTA special mappings */ + i=1; + found =FALSE; + for (; icurrentMaskToUnicode) { + /*targetUniChar += data->currentDeltaToUnicode ;*/ + *contextCharToUnicode= NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + if (data->currentDeltaToUnicode == PNJ_DELTA) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + continue; + } + break; + } + /* else fall through to default */ + } + /* else fall through to default */ + U_FALLTHROUGH; + } + default:GET_MAPPING(sourceChar,targetUniChar,data) + ; + *contextCharToUnicode = sourceChar; + break; + } + + if (*toUnicodeStatus != missingCharMarker) { + /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. 
*/ + if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && + (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) { + /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ + offset = (int)(source-args->source - 3); + tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ + *toUnicodeStatus = missingCharMarker; + continue; + } else { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. + * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. + */ + if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { + targetUniChar = PNJ_TIPPI - PNJ_DELTA; + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); + } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { + /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. 
*/ + data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; + } else { + /* write the previously mapped codepoint */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); + } + } + *toUnicodeStatus = missingCharMarker; + } + + if (targetUniChar != missingCharMarker) { + /* now save the targetUniChar for delayed write */ + *toUnicodeStatus = (UChar) targetUniChar; + if (data->resetToDefaultToUnicode==TRUE) { + data->currentDeltaToUnicode = data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + data->resetToDefaultToUnicode=FALSE; + } + } else { + + /* we reach here only if targetUniChar == missingCharMarker + * so assign codes to reason and err + */ + *err = U_INVALID_CHAR_FOUND; +CALLBACK: + args->converter->toUBytes[0] = (uint8_t) sourceChar; + args->converter->toULength = 1; + break; + } + + } else { + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { + /* end of the input stream */ + UConverter *cnv = args->converter; + + if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { + /* set toUBytes[] */ + cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; + cnv->toULength = 1; + + /* avoid looping on truncated sequences */ + *contextCharToUnicode = NO_CHAR_MARKER; + } else { + cnv->toULength = 0; + } + + if (*toUnicodeStatus != missingCharMarker) { + /* output a remaining target character */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); + *toUnicodeStatus = missingCharMarker; + } + } + + args->target = target; + args->source = source; +} + +/* structure for SafeClone calculations */ +struct cloneISCIIStruct { + UConverter cnv; + UConverterDataISCII mydata; +}; + +static UConverter * U_CALLCONV +_ISCII_SafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t 
*pBufferSize, + UErrorCode *status) +{ + struct cloneISCIIStruct * localClone; + int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); + + if (U_FAILURE(*status)) { + return 0; + } + + if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return 0; + } + + localClone = (struct cloneISCIIStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); + localClone->cnv.extraInfo = &localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + return &localClone->cnv; +} + +static void U_CALLCONV +_ISCIIGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) +{ + (void)cnv; + (void)which; + (void)pErrorCode; + int32_t idx, script; + uint8_t mask; + + /* Since all ISCII versions allow switching to other ISCII + scripts, we add all roundtrippable characters to this set. 
*/ + sa->addRange(sa->set, 0, ASCII_END); + for (script = DEVANAGARI; script <= MALAYALAM; script++) { + mask = (uint8_t)(lookupInitialData[script].maskEnum); + for (idx = 0; idx < DELTA; idx++) { + /* added check for TELUGU character */ + if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { + sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); + } + } + } + sa->add(sa->set, DANDA); + sa->add(sa->set, DOUBLE_DANDA); + sa->add(sa->set, ZWNJ); + sa->add(sa->set, ZWJ); +} +U_CDECL_END +static const UConverterImpl _ISCIIImpl={ + + UCNV_ISCII, + + NULL, + NULL, + + _ISCIIOpen, + _ISCIIClose, + _ISCIIReset, + + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, + NULL, + + NULL, + _ISCIIgetName, + NULL, + _ISCII_SafeClone, + _ISCIIGetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _ISCIIStaticData={ + sizeof(UConverterStaticData), + "ISCII", + 0, + UCNV_IBM, + UCNV_ISCII, + 1, + 4, + { 0x1a, 0, 0, 0 }, + 0x1, + FALSE, + FALSE, + 0x0, + 0x0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ + +}; + +const UConverterSharedData _ISCIIData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl); + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/icu-small/source/common/ucnvlat1.c b/deps/icu-small/source/common/ucnvlat1.c deleted file mode 100644 index fe11b1844c..0000000000 --- a/deps/icu-small/source/common/ucnvlat1.c +++ /dev/null @@ -1,740 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnvlat1.cpp -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000feb07 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "unicode/utf8.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" - -/* control optimizations according to the platform */ -#define LATIN1_UNROLL_FROM_UNICODE 1 - -/* ISO 8859-1 --------------------------------------------------------------- */ - -/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ -static void -_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source; - UChar *target; - int32_t targetCapacity, length; - int32_t *offsets; - - int32_t sourceIndex; - - /* set up the local pointers */ - source=(const uint8_t *)pArgs->source; - target=pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - sourceIndex=0; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); - if(length<=targetCapacity) { - targetCapacity=length; - } else { - /* target will be full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - length=targetCapacity; - } - - if(targetCapacity>=8) { - /* This loop is unrolled for speed and improved pipelining. 
*/ - int32_t count, loops; - - loops=count=targetCapacity>>3; - length=targetCapacity&=0x7; - do { - target[0]=source[0]; - target[1]=source[1]; - target[2]=source[2]; - target[3]=source[3]; - target[4]=source[4]; - target[5]=source[5]; - target[6]=source[6]; - target[7]=source[7]; - target+=8; - source+=8; - } while(--count>0); - - if(offsets!=NULL) { - do { - offsets[0]=sourceIndex++; - offsets[1]=sourceIndex++; - offsets[2]=sourceIndex++; - offsets[3]=sourceIndex++; - offsets[4]=sourceIndex++; - offsets[5]=sourceIndex++; - offsets[6]=sourceIndex++; - offsets[7]=sourceIndex++; - offsets+=8; - } while(--loops>0); - } - } - - /* conversion loop */ - while(targetCapacity>0) { - *target++=*source++; - --targetCapacity; - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - - /* set offsets */ - if(offsets!=NULL) { - while(length>0) { - *offsets++=sourceIndex++; - --length; - } - pArgs->offsets=offsets; - } -} - -/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */ -static UChar32 -_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source=(const uint8_t *)pArgs->source; - if(source<(const uint8_t *)pArgs->sourceLimit) { - pArgs->source=(const char *)(source+1); - return *source; - } - - /* no output because of empty input */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; -} - -/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). 
*/ -static void -_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target, *oldTarget; - int32_t targetCapacity, length; - int32_t *offsets; - - UChar32 cp; - UChar c, max; - - int32_t sourceIndex; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=oldTarget=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - if(cnv->sharedData==&_Latin1Data) { - max=0xff; /* Latin-1 */ - } else { - max=0x7f; /* US-ASCII */ - } - - /* get the converter state from UConverter */ - cp=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= cp==0 ? 0 : -1; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length0) { - goto getTrail; - } - -#if LATIN1_UNROLL_FROM_UNICODE - /* unroll the loop with the most common case */ - if(targetCapacity>=16) { - int32_t count, loops; - UChar u, oredChars; - - loops=count=targetCapacity>>4; - do { - oredChars=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - 
*target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - - /* were all 16 entries really valid? */ - if(oredChars>max) { - /* no, return to the first of these 16 */ - source-=16; - target-=16; - break; - } - } while(--count>0); - count=loops-count; - targetCapacity-=16*count; - - if(offsets!=NULL) { - oldTarget+=16*count; - while(count>0) { - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - --count; - } - } - } -#endif - - /* conversion loop */ - c=0; - while(targetCapacity>0 && (c=*source++)<=max) { - /* convert the Unicode code point */ - *target++=(uint8_t)c; - --targetCapacity; - } - - if(c>max) { - cp=c; - if(!U_IS_SURROGATE(cp)) { - /* callback(unassigned) */ - } else if(U_IS_SURROGATE_LEAD(cp)) { -getTrail: - if(sourcefromUChar32=cp; - goto noMoreInput; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - } - - *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND; - cnv->fromUChar32=cp; - } -noMoreInput: - - /* set offsets since the start */ - if(offsets!=NULL) { - size_t count=target-oldTarget; - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - } - - if(U_SUCCESS(*pErrorCode) && source=(uint8_t *)pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). 
*/ -static void -ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - UConverter *utf8; - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - UChar32 c; - uint8_t b, t1; - - /* set up the local pointers */ - utf8=pToUArgs->converter; - source=(uint8_t *)pToUArgs->source; - sourceLimit=(uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - /* get the converter state from the UTF-8 UConverter */ - c=(UChar32)utf8->toUnicodeStatus; - if(c!=0 && source=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) { - ++source; - *target++=(uint8_t)(((c&3)<<6)|t1); - --targetCapacity; - - utf8->toUnicodeStatus=0; - utf8->toULength=0; - } else { - /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ - *pErrorCode=U_USING_DEFAULT_WARNING; - return; - } - } - - /* - * Make sure that the last byte sequence before sourceLimit is complete - * or runs into a lead byte. - * In the conversion loop compare source with sourceLimit only once - * per multi-byte character. - * For Latin-1, adjust sourceLimit only for 1 trail byte because - * the conversion loop handles at most 2-byte sequences. 
- */ - if(source0) { - b=*source++; - if((int8_t)b>=0) { - /* convert ASCII */ - *target++=(uint8_t)b; - --targetCapacity; - } else if( /* handle U+0080..U+00FF inline */ - b>=0xc2 && b<=0xc3 && - (t1=(uint8_t)(*source-0x80)) <= 0x3f - ) { - ++source; - *target++=(uint8_t)(((b&3)<<6)|t1); - --targetCapacity; - } else { - /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ - pToUArgs->source=(char *)(source-1); - pFromUArgs->target=(char *)target; - *pErrorCode=U_USING_DEFAULT_WARNING; - return; - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* - * The sourceLimit may have been adjusted before the conversion loop - * to stop before a truncated sequence. - * If so, then collect the truncated sequence now. - * For Latin-1, there is at most exactly one lead byte because of the - * smaller sourceLimit adjustment logic. - */ - if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { - utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++; - utf8->toULength=1; - utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1; - } - - /* write back the updated pointers */ - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; -} - -static void -_Latin1GetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - sa->addRange(sa->set, 0, 0xff); -} - -static const UConverterImpl _Latin1Impl={ - UCNV_LATIN_1, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - _Latin1ToUnicodeWithOffsets, - _Latin1ToUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _Latin1GetNextUChar, - - NULL, - NULL, - NULL, - NULL, - _Latin1GetUnicodeSet, - - NULL, - ucnv_Latin1FromUTF8 -}; - -static const UConverterStaticData _Latin1StaticData={ - sizeof(UConverterStaticData), - "ISO-8859-1", - 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, - { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, - 0, - 0, - { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _Latin1Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl); - -/* US-ASCII ----------------------------------------------------------------- */ - -/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ -static void -_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source, *sourceLimit; - UChar *target, *oldTarget; - int32_t targetCapacity, length; - int32_t *offsets; - - int32_t sourceIndex; - - uint8_t c; - - /* set up the local pointers */ - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=oldTarget=pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=0; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length=8) { - /* This loop is unrolled for speed and improved pipelining. */ - int32_t count, loops; - UChar oredChars; - - loops=count=targetCapacity>>3; - do { - oredChars=target[0]=source[0]; - oredChars|=target[1]=source[1]; - oredChars|=target[2]=source[2]; - oredChars|=target[3]=source[3]; - oredChars|=target[4]=source[4]; - oredChars|=target[5]=source[5]; - oredChars|=target[6]=source[6]; - oredChars|=target[7]=source[7]; - - /* were all 16 entries really valid? 
*/ - if(oredChars>0x7f) { - /* no, return to the first of these 16 */ - break; - } - source+=8; - target+=8; - } while(--count>0); - count=loops-count; - targetCapacity-=count*8; - - if(offsets!=NULL) { - oldTarget+=count*8; - while(count>0) { - offsets[0]=sourceIndex++; - offsets[1]=sourceIndex++; - offsets[2]=sourceIndex++; - offsets[3]=sourceIndex++; - offsets[4]=sourceIndex++; - offsets[5]=sourceIndex++; - offsets[6]=sourceIndex++; - offsets[7]=sourceIndex++; - offsets+=8; - --count; - } - } - } - - /* conversion loop */ - c=0; - while(targetCapacity>0 && (c=*source++)<=0x7f) { - *target++=c; - --targetCapacity; - } - - if(c>0x7f) { - /* callback(illegal); copy the current bytes to toUBytes[] */ - UConverter *cnv=pArgs->converter; - cnv->toUBytes[0]=c; - cnv->toULength=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else if(source=pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* set offsets since the start */ - if(offsets!=NULL) { - size_t count=target-oldTarget; - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */ -static UChar32 -_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source; - uint8_t b; - - source=(const uint8_t *)pArgs->source; - if(source<(const uint8_t *)pArgs->sourceLimit) { - b=*source++; - pArgs->source=(const char *)source; - if(b<=0x7f) { - return b; - } else { - UConverter *cnv=pArgs->converter; - cnv->toUBytes[0]=b; - cnv->toULength=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0xffff; - } - } - - /* no output because of empty input */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; -} - -/* "Convert" UTF-8 to US-ASCII: Validate and copy. 
*/ -static void -ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity, length; - - uint8_t c; - - if(pToUArgs->converter->toUnicodeStatus!=0) { - /* no handling of partial UTF-8 characters here, fall back to pivoting */ - *pErrorCode=U_USING_DEFAULT_WARNING; - return; - } - - /* set up the local pointers */ - source=(const uint8_t *)pToUArgs->source; - sourceLimit=(const uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - /* - * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length=16) { - int32_t count, loops; - uint8_t oredChars; - - loops=count=targetCapacity>>4; - do { - oredChars=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - - /* were all 16 entries really valid? 
*/ - if(oredChars>0x7f) { - /* no, return to the first of these 16 */ - source-=16; - target-=16; - break; - } - } while(--count>0); - count=loops-count; - targetCapacity-=16*count; - } - - /* conversion loop */ - c=0; - while(targetCapacity>0 && (c=*source)<=0x7f) { - ++source; - *target++=c; - --targetCapacity; - } - - if(c>0x7f) { - /* non-ASCII character, handle in standard converter */ - *pErrorCode=U_USING_DEFAULT_WARNING; - } else if(source=(const uint8_t *)pFromUArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pToUArgs->source=(const char *)source; - pFromUArgs->target=(char *)target; -} - -static void -_ASCIIGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - sa->addRange(sa->set, 0, 0x7f); -} - -static const UConverterImpl _ASCIIImpl={ - UCNV_US_ASCII, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - _ASCIIToUnicodeWithOffsets, - _ASCIIToUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _ASCIIGetNextUChar, - - NULL, - NULL, - NULL, - NULL, - _ASCIIGetUnicodeSet, - - NULL, - ucnv_ASCIIFromUTF8 -}; - -static const UConverterStaticData _ASCIIStaticData={ - sizeof(UConverterStaticData), - "US-ASCII", - 367, UCNV_IBM, UCNV_US_ASCII, 1, 1, - { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _ASCIIData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl); - -#endif diff --git a/deps/icu-small/source/common/ucnvlat1.cpp b/deps/icu-small/source/common/ucnvlat1.cpp new file mode 100644 index 0000000000..7a0dccd446 --- /dev/null +++ b/deps/icu-small/source/common/ucnvlat1.cpp @@ -0,0 +1,751 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnvlat1.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb07 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "unicode/utf8.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" + +/* control optimizations according to the platform */ +#define LATIN1_UNROLL_FROM_UNICODE 1 + +/* ISO 8859-1 --------------------------------------------------------------- */ + +/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ +U_CDECL_BEGIN +static void U_CALLCONV +_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source; + UChar *target; + int32_t targetCapacity, length; + int32_t *offsets; + + int32_t sourceIndex; + + /* set up the local pointers */ + source=(const uint8_t *)pArgs->source; + target=pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + sourceIndex=0; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); + if(length<=targetCapacity) { + targetCapacity=length; + } else { + /* target will be full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + length=targetCapacity; + } + + if(targetCapacity>=8) { + /* This loop is unrolled for speed and improved pipelining. 
*/ + int32_t count, loops; + + loops=count=targetCapacity>>3; + length=targetCapacity&=0x7; + do { + target[0]=source[0]; + target[1]=source[1]; + target[2]=source[2]; + target[3]=source[3]; + target[4]=source[4]; + target[5]=source[5]; + target[6]=source[6]; + target[7]=source[7]; + target+=8; + source+=8; + } while(--count>0); + + if(offsets!=NULL) { + do { + offsets[0]=sourceIndex++; + offsets[1]=sourceIndex++; + offsets[2]=sourceIndex++; + offsets[3]=sourceIndex++; + offsets[4]=sourceIndex++; + offsets[5]=sourceIndex++; + offsets[6]=sourceIndex++; + offsets[7]=sourceIndex++; + offsets+=8; + } while(--loops>0); + } + } + + /* conversion loop */ + while(targetCapacity>0) { + *target++=*source++; + --targetCapacity; + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + + /* set offsets */ + if(offsets!=NULL) { + while(length>0) { + *offsets++=sourceIndex++; + --length; + } + pArgs->offsets=offsets; + } +} + +/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */ +static UChar32 U_CALLCONV +_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source=(const uint8_t *)pArgs->source; + if(source<(const uint8_t *)pArgs->sourceLimit) { + pArgs->source=(const char *)(source+1); + return *source; + } + + /* no output because of empty input */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; +} + +/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). 
*/ +static void U_CALLCONV +_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target, *oldTarget; + int32_t targetCapacity, length; + int32_t *offsets; + + UChar32 cp; + UChar c, max; + + int32_t sourceIndex; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=oldTarget=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + if(cnv->sharedData==&_Latin1Data) { + max=0xff; /* Latin-1 */ + } else { + max=0x7f; /* US-ASCII */ + } + + /* get the converter state from UConverter */ + cp=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= cp==0 ? 0 : -1; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length0) { + goto getTrail; + } + +#if LATIN1_UNROLL_FROM_UNICODE + /* unroll the loop with the most common case */ + if(targetCapacity>=16) { + int32_t count, loops; + UChar u, oredChars; + + loops=count=targetCapacity>>4; + do { + oredChars=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + 
oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + + /* were all 16 entries really valid? */ + if(oredChars>max) { + /* no, return to the first of these 16 */ + source-=16; + target-=16; + break; + } + } while(--count>0); + count=loops-count; + targetCapacity-=16*count; + + if(offsets!=NULL) { + oldTarget+=16*count; + while(count>0) { + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + --count; + } + } + } +#endif + + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source++)<=max) { + /* convert the Unicode code point */ + *target++=(uint8_t)c; + --targetCapacity; + } + + if(c>max) { + cp=c; + if(!U_IS_SURROGATE(cp)) { + /* callback(unassigned) */ + } else if(U_IS_SURROGATE_LEAD(cp)) { +getTrail: + if(sourcefromUChar32=cp; + goto noMoreInput; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + } + + *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND; + cnv->fromUChar32=cp; + } +noMoreInput: + + /* set offsets since the start */ + if(offsets!=NULL) { + size_t count=target-oldTarget; + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + } + + if(U_SUCCESS(*pErrorCode) && source=(uint8_t *)pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). 
*/ +static void U_CALLCONV +ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + UConverter *utf8; + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + UChar32 c; + uint8_t b, t1; + + /* set up the local pointers */ + utf8=pToUArgs->converter; + source=(uint8_t *)pToUArgs->source; + sourceLimit=(uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + /* get the converter state from the UTF-8 UConverter */ + c=(UChar32)utf8->toUnicodeStatus; + if(c!=0 && source=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) { + ++source; + *target++=(uint8_t)(((c&3)<<6)|t1); + --targetCapacity; + + utf8->toUnicodeStatus=0; + utf8->toULength=0; + } else { + /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } + } + + /* + * Make sure that the last byte sequence before sourceLimit is complete + * or runs into a lead byte. + * In the conversion loop compare source with sourceLimit only once + * per multi-byte character. + * For Latin-1, adjust sourceLimit only for 1 trail byte because + * the conversion loop handles at most 2-byte sequences. 
+ */ + if(source0) { + b=*source++; + if((int8_t)b>=0) { + /* convert ASCII */ + *target++=(uint8_t)b; + --targetCapacity; + } else if( /* handle U+0080..U+00FF inline */ + b>=0xc2 && b<=0xc3 && + (t1=(uint8_t)(*source-0x80)) <= 0x3f + ) { + ++source; + *target++=(uint8_t)(((b&3)<<6)|t1); + --targetCapacity; + } else { + /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ + pToUArgs->source=(char *)(source-1); + pFromUArgs->target=(char *)target; + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* + * The sourceLimit may have been adjusted before the conversion loop + * to stop before a truncated sequence. + * If so, then collect the truncated sequence now. + * For Latin-1, there is at most exactly one lead byte because of the + * smaller sourceLimit adjustment logic. + */ + if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { + utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++; + utf8->toULength=1; + utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1; + } + + /* write back the updated pointers */ + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; +} + +static void U_CALLCONV +_Latin1GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0xff); +} +U_CDECL_END + + +static const UConverterImpl _Latin1Impl={ + UCNV_LATIN_1, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + _Latin1ToUnicodeWithOffsets, + _Latin1ToUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _Latin1GetNextUChar, + + NULL, + NULL, + NULL, + NULL, + _Latin1GetUnicodeSet, + + NULL, + ucnv_Latin1FromUTF8 +}; + +static const UConverterStaticData _Latin1StaticData={ + sizeof(UConverterStaticData), + "ISO-8859-1", + 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, + { 0x1a, 
0, 0, 0 }, 1, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _Latin1Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl); + +/* US-ASCII ----------------------------------------------------------------- */ + +U_CDECL_BEGIN +/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ +static void U_CALLCONV +_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source, *sourceLimit; + UChar *target, *oldTarget; + int32_t targetCapacity, length; + int32_t *offsets; + + int32_t sourceIndex; + + uint8_t c; + + /* set up the local pointers */ + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=oldTarget=pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=0; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length=8) { + /* This loop is unrolled for speed and improved pipelining. */ + int32_t count, loops; + UChar oredChars; + + loops=count=targetCapacity>>3; + do { + oredChars=target[0]=source[0]; + oredChars|=target[1]=source[1]; + oredChars|=target[2]=source[2]; + oredChars|=target[3]=source[3]; + oredChars|=target[4]=source[4]; + oredChars|=target[5]=source[5]; + oredChars|=target[6]=source[6]; + oredChars|=target[7]=source[7]; + + /* were all 16 entries really valid? 
*/ + if(oredChars>0x7f) { + /* no, return to the first of these 16 */ + break; + } + source+=8; + target+=8; + } while(--count>0); + count=loops-count; + targetCapacity-=count*8; + + if(offsets!=NULL) { + oldTarget+=count*8; + while(count>0) { + offsets[0]=sourceIndex++; + offsets[1]=sourceIndex++; + offsets[2]=sourceIndex++; + offsets[3]=sourceIndex++; + offsets[4]=sourceIndex++; + offsets[5]=sourceIndex++; + offsets[6]=sourceIndex++; + offsets[7]=sourceIndex++; + offsets+=8; + --count; + } + } + } + + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source++)<=0x7f) { + *target++=c; + --targetCapacity; + } + + if(c>0x7f) { + /* callback(illegal); copy the current bytes to toUBytes[] */ + UConverter *cnv=pArgs->converter; + cnv->toUBytes[0]=c; + cnv->toULength=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else if(source=pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* set offsets since the start */ + if(offsets!=NULL) { + size_t count=target-oldTarget; + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */ +static UChar32 U_CALLCONV +_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source; + uint8_t b; + + source=(const uint8_t *)pArgs->source; + if(source<(const uint8_t *)pArgs->sourceLimit) { + b=*source++; + pArgs->source=(const char *)source; + if(b<=0x7f) { + return b; + } else { + UConverter *cnv=pArgs->converter; + cnv->toUBytes[0]=b; + cnv->toULength=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0xffff; + } + } + + /* no output because of empty input */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; +} + +/* "Convert" UTF-8 to US-ASCII: Validate and copy. 
*/ +static void U_CALLCONV +ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity, length; + + uint8_t c; + + if(pToUArgs->converter->toUnicodeStatus!=0) { + /* no handling of partial UTF-8 characters here, fall back to pivoting */ + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } + + /* set up the local pointers */ + source=(const uint8_t *)pToUArgs->source; + sourceLimit=(const uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + /* + * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length=16) { + int32_t count, loops; + uint8_t oredChars; + + loops=count=targetCapacity>>4; + do { + oredChars=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + + /* were all 16 entries really valid? 
*/ + if(oredChars>0x7f) { + /* no, return to the first of these 16 */ + source-=16; + target-=16; + break; + } + } while(--count>0); + count=loops-count; + targetCapacity-=16*count; + } + + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source)<=0x7f) { + ++source; + *target++=c; + --targetCapacity; + } + + if(c>0x7f) { + /* non-ASCII character, handle in standard converter */ + *pErrorCode=U_USING_DEFAULT_WARNING; + } else if(source=(const uint8_t *)pFromUArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pToUArgs->source=(const char *)source; + pFromUArgs->target=(char *)target; +} + +static void U_CALLCONV +_ASCIIGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0x7f); +} +U_CDECL_END + +static const UConverterImpl _ASCIIImpl={ + UCNV_US_ASCII, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + _ASCIIToUnicodeWithOffsets, + _ASCIIToUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _ASCIIGetNextUChar, + + NULL, + NULL, + NULL, + NULL, + _ASCIIGetUnicodeSet, + + NULL, + ucnv_ASCIIFromUTF8 +}; + +static const UConverterStaticData _ASCIIStaticData={ + sizeof(UConverterStaticData), + "US-ASCII", + 367, UCNV_IBM, UCNV_US_ASCII, 1, 1, + { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _ASCIIData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl); + +#endif diff --git a/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/icu-small/source/common/ucnvmbcs.cpp index 0b598dbc34..4412be6739 100644 --- a/deps/icu-small/source/common/ucnvmbcs.cpp +++ b/deps/icu-small/source/common/ucnvmbcs.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucnvmbcs.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucnvmbcs.h b/deps/icu-small/source/common/ucnvmbcs.h index 12d50b545c..a750b92e49 100644 --- a/deps/icu-small/source/common/ucnvmbcs.h +++ b/deps/icu-small/source/common/ucnvmbcs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ucnvmbcs.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucnvscsu.c b/deps/icu-small/source/common/ucnvscsu.c deleted file mode 100644 index 4228b44e7f..0000000000 --- a/deps/icu-small/source/common/ucnvscsu.c +++ /dev/null @@ -1,2043 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucnvscsu.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000nov18 -* created by: Markus W. 
Scherer -* -* This is an implementation of the Standard Compression Scheme for Unicode -* as defined in http://www.unicode.org/unicode/reports/tr6/ . -* Reserved commands and window settings are treated as illegal sequences and -* will result in callback calls. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -/* SCSU definitions --------------------------------------------------------- */ - -/* SCSU command byte values */ -enum { - SQ0=0x01, /* Quote from window pair 0 */ - SQ7=0x08, /* Quote from window pair 7 */ - SDX=0x0B, /* Define a window as extended */ - Srs=0x0C, /* reserved */ - SQU=0x0E, /* Quote a single Unicode character */ - SCU=0x0F, /* Change to Unicode mode */ - SC0=0x10, /* Select window 0 */ - SC7=0x17, /* Select window 7 */ - SD0=0x18, /* Define and select window 0 */ - SD7=0x1F, /* Define and select window 7 */ - - UC0=0xE0, /* Select window 0 */ - UC7=0xE7, /* Select window 7 */ - UD0=0xE8, /* Define and select window 0 */ - UD7=0xEF, /* Define and select window 7 */ - UQU=0xF0, /* Quote a single Unicode character */ - UDX=0xF1, /* Define a Window as extended */ - Urs=0xF2 /* reserved */ -}; - -enum { - /* - * Unicode code points from 3400 to E000 are not adressible by - * dynamic window, since in these areas no short run alphabets are - * found. Therefore add gapOffset to all values from gapThreshold. 
- */ - gapThreshold=0x68, - gapOffset=0xAC00, - - /* values between reservedStart and fixedThreshold are reserved */ - reservedStart=0xA8, - - /* use table of predefined fixed offsets for values from fixedThreshold */ - fixedThreshold=0xF9 -}; - -/* constant offsets for the 8 static windows */ -static const uint32_t staticOffsets[8]={ - 0x0000, /* ASCII for quoted tags */ - 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ - 0x0100, /* Latin Extended-A */ - 0x0300, /* Combining Diacritical Marks */ - 0x2000, /* General Punctuation */ - 0x2080, /* Currency Symbols */ - 0x2100, /* Letterlike Symbols and Number Forms */ - 0x3000 /* CJK Symbols and punctuation */ -}; - -/* initial offsets for the 8 dynamic (sliding) windows */ -static const uint32_t initialDynamicOffsets[8]={ - 0x0080, /* Latin-1 */ - 0x00C0, /* Latin Extended A */ - 0x0400, /* Cyrillic */ - 0x0600, /* Arabic */ - 0x0900, /* Devanagari */ - 0x3040, /* Hiragana */ - 0x30A0, /* Katakana */ - 0xFF00 /* Fullwidth ASCII */ -}; - -/* Table of fixed predefined Offsets */ -static const uint32_t fixedOffsets[]={ - /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ - /* 0xFA */ 0x0250, /* IPA extensions */ - /* 0xFB */ 0x0370, /* Greek */ - /* 0xFC */ 0x0530, /* Armenian */ - /* 0xFD */ 0x3040, /* Hiragana */ - /* 0xFE */ 0x30A0, /* Katakana */ - /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ -}; - -/* state values */ -enum { - readCommand, - quotePairOne, - quotePairTwo, - quoteOne, - definePairOne, - definePairTwo, - defineOne -}; - -typedef struct SCSUData { - /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ - uint32_t toUDynamicOffsets[8]; - uint32_t fromUDynamicOffsets[8]; - - /* state machine state - toUnicode */ - UBool toUIsSingleByteMode; - uint8_t toUState; - int8_t toUQuoteWindow, toUDynamicWindow; - uint8_t toUByteOne; - uint8_t toUPadding[3]; - - /* state machine state - fromUnicode */ - UBool fromUIsSingleByteMode; - int8_t 
fromUDynamicWindow; - - /* - * windowUse[] keeps track of the use of the dynamic windows: - * At nextWindowUseIndex there is the least recently used window, - * and the following windows (in a wrapping manner) are more and more - * recently used. - * At nextWindowUseIndex-1 there is the most recently used window. - */ - uint8_t locale; - int8_t nextWindowUseIndex; - int8_t windowUse[8]; -} SCSUData; - -static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; -static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; - -enum { - lGeneric, l_ja -}; - -/* SCSU setup functions ----------------------------------------------------- */ - -static void -_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { - SCSUData *scsu=(SCSUData *)cnv->extraInfo; - - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode */ - uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); - - scsu->toUIsSingleByteMode=TRUE; - scsu->toUState=readCommand; - scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; - scsu->toUByteOne=0; - - cnv->toULength=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode */ - uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); - - scsu->fromUIsSingleByteMode=TRUE; - scsu->fromUDynamicWindow=0; - - scsu->nextWindowUseIndex=0; - switch(scsu->locale) { - case l_ja: - uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); - break; - default: - uprv_memcpy(scsu->windowUse, initialWindowUse, 8); - break; - } - - cnv->fromUChar32=0; - } -} - -static void -_SCSUOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - const char *locale=pArgs->locale; - if(pArgs->onlyTestIsLoadable) { - return; - } - cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); - if(cnv->extraInfo!=NULL) { - if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { - ((SCSUData *)cnv->extraInfo)->locale=l_ja; - } else { - ((SCSUData *)cnv->extraInfo)->locale=lGeneric; - } - 
_SCSUReset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - - /* Set the substitution character U+fffd as a Unicode string. */ - cnv->subUChars[0]=0xfffd; - cnv->subCharLen=-1; -} - -static void -_SCSUClose(UConverter *cnv) { - if(cnv->extraInfo!=NULL) { - if(!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo=NULL; - } -} - -/* SCSU-to-Unicode conversion functions ------------------------------------- */ - -static void -_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - UBool isSingleByteMode; - uint8_t state, byteOne; - int8_t quoteWindow, dynamicWindow; - - int32_t sourceIndex, nextSourceIndex; - - uint8_t b; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the state machine state */ - isSingleByteMode=scsu->toUIsSingleByteMode; - state=scsu->toUState; - quoteWindow=scsu->toUQuoteWindow; - dynamicWindow=scsu->toUDynamicWindow; - byteOne=scsu->toUByteOne; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=state==readCommand ? 0 : -1; - nextSourceIndex=0; - - /* - * conversion "loop" - * - * For performance, this is not a normal C loop. - * Instead, there are two code blocks for the two SCSU modes. - * The function branches to either one, and a change of the mode is done with a goto to - * the other branch. 
- * - * Each branch has two conventional loops: - * - a fast-path loop for the most common codes in the mode - * - a loop for all other codes in the mode - * When the fast-path runs into a code that it cannot handle, its loop ends and it - * runs into the following loop to handle the other codes. - * The end of the input or output buffer is also handled by the slower loop. - * The slow loop jumps (goto) to the fast-path loop again as soon as possible. - * - * The callback handling is done by returning with an error code. - * The conversion framework actually calls the callback function. - */ - if(isSingleByteMode) { - /* fast path for single-byte mode */ - if(state==readCommand) { -fastSingle: - while(source=0x20) { - ++source; - ++nextSourceIndex; - if(b<=0x7f) { - /* write US-ASCII graphic character or DEL */ - *target++=(UChar)b; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - sourceIndex=nextSourceIndex; - } - } - - /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ -singleByteMode: - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - ++nextSourceIndex; - switch(state) { - case readCommand: - /* redundant conditions are commented out */ - /* here: b<0x20 because otherwise we would be in fastSingle */ - if((1UL<toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - - /* store the first byte of a multibyte sequence in toUBytes[] */ - cnv->toUBytes[0]=b; - cnv->toULength=1; - break; - case quotePairOne: - byteOne=b; - 
cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - case quoteOne: - if(b<0x80) { - /* all static offsets are in the BMP */ - *target++=(UChar)(staticOffsets[quoteWindow]+b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - case definePairOne: - dynamicWindow=(int8_t)((b>>5)&7); - byteOne=(uint8_t)(b&0x1f); - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=definePairTwo; - break; - case definePairTwo: - scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - case defineOne: - if(b==0) { - /* callback(illegal): Reserved window offset value 0 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; - } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { - scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; - } else if(b>=fixedThreshold) { - scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; - } else { - /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - } - } - } else { - /* fast path for Unicode mode */ - if(state==readCommand) { 
-fastUnicode: - while(source+1(Urs-UC0)) { - *target++=(UChar)((b<<8)|source[1]); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - sourceIndex=nextSourceIndex; - nextSourceIndex+=2; - source+=2; - } - } - - /* normal state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - ++nextSourceIndex; - switch(state) { - case readCommand: - if((uint8_t)(b-UC0)>(Urs-UC0)) { - byteOne=b; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairTwo; - } else if(/* UC0<=b && */ b<=UC7) { - dynamicWindow=(int8_t)(b-UC0); - sourceIndex=nextSourceIndex; - isSingleByteMode=TRUE; - goto fastSingle; - } else if(/* UD0<=b && */ b<=UD7) { - dynamicWindow=(int8_t)(b-UD0); - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=defineOne; - goto singleByteMode; - } else if(b==UDX) { - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=definePairOne; - goto singleByteMode; - } else if(b==UQU) { - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairOne; - } else /* Urs */ { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - break; - case quotePairOne: - byteOne=b; - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastUnicode; - } - } - } -endloop: - - /* set the converter state back into UConverter */ - if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { - /* reset to deal with the next character */ - state=readCommand; - } else if(state==readCommand) { - /* not in a multi-byte sequence, reset toULength */ - cnv->toULength=0; - } - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=state; - scsu->toUQuoteWindow=quoteWindow; - 
scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=byteOne; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -/* - * Identical to _SCSUToUnicodeWithOffsets but without offset handling. - * If a change is made in the original function, then either - * change this function the same way or - * re-copy the original function and remove the variables - * offsets, sourceIndex, and nextSourceIndex. - */ -static void -_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - UBool isSingleByteMode; - uint8_t state, byteOne; - int8_t quoteWindow, dynamicWindow; - - uint8_t b; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - - /* get the state machine state */ - isSingleByteMode=scsu->toUIsSingleByteMode; - state=scsu->toUState; - quoteWindow=scsu->toUQuoteWindow; - dynamicWindow=scsu->toUDynamicWindow; - byteOne=scsu->toUByteOne; - - /* - * conversion "loop" - * - * For performance, this is not a normal C loop. - * Instead, there are two code blocks for the two SCSU modes. - * The function branches to either one, and a change of the mode is done with a goto to - * the other branch. - * - * Each branch has two conventional loops: - * - a fast-path loop for the most common codes in the mode - * - a loop for all other codes in the mode - * When the fast-path runs into a code that it cannot handle, its loop ends and it - * runs into the following loop to handle the other codes. - * The end of the input or output buffer is also handled by the slower loop. - * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 
- * - * The callback handling is done by returning with an error code. - * The conversion framework actually calls the callback function. - */ - if(isSingleByteMode) { - /* fast path for single-byte mode */ - if(state==readCommand) { -fastSingle: - while(source=0x20) { - ++source; - if(b<=0x7f) { - /* write US-ASCII graphic character or DEL */ - *target++=(UChar)b; - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - } - } - - /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ -singleByteMode: - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - switch(state) { - case readCommand: - /* redundant conditions are commented out */ - /* here: b<0x20 because otherwise we would be in fastSingle */ - if((1UL<toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - - /* store the first byte of a multibyte sequence in toUBytes[] */ - cnv->toUBytes[0]=b; - cnv->toULength=1; - break; - case quotePairOne: - byteOne=b; - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - state=readCommand; - goto fastSingle; - case quoteOne: - if(b<0x80) { - /* all static offsets are in the BMP */ - *target++=(UChar)(staticOffsets[quoteWindow]+b); - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - state=readCommand; - goto fastSingle; - case definePairOne: - dynamicWindow=(int8_t)((b>>5)&7); - byteOne=(uint8_t)(b&0x1f); - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=definePairTwo; - break; - case definePairTwo: - scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); - state=readCommand; - goto fastSingle; - case defineOne: - if(b==0) { - /* callback(illegal): Reserved window offset value 0 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; - } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { - scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; - } else if(b>=fixedThreshold) { - scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; - } else { - /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } - state=readCommand; - goto fastSingle; - } - } - } else { - /* fast path for Unicode mode */ - if(state==readCommand) { -fastUnicode: - while(source+1(Urs-UC0)) { - *target++=(UChar)((b<<8)|source[1]); - source+=2; - } - } - - /* normal state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - switch(state) { - case readCommand: - if((uint8_t)(b-UC0)>(Urs-UC0)) { - byteOne=b; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairTwo; - } else if(/* UC0<=b && */ b<=UC7) { - dynamicWindow=(int8_t)(b-UC0); - isSingleByteMode=TRUE; - goto fastSingle; - } else if(/* UD0<=b && */ b<=UD7) { - dynamicWindow=(int8_t)(b-UD0); - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=defineOne; - goto singleByteMode; - } else if(b==UDX) { - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=definePairOne; - goto singleByteMode; - } else if(b==UQU) { 
- cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairOne; - } else /* Urs */ { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - break; - case quotePairOne: - byteOne=b; - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - state=readCommand; - goto fastUnicode; - } - } - } -endloop: - - /* set the converter state back into UConverter */ - if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { - /* reset to deal with the next character */ - state=readCommand; - } else if(state==readCommand) { - /* not in a multi-byte sequence, reset toULength */ - cnv->toULength=0; - } - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=state; - scsu->toUQuoteWindow=quoteWindow; - scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=byteOne; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - return; -} - -/* SCSU-from-Unicode conversion functions ----------------------------------- */ - -/* - * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve - * reasonable results. The lookahead is minimal. - * Many cases are simple: - * A character fits directly into the current mode, a dynamic or static window, - * or is not compressible. These cases are tested first. - * Real compression heuristics are applied to the rest, in code branches for - * single/Unicode mode and BMP/supplementary code points. - * The heuristics used here are extremely simple. - */ - -/* get the number of the window that this character is in, or -1 */ -static int8_t -getWindow(const uint32_t offsets[8], uint32_t c) { - int i; - for(i=0; i<8; ++i) { - if((uint32_t)(c-offsets[i])<=0x7f) { - return (int8_t)(i); - } - } - return -1; -} - -/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? 
*/ -static UBool -isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { - return (UBool)(c<=offset+0x7f && - (c>=offset || (c<=0x7f && - (c>=0x20 || (1UL<windowUse[scsu->nextWindowUseIndex]; - if(++scsu->nextWindowUseIndex==8) { - scsu->nextWindowUseIndex=0; - } - return window; -} - -/* - * useDynamicWindow() adjusts - * windowUse[] and nextWindowUseIndex for the algorithm to choose - * the next dynamic window to be defined; - * a subclass may override it and provide its own algorithm. - */ -static void -useDynamicWindow(SCSUData *scsu, int8_t window) { - /* - * move the existing window, which just became the most recently used one, - * up in windowUse[] to nextWindowUseIndex-1 - */ - - /* first, find the index of the window - backwards to favor the more recently used windows */ - int i, j; - - i=scsu->nextWindowUseIndex; - do { - if(--i<0) { - i=7; - } - } while(scsu->windowUse[i]!=window); - - /* now copy each windowUse[i+1] to [i] */ - j=i+1; - if(j==8) { - j=0; - } - while(j!=scsu->nextWindowUseIndex) { - scsu->windowUse[i]=scsu->windowUse[j]; - i=j; - if(++j==8) { j=0; } - } - - /* finally, set the window into the most recently used index */ - scsu->windowUse[i]=window; -} - -/* - * calculate the offset and the code for a dynamic window that contains the character - * takes fixed offsets into account - * the offset of the window is stored in the offset variable, - * the code is returned - * - * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code - */ -static int -getDynamicOffset(uint32_t c, uint32_t *pOffset) { - int i; - - for(i=0; i<7; ++i) { - if((uint32_t)(c-fixedOffsets[i])<=0x7f) { - *pOffset=fixedOffsets[i]; - return 0xf9+i; - } - } - - if(c<0x80) { - /* No dynamic window for US-ASCII. 
*/ - return -1; - } else if(c<0x3400 || - (uint32_t)(c-0x10000)<(0x14000-0x10000) || - (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) - ) { - /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ - *pOffset=c&0x7fffff80; - return (int)(c>>7); - } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { - /* For these characters we need to take the gapOffset into account. */ - *pOffset=c&0x7fffff80; - return (int)((c-gapOffset)>>7); - } else { - return -1; - } -} - -/* - * Idea for compression: - * - save SCSUData and other state before really starting work - * - at endloop, see if compression could be better with just unicode mode - * - don't do this if a callback has been called - * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning - * - different buffer handling! - * - * Drawback or need for corrective handling: - * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and - * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible - * not only for compression but also for HTML/XML documents with following charset/encoding announcers. - * - * How to achieve both? - * - Only replace the result after an SDX or SCU? 
- */ - -static void -_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t *offsets; - - UBool isSingleByteMode; - uint8_t dynamicWindow; - uint32_t currentOffset; - - uint32_t c, delta; - - int32_t sourceIndex, nextSourceIndex; - - int32_t length; - - /* variables for compression heuristics */ - uint32_t offset; - UChar lead, trail; - int code; - int8_t window; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - /* get the state machine state */ - isSingleByteMode=scsu->fromUIsSingleByteMode; - dynamicWindow=scsu->fromUDynamicWindow; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - - c=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 
0 : -1; - nextSourceIndex=0; - - /* similar conversion "loop" as in toUnicode */ -loop: - if(isSingleByteMode) { - if(c!=0 && targetCapacity>0) { - goto getTrailSingle; - } - - /* state machine for single-byte mode */ -/* singleByteMode: */ - while(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* might check if there are more characters in this window to come */ - /* define an extended window with this character */ - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* change to Unicode mode and output this (lead, trail) pair */ - isSingleByteMode=FALSE; - *target++=(uint8_t)SCU; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - --targetCapacity; - c=((uint32_t)lead<<16)|trail; - length=4; - goto outputBytes; - } - } else if(c<0xa0) { - /* quote C1 control character */ - c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ - length=2; - goto outputBytes; - } else if(c==0xfeff || c>=0xfff0) { - /* quote signature character=byte order mark and specials */ - c|=SQU<<16; - length=3; - goto outputBytes; - } else { - /* compress all other BMP characters */ - if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { - /* there is a window defined that contains this character - switch to it or quote from it? 
*/ - if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { - /* change to dynamic window */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else { - /* quote from dynamic window */ - c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; - length=2; - goto outputBytes; - } - } else if((window=getWindow(staticOffsets, c))>=0) { - /* quote from static window */ - c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && - (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) - ) { - /* - * this character is not compressible (a BMP ideograph or similar); - * switch to Unicode mode if this is the last character in the block - * or there is at least one more ideograph following immediately - */ - isSingleByteMode=FALSE; - c|=SCU<<16; - length=3; - goto outputBytes; - } else { - /* quote Unicode */ - c|=SQU<<16; - length=3; - goto outputBytes; - } - } - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - } - } else { - if(c!=0 && targetCapacity>0) { - goto getTrailUnicode; - } - - /* state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=2) { - *target++=(uint8_t)(c>>8); - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - } - targetCapacity-=2; - } else { - 
length=2; - goto outputBytes; - } - } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { - /* compress BMP character if the following one is not an uncompressible ideograph */ - if(!(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - isSingleByteMode=TRUE; - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - isSingleByteMode=TRUE; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } - } - - /* don't know how to compress this character, just write it directly */ - length=2; - goto outputBytes; - } else if(c<0xe000) { - /* c is a surrogate */ - if(U16_IS_SURROGATE_LEAD(c)) { -getTrailUnicode: - lead=(UChar)c; - if(sourcefromUDynamicOffsets, c))>=0 && - !(sourcefromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if(source=0 - ) { - /* two supplementary characters in (probably) the same window - define an extended one */ - isSingleByteMode=TRUE; - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* don't know how to compress this character, just write it directly */ - c=((uint32_t)lead<<16)|trail; - length=4; - goto 
outputBytes; - } - } else /* 0xe000<=c<0xf300 */ { - /* quote to avoid SCSU tags */ - c|=UQU<<16; - length=3; - goto outputBytes; - } - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - } - } -endloop: - - /* set the converter state back into UConverter */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; - - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - return; - -outputBytes: - /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - if(offsets==NULL) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - } else { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(c>>24); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(c>>16); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - *offsets++=sourceIndex; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - } - targetCapacity-=length; - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - goto loop; - } else { - uint8_t *p; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the - * regular target. 
- */ - /* we know that 0<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 4: - *p++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *p++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *p++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *p=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - c>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(c>>16); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - default: - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - c=0; - goto endloop; - } -} - -/* - * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. - * If a change is made in the original function, then either - * change this function the same way or - * re-copy the original function and remove the variables - * offsets, sourceIndex, and nextSourceIndex. 
- */ -static void -_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - UBool isSingleByteMode; - uint8_t dynamicWindow; - uint32_t currentOffset; - - uint32_t c, delta; - - int32_t length; - - /* variables for compression heuristics */ - uint32_t offset; - UChar lead, trail; - int code; - int8_t window; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - - /* get the state machine state */ - isSingleByteMode=scsu->fromUIsSingleByteMode; - dynamicWindow=scsu->fromUDynamicWindow; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - - c=cnv->fromUChar32; - - /* similar conversion "loop" as in toUnicode */ -loop: - if(isSingleByteMode) { - if(c!=0 && targetCapacity>0) { - goto getTrailSingle; - } - - /* state machine for single-byte mode */ -/* singleByteMode: */ - while(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* might check if there are more characters in this window to come */ - /* define an extended window with this character */ - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* 
change to Unicode mode and output this (lead, trail) pair */ - isSingleByteMode=FALSE; - *target++=(uint8_t)SCU; - --targetCapacity; - c=((uint32_t)lead<<16)|trail; - length=4; - goto outputBytes; - } - } else if(c<0xa0) { - /* quote C1 control character */ - c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ - length=2; - goto outputBytes; - } else if(c==0xfeff || c>=0xfff0) { - /* quote signature character=byte order mark and specials */ - c|=SQU<<16; - length=3; - goto outputBytes; - } else { - /* compress all other BMP characters */ - if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { - /* there is a window defined that contains this character - switch to it or quote from it? */ - if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { - /* change to dynamic window */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else { - /* quote from dynamic window */ - c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; - length=2; - goto outputBytes; - } - } else if((window=getWindow(staticOffsets, c))>=0) { - /* quote from static window */ - c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && - (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) - ) { - /* - * this character is not compressible (a BMP ideograph or similar); - * switch to Unicode mode if this is the last 
character in the block - * or there is at least one more ideograph following immediately - */ - isSingleByteMode=FALSE; - c|=SCU<<16; - length=3; - goto outputBytes; - } else { - /* quote Unicode */ - c|=SQU<<16; - length=3; - goto outputBytes; - } - } - - /* normal end of conversion: prepare for a new character */ - c=0; - } - } else { - if(c!=0 && targetCapacity>0) { - goto getTrailUnicode; - } - - /* state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=2) { - *target++=(uint8_t)(c>>8); - *target++=(uint8_t)c; - targetCapacity-=2; - } else { - length=2; - goto outputBytes; - } - } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { - /* compress BMP character if the following one is not an uncompressible ideograph */ - if(!(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - isSingleByteMode=TRUE; - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - isSingleByteMode=TRUE; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } - } - - /* don't know how to compress this character, just write it directly */ - length=2; - goto outputBytes; - } else if(c<0xe000) { - /* c is a surrogate */ - if(U16_IS_SURROGATE_LEAD(c)) { -getTrailUnicode: - lead=(UChar)c; - if(sourcefromUDynamicOffsets, c))>=0 && - !(sourcefromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } 
else if(source=0 - ) { - /* two supplementary characters in (probably) the same window - define an extended one */ - isSingleByteMode=TRUE; - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* don't know how to compress this character, just write it directly */ - c=((uint32_t)lead<<16)|trail; - length=4; - goto outputBytes; - } - } else /* 0xe000<=c<0xf300 */ { - /* quote to avoid SCSU tags */ - c|=UQU<<16; - length=3; - goto outputBytes; - } - - /* normal end of conversion: prepare for a new character */ - c=0; - } - } -endloop: - - /* set the converter state back into UConverter */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; - - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - return; - -outputBytes: - /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - targetCapacity-=length; - - /* normal end of conversion: prepare for a new character */ - c=0; - goto loop; - } else { - uint8_t *p; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the - * regular target. 
- */ - /* we know that 0<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 4: - *p++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *p++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *p++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *p=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - c>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - U_FALLTHROUGH; - default: - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - c=0; - goto endloop; - } -} - -/* miscellaneous ------------------------------------------------------------ */ - -static const char * -_SCSUGetName(const UConverter *cnv) { - SCSUData *scsu=(SCSUData *)cnv->extraInfo; - - switch(scsu->locale) { - case l_ja: - return "SCSU,locale=ja"; - default: - return "SCSU"; - } -} - -/* structure for SafeClone calculations */ -struct cloneSCSUStruct -{ - UConverter cnv; - SCSUData mydata; -}; - -static UConverter * -_SCSUSafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneSCSUStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); - - if (U_FAILURE(*status)){ - return 0; - } - - if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneSCSUStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); - localClone->cnv.extraInfo = &localClone->mydata; - 
localClone->cnv.isExtraLocal = TRUE; - - return &localClone->cnv; -} - - -static const UConverterImpl _SCSUImpl={ - UCNV_SCSU, - - NULL, - NULL, - - _SCSUOpen, - _SCSUClose, - _SCSUReset, - - _SCSUToUnicode, - _SCSUToUnicodeWithOffsets, - _SCSUFromUnicode, - _SCSUFromUnicodeWithOffsets, - NULL, - - NULL, - _SCSUGetName, - NULL, - _SCSUSafeClone, - ucnv_getCompleteUnicodeSet -}; - -static const UConverterStaticData _SCSUStaticData={ - sizeof(UConverterStaticData), - "SCSU", - 1212, /* CCSID for SCSU */ - UCNV_IBM, UCNV_SCSU, - 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ - /* - * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode - * substitution string. - */ - { 0x0e, 0xff, 0xfd, 0 }, 3, - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _SCSUData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl); - -#endif diff --git a/deps/icu-small/source/common/ucnvscsu.cpp b/deps/icu-small/source/common/ucnvscsu.cpp new file mode 100644 index 0000000000..eb7b7ad5c8 --- /dev/null +++ b/deps/icu-small/source/common/ucnvscsu.cpp @@ -0,0 +1,2045 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnvscsu.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000nov18 +* created by: Markus W. Scherer +* +* This is an implementation of the Standard Compression Scheme for Unicode +* as defined in http://www.unicode.org/unicode/reports/tr6/ . +* Reserved commands and window settings are treated as illegal sequences and +* will result in callback calls. 
+*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +/* SCSU definitions --------------------------------------------------------- */ + +/* SCSU command byte values */ +enum { + SQ0=0x01, /* Quote from window pair 0 */ + SQ7=0x08, /* Quote from window pair 7 */ + SDX=0x0B, /* Define a window as extended */ + Srs=0x0C, /* reserved */ + SQU=0x0E, /* Quote a single Unicode character */ + SCU=0x0F, /* Change to Unicode mode */ + SC0=0x10, /* Select window 0 */ + SC7=0x17, /* Select window 7 */ + SD0=0x18, /* Define and select window 0 */ + SD7=0x1F, /* Define and select window 7 */ + + UC0=0xE0, /* Select window 0 */ + UC7=0xE7, /* Select window 7 */ + UD0=0xE8, /* Define and select window 0 */ + UD7=0xEF, /* Define and select window 7 */ + UQU=0xF0, /* Quote a single Unicode character */ + UDX=0xF1, /* Define a Window as extended */ + Urs=0xF2 /* reserved */ +}; + +enum { + /* + * Unicode code points from 3400 to E000 are not adressible by + * dynamic window, since in these areas no short run alphabets are + * found. Therefore add gapOffset to all values from gapThreshold. 
+ */ + gapThreshold=0x68, + gapOffset=0xAC00, + + /* values between reservedStart and fixedThreshold are reserved */ + reservedStart=0xA8, + + /* use table of predefined fixed offsets for values from fixedThreshold */ + fixedThreshold=0xF9 +}; + +/* constant offsets for the 8 static windows */ +static const uint32_t staticOffsets[8]={ + 0x0000, /* ASCII for quoted tags */ + 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ + 0x0100, /* Latin Extended-A */ + 0x0300, /* Combining Diacritical Marks */ + 0x2000, /* General Punctuation */ + 0x2080, /* Currency Symbols */ + 0x2100, /* Letterlike Symbols and Number Forms */ + 0x3000 /* CJK Symbols and punctuation */ +}; + +/* initial offsets for the 8 dynamic (sliding) windows */ +static const uint32_t initialDynamicOffsets[8]={ + 0x0080, /* Latin-1 */ + 0x00C0, /* Latin Extended A */ + 0x0400, /* Cyrillic */ + 0x0600, /* Arabic */ + 0x0900, /* Devanagari */ + 0x3040, /* Hiragana */ + 0x30A0, /* Katakana */ + 0xFF00 /* Fullwidth ASCII */ +}; + +/* Table of fixed predefined Offsets */ +static const uint32_t fixedOffsets[]={ + /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ + /* 0xFA */ 0x0250, /* IPA extensions */ + /* 0xFB */ 0x0370, /* Greek */ + /* 0xFC */ 0x0530, /* Armenian */ + /* 0xFD */ 0x3040, /* Hiragana */ + /* 0xFE */ 0x30A0, /* Katakana */ + /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ +}; + +/* state values */ +enum { + readCommand, + quotePairOne, + quotePairTwo, + quoteOne, + definePairOne, + definePairTwo, + defineOne +}; + +typedef struct SCSUData { + /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ + uint32_t toUDynamicOffsets[8]; + uint32_t fromUDynamicOffsets[8]; + + /* state machine state - toUnicode */ + UBool toUIsSingleByteMode; + uint8_t toUState; + int8_t toUQuoteWindow, toUDynamicWindow; + uint8_t toUByteOne; + uint8_t toUPadding[3]; + + /* state machine state - fromUnicode */ + UBool fromUIsSingleByteMode; + int8_t 
fromUDynamicWindow; + + /* + * windowUse[] keeps track of the use of the dynamic windows: + * At nextWindowUseIndex there is the least recently used window, + * and the following windows (in a wrapping manner) are more and more + * recently used. + * At nextWindowUseIndex-1 there is the most recently used window. + */ + uint8_t locale; + int8_t nextWindowUseIndex; + int8_t windowUse[8]; +} SCSUData; + +static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; +static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; + +enum { + lGeneric, l_ja +}; + +/* SCSU setup functions ----------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { + SCSUData *scsu=(SCSUData *)cnv->extraInfo; + + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode */ + uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); + + scsu->toUIsSingleByteMode=TRUE; + scsu->toUState=readCommand; + scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; + scsu->toUByteOne=0; + + cnv->toULength=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode */ + uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); + + scsu->fromUIsSingleByteMode=TRUE; + scsu->fromUDynamicWindow=0; + + scsu->nextWindowUseIndex=0; + switch(scsu->locale) { + case l_ja: + uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); + break; + default: + uprv_memcpy(scsu->windowUse, initialWindowUse, 8); + break; + } + + cnv->fromUChar32=0; + } +} + +static void U_CALLCONV +_SCSUOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + const char *locale=pArgs->locale; + if(pArgs->onlyTestIsLoadable) { + return; + } + cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); + if(cnv->extraInfo!=NULL) { + if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { + ((SCSUData *)cnv->extraInfo)->locale=l_ja; + } else { + ((SCSUData 
*)cnv->extraInfo)->locale=lGeneric; + } + _SCSUReset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + + /* Set the substitution character U+fffd as a Unicode string. */ + cnv->subUChars[0]=0xfffd; + cnv->subCharLen=-1; +} + +static void U_CALLCONV +_SCSUClose(UConverter *cnv) { + if(cnv->extraInfo!=NULL) { + if(!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo=NULL; + } +} + +/* SCSU-to-Unicode conversion functions ------------------------------------- */ + +static void U_CALLCONV +_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + UBool isSingleByteMode; + uint8_t state, byteOne; + int8_t quoteWindow, dynamicWindow; + + int32_t sourceIndex, nextSourceIndex; + + uint8_t b; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the state machine state */ + isSingleByteMode=scsu->toUIsSingleByteMode; + state=scsu->toUState; + quoteWindow=scsu->toUQuoteWindow; + dynamicWindow=scsu->toUDynamicWindow; + byteOne=scsu->toUByteOne; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=state==readCommand ? 0 : -1; + nextSourceIndex=0; + + /* + * conversion "loop" + * + * For performance, this is not a normal C loop. + * Instead, there are two code blocks for the two SCSU modes. + * The function branches to either one, and a change of the mode is done with a goto to + * the other branch. 
+ * + * Each branch has two conventional loops: + * - a fast-path loop for the most common codes in the mode + * - a loop for all other codes in the mode + * When the fast-path runs into a code that it cannot handle, its loop ends and it + * runs into the following loop to handle the other codes. + * The end of the input or output buffer is also handled by the slower loop. + * The slow loop jumps (goto) to the fast-path loop again as soon as possible. + * + * The callback handling is done by returning with an error code. + * The conversion framework actually calls the callback function. + */ + if(isSingleByteMode) { + /* fast path for single-byte mode */ + if(state==readCommand) { +fastSingle: + while(source=0x20) { + ++source; + ++nextSourceIndex; + if(b<=0x7f) { + /* write US-ASCII graphic character or DEL */ + *target++=(UChar)b; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + sourceIndex=nextSourceIndex; + } + } + + /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ +singleByteMode: + while(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + ++nextSourceIndex; + switch(state) { + case readCommand: + /* redundant conditions are commented out */ + /* here: b<0x20 because otherwise we would be in fastSingle */ + if((1UL<toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + + /* store the first byte of a multibyte sequence in toUBytes[] */ + cnv->toUBytes[0]=b; + cnv->toULength=1; + break; + case quotePairOne: + byteOne=b; + 
cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + case quoteOne: + if(b<0x80) { + /* all static offsets are in the BMP */ + *target++=(UChar)(staticOffsets[quoteWindow]+b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + case definePairOne: + dynamicWindow=(int8_t)((b>>5)&7); + byteOne=(uint8_t)(b&0x1f); + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=definePairTwo; + break; + case definePairTwo: + scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + case defineOne: + if(b==0) { + /* callback(illegal): Reserved window offset value 0 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; + } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { + scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; + } else if(b>=fixedThreshold) { + scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; + } else { + /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + } + } + } else { + /* fast path for Unicode mode */ + if(state==readCommand) { 
+fastUnicode: + while(source+1(Urs-UC0)) { + *target++=(UChar)((b<<8)|source[1]); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + nextSourceIndex+=2; + source+=2; + } + } + + /* normal state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + ++nextSourceIndex; + switch(state) { + case readCommand: + if((uint8_t)(b-UC0)>(Urs-UC0)) { + byteOne=b; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairTwo; + } else if(/* UC0<=b && */ b<=UC7) { + dynamicWindow=(int8_t)(b-UC0); + sourceIndex=nextSourceIndex; + isSingleByteMode=TRUE; + goto fastSingle; + } else if(/* UD0<=b && */ b<=UD7) { + dynamicWindow=(int8_t)(b-UD0); + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=defineOne; + goto singleByteMode; + } else if(b==UDX) { + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=definePairOne; + goto singleByteMode; + } else if(b==UQU) { + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairOne; + } else /* Urs */ { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastUnicode; + } + } + } +endloop: + + /* set the converter state back into UConverter */ + if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { + /* reset to deal with the next character */ + state=readCommand; + } else if(state==readCommand) { + /* not in a multi-byte sequence, reset toULength */ + cnv->toULength=0; + } + scsu->toUIsSingleByteMode=isSingleByteMode; + scsu->toUState=state; + scsu->toUQuoteWindow=quoteWindow; + 
scsu->toUDynamicWindow=dynamicWindow; + scsu->toUByteOne=byteOne; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +/* + * Identical to _SCSUToUnicodeWithOffsets but without offset handling. + * If a change is made in the original function, then either + * change this function the same way or + * re-copy the original function and remove the variables + * offsets, sourceIndex, and nextSourceIndex. + */ +static void U_CALLCONV +_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + UBool isSingleByteMode; + uint8_t state, byteOne; + int8_t quoteWindow, dynamicWindow; + + uint8_t b; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + + /* get the state machine state */ + isSingleByteMode=scsu->toUIsSingleByteMode; + state=scsu->toUState; + quoteWindow=scsu->toUQuoteWindow; + dynamicWindow=scsu->toUDynamicWindow; + byteOne=scsu->toUByteOne; + + /* + * conversion "loop" + * + * For performance, this is not a normal C loop. + * Instead, there are two code blocks for the two SCSU modes. + * The function branches to either one, and a change of the mode is done with a goto to + * the other branch. + * + * Each branch has two conventional loops: + * - a fast-path loop for the most common codes in the mode + * - a loop for all other codes in the mode + * When the fast-path runs into a code that it cannot handle, its loop ends and it + * runs into the following loop to handle the other codes. + * The end of the input or output buffer is also handled by the slower loop. + * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 
+ * + * The callback handling is done by returning with an error code. + * The conversion framework actually calls the callback function. + */ + if(isSingleByteMode) { + /* fast path for single-byte mode */ + if(state==readCommand) { +fastSingle: + while(source=0x20) { + ++source; + if(b<=0x7f) { + /* write US-ASCII graphic character or DEL */ + *target++=(UChar)b; + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + } + } + + /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ +singleByteMode: + while(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + switch(state) { + case readCommand: + /* redundant conditions are commented out */ + /* here: b<0x20 because otherwise we would be in fastSingle */ + if((1UL<toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + + /* store the first byte of a multibyte sequence in toUBytes[] */ + cnv->toUBytes[0]=b; + cnv->toULength=1; + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + state=readCommand; + goto fastSingle; + case quoteOne: + if(b<0x80) { + /* all static offsets are in the BMP */ + *target++=(UChar)(staticOffsets[quoteWindow]+b); + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + state=readCommand; + goto fastSingle; + case definePairOne: + dynamicWindow=(int8_t)((b>>5)&7); + byteOne=(uint8_t)(b&0x1f); + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=definePairTwo; + break; + case definePairTwo: + scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); + state=readCommand; + goto fastSingle; + case defineOne: + if(b==0) { + /* callback(illegal): Reserved window offset value 0 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; + } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { + scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; + } else if(b>=fixedThreshold) { + scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; + } else { + /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } + state=readCommand; + goto fastSingle; + } + } + } else { + /* fast path for Unicode mode */ + if(state==readCommand) { +fastUnicode: + while(source+1(Urs-UC0)) { + *target++=(UChar)((b<<8)|source[1]); + source+=2; + } + } + + /* normal state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + switch(state) { + case readCommand: + if((uint8_t)(b-UC0)>(Urs-UC0)) { + byteOne=b; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairTwo; + } else if(/* UC0<=b && */ b<=UC7) { + dynamicWindow=(int8_t)(b-UC0); + isSingleByteMode=TRUE; + goto fastSingle; + } else if(/* UD0<=b && */ b<=UD7) { + dynamicWindow=(int8_t)(b-UD0); + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=defineOne; + goto singleByteMode; + } else if(b==UDX) { + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=definePairOne; + goto singleByteMode; + } else if(b==UQU) { 
+ cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairOne; + } else /* Urs */ { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + state=readCommand; + goto fastUnicode; + } + } + } +endloop: + + /* set the converter state back into UConverter */ + if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { + /* reset to deal with the next character */ + state=readCommand; + } else if(state==readCommand) { + /* not in a multi-byte sequence, reset toULength */ + cnv->toULength=0; + } + scsu->toUIsSingleByteMode=isSingleByteMode; + scsu->toUState=state; + scsu->toUQuoteWindow=quoteWindow; + scsu->toUDynamicWindow=dynamicWindow; + scsu->toUByteOne=byteOne; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + return; +} +U_CDECL_END +/* SCSU-from-Unicode conversion functions ----------------------------------- */ + +/* + * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve + * reasonable results. The lookahead is minimal. + * Many cases are simple: + * A character fits directly into the current mode, a dynamic or static window, + * or is not compressible. These cases are tested first. + * Real compression heuristics are applied to the rest, in code branches for + * single/Unicode mode and BMP/supplementary code points. + * The heuristics used here are extremely simple. + */ + +/* get the number of the window that this character is in, or -1 */ +static int8_t +getWindow(const uint32_t offsets[8], uint32_t c) { + int i; + for(i=0; i<8; ++i) { + if((uint32_t)(c-offsets[i])<=0x7f) { + return (int8_t)(i); + } + } + return -1; +} + +/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? 
*/ +static UBool +isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { + return (UBool)(c<=offset+0x7f && + (c>=offset || (c<=0x7f && + (c>=0x20 || (1UL<windowUse[scsu->nextWindowUseIndex]; + if(++scsu->nextWindowUseIndex==8) { + scsu->nextWindowUseIndex=0; + } + return window; +} + +/* + * useDynamicWindow() adjusts + * windowUse[] and nextWindowUseIndex for the algorithm to choose + * the next dynamic window to be defined; + * a subclass may override it and provide its own algorithm. + */ +static void +useDynamicWindow(SCSUData *scsu, int8_t window) { + /* + * move the existing window, which just became the most recently used one, + * up in windowUse[] to nextWindowUseIndex-1 + */ + + /* first, find the index of the window - backwards to favor the more recently used windows */ + int i, j; + + i=scsu->nextWindowUseIndex; + do { + if(--i<0) { + i=7; + } + } while(scsu->windowUse[i]!=window); + + /* now copy each windowUse[i+1] to [i] */ + j=i+1; + if(j==8) { + j=0; + } + while(j!=scsu->nextWindowUseIndex) { + scsu->windowUse[i]=scsu->windowUse[j]; + i=j; + if(++j==8) { j=0; } + } + + /* finally, set the window into the most recently used index */ + scsu->windowUse[i]=window; +} + +/* + * calculate the offset and the code for a dynamic window that contains the character + * takes fixed offsets into account + * the offset of the window is stored in the offset variable, + * the code is returned + * + * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code + */ +static int +getDynamicOffset(uint32_t c, uint32_t *pOffset) { + int i; + + for(i=0; i<7; ++i) { + if((uint32_t)(c-fixedOffsets[i])<=0x7f) { + *pOffset=fixedOffsets[i]; + return 0xf9+i; + } + } + + if(c<0x80) { + /* No dynamic window for US-ASCII. 
*/ + return -1; + } else if(c<0x3400 || + (uint32_t)(c-0x10000)<(0x14000-0x10000) || + (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) + ) { + /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ + *pOffset=c&0x7fffff80; + return (int)(c>>7); + } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { + /* For these characters we need to take the gapOffset into account. */ + *pOffset=c&0x7fffff80; + return (int)((c-gapOffset)>>7); + } else { + return -1; + } +} +U_CDECL_BEGIN +/* + * Idea for compression: + * - save SCSUData and other state before really starting work + * - at endloop, see if compression could be better with just unicode mode + * - don't do this if a callback has been called + * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning + * - different buffer handling! + * + * Drawback or need for corrective handling: + * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and + * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible + * not only for compression but also for HTML/XML documents with following charset/encoding announcers. + * + * How to achieve both? + * - Only replace the result after an SDX or SCU? 
+ */ + +static void U_CALLCONV +_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t *offsets; + + UBool isSingleByteMode; + uint8_t dynamicWindow; + uint32_t currentOffset; + + uint32_t c, delta; + + int32_t sourceIndex, nextSourceIndex; + + int32_t length; + + /* variables for compression heuristics */ + uint32_t offset; + UChar lead, trail; + int code; + int8_t window; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + /* get the state machine state */ + isSingleByteMode=scsu->fromUIsSingleByteMode; + dynamicWindow=scsu->fromUDynamicWindow; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + + c=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= c==0 ? 
0 : -1; + nextSourceIndex=0; + + /* similar conversion "loop" as in toUnicode */ +loop: + if(isSingleByteMode) { + if(c!=0 && targetCapacity>0) { + goto getTrailSingle; + } + + /* state machine for single-byte mode */ +/* singleByteMode: */ + while(sourcefromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* might check if there are more characters in this window to come */ + /* define an extended window with this character */ + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* change to Unicode mode and output this (lead, trail) pair */ + isSingleByteMode=FALSE; + *target++=(uint8_t)SCU; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else if(c<0xa0) { + /* quote C1 control character */ + c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ + length=2; + goto outputBytes; + } else if(c==0xfeff || c>=0xfff0) { + /* quote signature character=byte order mark and specials */ + c|=SQU<<16; + length=3; + goto outputBytes; + } else { + /* compress all other BMP characters */ + if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a window defined that contains this character - switch to it or quote from it? 
*/ + if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { + /* change to dynamic window */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else { + /* quote from dynamic window */ + c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; + length=2; + goto outputBytes; + } + } else if((window=getWindow(staticOffsets, c))>=0) { + /* quote from static window */ + c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && + (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) + ) { + /* + * this character is not compressible (a BMP ideograph or similar); + * switch to Unicode mode if this is the last character in the block + * or there is at least one more ideograph following immediately + */ + isSingleByteMode=FALSE; + c|=SCU<<16; + length=3; + goto outputBytes; + } else { + /* quote Unicode */ + c|=SQU<<16; + length=3; + goto outputBytes; + } + } + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + } + } else { + if(c!=0 && targetCapacity>0) { + goto getTrailUnicode; + } + + /* state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source=2) { + *target++=(uint8_t)(c>>8); + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + } + targetCapacity-=2; + } else { + 
length=2; + goto outputBytes; + } + } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { + /* compress BMP character if the following one is not an uncompressible ideograph */ + if(!(sourcefromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + isSingleByteMode=TRUE; + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + isSingleByteMode=TRUE; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } + } + + /* don't know how to compress this character, just write it directly */ + length=2; + goto outputBytes; + } else if(c<0xe000) { + /* c is a surrogate */ + if(U16_IS_SURROGATE_LEAD(c)) { +getTrailUnicode: + lead=(UChar)c; + if(sourcefromUDynamicOffsets, c))>=0 && + !(sourcefromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if(source=0 + ) { + /* two supplementary characters in (probably) the same window - define an extended one */ + isSingleByteMode=TRUE; + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* don't know how to compress this character, just write it directly */ + c=((uint32_t)lead<<16)|trail; + length=4; + goto 
outputBytes; + } + } else /* 0xe000<=c<0xf300 */ { + /* quote to avoid SCSU tags */ + c|=UQU<<16; + length=3; + goto outputBytes; + } + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + } + } +endloop: + + /* set the converter state back into UConverter */ + scsu->fromUIsSingleByteMode=isSingleByteMode; + scsu->fromUDynamicWindow=dynamicWindow; + + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; + return; + +outputBytes: + /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + if(offsets==NULL) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + } else { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(c>>24); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(c>>16); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + *offsets++=sourceIndex; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + } + targetCapacity-=length; + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + goto loop; + } else { + uint8_t *p; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. 
+ */ + /* we know that 0<=targetCapacitycharErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 4: + *p++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *p++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *p++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *p=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + c>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(c>>16); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + default: + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + c=0; + goto endloop; + } +} + +/* + * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. + * If a change is made in the original function, then either + * change this function the same way or + * re-copy the original function and remove the variables + * offsets, sourceIndex, and nextSourceIndex. 
+ */ +static void U_CALLCONV +_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + UBool isSingleByteMode; + uint8_t dynamicWindow; + uint32_t currentOffset; + + uint32_t c, delta; + + int32_t length; + + /* variables for compression heuristics */ + uint32_t offset; + UChar lead, trail; + int code; + int8_t window; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + + /* get the state machine state */ + isSingleByteMode=scsu->fromUIsSingleByteMode; + dynamicWindow=scsu->fromUDynamicWindow; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + + c=cnv->fromUChar32; + + /* similar conversion "loop" as in toUnicode */ +loop: + if(isSingleByteMode) { + if(c!=0 && targetCapacity>0) { + goto getTrailSingle; + } + + /* state machine for single-byte mode */ +/* singleByteMode: */ + while(sourcefromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* might check if there are more characters in this window to come */ + /* define an extended window with this character */ + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { 
+ /* change to Unicode mode and output this (lead, trail) pair */ + isSingleByteMode=FALSE; + *target++=(uint8_t)SCU; + --targetCapacity; + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else if(c<0xa0) { + /* quote C1 control character */ + c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ + length=2; + goto outputBytes; + } else if(c==0xfeff || c>=0xfff0) { + /* quote signature character=byte order mark and specials */ + c|=SQU<<16; + length=3; + goto outputBytes; + } else { + /* compress all other BMP characters */ + if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a window defined that contains this character - switch to it or quote from it? */ + if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { + /* change to dynamic window */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else { + /* quote from dynamic window */ + c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; + length=2; + goto outputBytes; + } + } else if((window=getWindow(staticOffsets, c))>=0) { + /* quote from static window */ + c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && + (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) + ) { + /* + * this character is not compressible (a BMP ideograph or similar); + * switch to Unicode mode if this is the 
last character in the block + * or there is at least one more ideograph following immediately + */ + isSingleByteMode=FALSE; + c|=SCU<<16; + length=3; + goto outputBytes; + } else { + /* quote Unicode */ + c|=SQU<<16; + length=3; + goto outputBytes; + } + } + + /* normal end of conversion: prepare for a new character */ + c=0; + } + } else { + if(c!=0 && targetCapacity>0) { + goto getTrailUnicode; + } + + /* state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source=2) { + *target++=(uint8_t)(c>>8); + *target++=(uint8_t)c; + targetCapacity-=2; + } else { + length=2; + goto outputBytes; + } + } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { + /* compress BMP character if the following one is not an uncompressible ideograph */ + if(!(sourcefromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + isSingleByteMode=TRUE; + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + isSingleByteMode=TRUE; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } + } + + /* don't know how to compress this character, just write it directly */ + length=2; + goto outputBytes; + } else if(c<0xe000) { + /* c is a surrogate */ + if(U16_IS_SURROGATE_LEAD(c)) { +getTrailUnicode: + lead=(UChar)c; + if(sourcefromUDynamicOffsets, c))>=0 && + !(sourcefromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + 
} else if(source=0 + ) { + /* two supplementary characters in (probably) the same window - define an extended one */ + isSingleByteMode=TRUE; + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* don't know how to compress this character, just write it directly */ + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else /* 0xe000<=c<0xf300 */ { + /* quote to avoid SCSU tags */ + c|=UQU<<16; + length=3; + goto outputBytes; + } + + /* normal end of conversion: prepare for a new character */ + c=0; + } + } +endloop: + + /* set the converter state back into UConverter */ + scsu->fromUIsSingleByteMode=isSingleByteMode; + scsu->fromUDynamicWindow=dynamicWindow; + + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + return; + +outputBytes: + /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + targetCapacity-=length; + + /* normal end of conversion: prepare for a new character */ + c=0; + goto loop; + } else { + uint8_t *p; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. 
+ */ + /* we know that 0<=targetCapacitycharErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 4: + *p++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *p++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *p++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *p=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + c>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + U_FALLTHROUGH; + default: + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + c=0; + goto endloop; + } +} + +/* miscellaneous ------------------------------------------------------------ */ + +static const char * U_CALLCONV +_SCSUGetName(const UConverter *cnv) { + SCSUData *scsu=(SCSUData *)cnv->extraInfo; + + switch(scsu->locale) { + case l_ja: + return "SCSU,locale=ja"; + default: + return "SCSU"; + } +} + +/* structure for SafeClone calculations */ +struct cloneSCSUStruct +{ + UConverter cnv; + SCSUData mydata; +}; + +static UConverter * U_CALLCONV +_SCSUSafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneSCSUStruct * localClone; + int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); + + if (U_FAILURE(*status)){ + return 0; + } + + if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return 0; + } + + localClone = (struct cloneSCSUStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); + localClone->cnv.extraInfo = 
&localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + return &localClone->cnv; +} +U_CDECL_END + +static const UConverterImpl _SCSUImpl={ + UCNV_SCSU, + + NULL, + NULL, + + _SCSUOpen, + _SCSUClose, + _SCSUReset, + + _SCSUToUnicode, + _SCSUToUnicodeWithOffsets, + _SCSUFromUnicode, + _SCSUFromUnicodeWithOffsets, + NULL, + + NULL, + _SCSUGetName, + NULL, + _SCSUSafeClone, + ucnv_getCompleteUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _SCSUStaticData={ + sizeof(UConverterStaticData), + "SCSU", + 1212, /* CCSID for SCSU */ + UCNV_IBM, UCNV_SCSU, + 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ + /* + * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode + * substitution string. + */ + { 0x0e, 0xff, 0xfd, 0 }, 3, + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _SCSUData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl); + +#endif diff --git a/deps/icu-small/source/common/ucnvsel.cpp b/deps/icu-small/source/common/ucnvsel.cpp index f6384cf749..90c7a18b93 100644 --- a/deps/icu-small/source/common/ucnvsel.cpp +++ b/deps/icu-small/source/common/ucnvsel.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/ucol_data.h b/deps/icu-small/source/common/ucol_data.h index cdd328ecca..83f54abba1 100644 --- a/deps/icu-small/source/common/ucol_data.h +++ b/deps/icu-small/source/common/ucol_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol_data.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucol_swp.cpp b/deps/icu-small/source/common/ucol_swp.cpp index ddec0ec1c6..3055abaca3 100644 --- a/deps/icu-small/source/common/ucol_swp.cpp +++ b/deps/icu-small/source/common/ucol_swp.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ucol_swp.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucol_swp.h b/deps/icu-small/source/common/ucol_swp.h index 422436dd30..fd8be9aa54 100644 --- a/deps/icu-small/source/common/ucol_swp.h +++ b/deps/icu-small/source/common/ucol_swp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ucol_swp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index 41fd8aa212..085f994858 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -1030,7 +1030,8 @@ collectCurrencyNames(const char* locale, const UnicodeString *symbol; while ((symbol = iter.next()) != NULL) { (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; - (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*) symbol->getBuffer(); + (*currencySymbols)[*total_currency_symbol_count].currencyName = + const_cast(symbol->getBuffer()); (*currencySymbols)[*total_currency_symbol_count].flag = 0; (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length(); } diff --git a/deps/icu-small/source/common/ucurrimp.h b/deps/icu-small/source/common/ucurrimp.h index b35d6f47c6..6e468fd4c9 100644 --- a/deps/icu-small/source/common/ucurrimp.h +++ b/deps/icu-small/source/common/ucurrimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/udata.cpp b/deps/icu-small/source/common/udata.cpp index 7585855171..aa23ab719a 100644 --- a/deps/icu-small/source/common/udata.cpp +++ b/deps/icu-small/source/common/udata.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: udata.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -110,8 +110,12 @@ static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER( static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */ static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER; +#if U_PLATFORM_HAS_WINUWP_API == 0 static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized. // Modifying is documented as thread-unsafe. +#else +static UDataFileAccess gDataFileAccess = UDATA_NO_FILES; // Windows UWP looks in one spot explicitly +#endif static UBool U_CALLCONV udata_cleanup(void) @@ -619,12 +623,14 @@ U_NAMESPACE_END /*----------------------------------------------------------------------* * * - * Add a static reference to the common data library * + * Add a static reference to the common data library * * Unless overridden by an explicit udata_setCommonData, this will be * * our common data. 
* * * *----------------------------------------------------------------------*/ +#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT; +#endif /* * This would be a good place for weak-linkage declarations of @@ -672,6 +678,7 @@ openCommonData(const char *path, /* Path from OpenChoice? */ if(gCommonICUDataArray[commonDataIndex] != NULL) { return gCommonICUDataArray[commonDataIndex]; } +#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time int32_t i; for(i = 0; i < commonDataIndex; ++i) { if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) { @@ -679,6 +686,7 @@ openCommonData(const char *path, /* Path from OpenChoice? */ return NULL; } } +#endif } /* Add the linked-in data to the list. */ @@ -694,11 +702,13 @@ openCommonData(const char *path, /* Path from OpenChoice? */ setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); } */ +#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); { Mutex lock; return gCommonICUDataArray[commonDataIndex]; } +#endif } @@ -1245,9 +1255,14 @@ doOpenChoice(const char *path, const char *type, const char *name, fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data()); #endif +#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time if(path == NULL) { path = COMMON_DATA_NAME; /* "icudt26e" */ } +#else + // Windows UWP expects only a single data file. + path = COMMON_DATA_NAME; /* "icudt26e" */ +#endif /************************ Begin loop looking for ind. 
files ***************/ #ifdef UDATA_DEBUG diff --git a/deps/icu-small/source/common/udatamem.c b/deps/icu-small/source/common/udatamem.c deleted file mode 100644 index daa919373b..0000000000 --- a/deps/icu-small/source/common/udatamem.c +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*---------------------------------------------------------------------------------- - * - * UDataMemory A class-like struct that serves as a handle to a piece of memory - * that contains some ICU data (resource, converters, whatever.) - * - * When an application opens ICU data (with udata_open, for example, - * a UDataMemory * is returned. - * - *----------------------------------------------------------------------------------*/ - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "unicode/udata.h" - -#include "udatamem.h" - -U_CFUNC void UDataMemory_init(UDataMemory *This) { - uprv_memset(This, 0, sizeof(UDataMemory)); - This->length=-1; -} - - -U_CFUNC void UDatamemory_assign(UDataMemory *dest, UDataMemory *source) { - /* UDataMemory Assignment. Destination UDataMemory must be initialized first. 
*/ - UBool mallocedFlag = dest->heapAllocated; - uprv_memcpy(dest, source, sizeof(UDataMemory)); - dest->heapAllocated = mallocedFlag; -} - -U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) { - UDataMemory *This; - - if (U_FAILURE(*pErr)) { - return NULL; - } - This = uprv_malloc(sizeof(UDataMemory)); - if (This == NULL) { - *pErr = U_MEMORY_ALLOCATION_ERROR; } - else { - UDataMemory_init(This); - This->heapAllocated = TRUE; - } - return This; -} - - -U_CFUNC const DataHeader * -UDataMemory_normalizeDataPointer(const void *p) { - /* allow the data to be optionally prepended with an alignment-forcing double value */ - const DataHeader *pdh = (const DataHeader *)p; - if(pdh==NULL || (pdh->dataHeader.magic1==0xda && pdh->dataHeader.magic2==0x27)) { - return pdh; - } else { -#if U_PLATFORM == U_PF_OS400 - /* - TODO: Fix this once the compiler implements this feature. Keep in sync with genccode.c - - This is here because this platform can't currently put - const data into the read-only pages of an object or - shared library (service program). Only strings are allowed in read-only - pages, so we use char * strings to store the data. - - In order to prevent the beginning of the data from ever matching the - magic numbers we must skip the initial double. 
- [grhoten 4/24/2003] - */ - return (const DataHeader *)*((const void **)p+1); -#else - return (const DataHeader *)((const double *)p+1); -#endif - } -} - - -U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr) { - This->pHeader = UDataMemory_normalizeDataPointer(dataAddr); -} - - -U_CAPI void U_EXPORT2 -udata_close(UDataMemory *pData) { - if(pData!=NULL) { - uprv_unmapFile(pData); - if(pData->heapAllocated ) { - uprv_free(pData); - } else { - UDataMemory_init(pData); - } - } -} - -U_CAPI const void * U_EXPORT2 -udata_getMemory(UDataMemory *pData) { - if(pData!=NULL && pData->pHeader!=NULL) { - return (char *)(pData->pHeader)+udata_getHeaderSize(pData->pHeader); - } else { - return NULL; - } -} - -/** - * Get the length of the data item if possible. - * The length may be up to 15 bytes larger than the actual data. - * - * TODO Consider making this function public. - * It would have to return the actual length in more cases. - * For example, the length of the last item in a .dat package could be - * computed from the size of the whole .dat package minus the offset of the - * last item. - * The size of a file that was directly memory-mapped could be determined - * using some system API. - * - * In order to get perfect values for all data items, we may have to add a - * length field to UDataInfo, but that complicates data generation - * and may be overkill. - * - * @param pData The data item. - * @return the length of the data item, or -1 if not known - * @internal Currently used only in cintltst/udatatst.c - */ -U_CAPI int32_t U_EXPORT2 -udata_getLength(const UDataMemory *pData) { - if(pData!=NULL && pData->pHeader!=NULL && pData->length>=0) { - /* - * subtract the header size, - * return only the size of the actual data starting at udata_getMemory() - */ - return pData->length-udata_getHeaderSize(pData->pHeader); - } else { - return -1; - } -} - -/** - * Get the memory including the data header. 
- * Used in cintltst/udatatst.c - * @internal - */ -U_CAPI const void * U_EXPORT2 -udata_getRawMemory(const UDataMemory *pData) { - if(pData!=NULL && pData->pHeader!=NULL) { - return pData->pHeader; - } else { - return NULL; - } -} - -U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This) { - return This->pHeader != NULL; -} diff --git a/deps/icu-small/source/common/udatamem.cpp b/deps/icu-small/source/common/udatamem.cpp new file mode 100644 index 0000000000..6bf7c01235 --- /dev/null +++ b/deps/icu-small/source/common/udatamem.cpp @@ -0,0 +1,161 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*---------------------------------------------------------------------------------- + * + * UDataMemory A class-like struct that serves as a handle to a piece of memory + * that contains some ICU data (resource, converters, whatever.) + * + * When an application opens ICU data (with udata_open, for example, + * a UDataMemory * is returned. + * + *----------------------------------------------------------------------------------*/ + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "unicode/udata.h" + +#include "udatamem.h" + +U_CFUNC void UDataMemory_init(UDataMemory *This) { + uprv_memset(This, 0, sizeof(UDataMemory)); + This->length=-1; +} + + +U_CFUNC void UDatamemory_assign(UDataMemory *dest, UDataMemory *source) { + /* UDataMemory Assignment. Destination UDataMemory must be initialized first. 
*/ + UBool mallocedFlag = dest->heapAllocated; + uprv_memcpy(dest, source, sizeof(UDataMemory)); + dest->heapAllocated = mallocedFlag; +} + +U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) { + UDataMemory *This; + + if (U_FAILURE(*pErr)) { + return NULL; + } + This = (UDataMemory *)uprv_malloc(sizeof(UDataMemory)); + if (This == NULL) { + *pErr = U_MEMORY_ALLOCATION_ERROR; } + else { + UDataMemory_init(This); + This->heapAllocated = TRUE; + } + return This; +} + + +U_CFUNC const DataHeader * +UDataMemory_normalizeDataPointer(const void *p) { + /* allow the data to be optionally prepended with an alignment-forcing double value */ + const DataHeader *pdh = (const DataHeader *)p; + if(pdh==NULL || (pdh->dataHeader.magic1==0xda && pdh->dataHeader.magic2==0x27)) { + return pdh; + } else { +#if U_PLATFORM == U_PF_OS400 + /* + TODO: Fix this once the compiler implements this feature. Keep in sync with genccode.c + + This is here because this platform can't currently put + const data into the read-only pages of an object or + shared library (service program). Only strings are allowed in read-only + pages, so we use char * strings to store the data. + + In order to prevent the beginning of the data from ever matching the + magic numbers we must skip the initial double. 
+ [grhoten 4/24/2003] + */ + return (const DataHeader *)*((const void **)p+1); +#else + return (const DataHeader *)((const double *)p+1); +#endif + } +} + + +U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr) { + This->pHeader = UDataMemory_normalizeDataPointer(dataAddr); +} + + +U_CAPI void U_EXPORT2 +udata_close(UDataMemory *pData) { + if(pData!=NULL) { + uprv_unmapFile(pData); + if(pData->heapAllocated ) { + uprv_free(pData); + } else { + UDataMemory_init(pData); + } + } +} + +U_CAPI const void * U_EXPORT2 +udata_getMemory(UDataMemory *pData) { + if(pData!=NULL && pData->pHeader!=NULL) { + return (char *)(pData->pHeader)+udata_getHeaderSize(pData->pHeader); + } else { + return NULL; + } +} + +/** + * Get the length of the data item if possible. + * The length may be up to 15 bytes larger than the actual data. + * + * TODO Consider making this function public. + * It would have to return the actual length in more cases. + * For example, the length of the last item in a .dat package could be + * computed from the size of the whole .dat package minus the offset of the + * last item. + * The size of a file that was directly memory-mapped could be determined + * using some system API. + * + * In order to get perfect values for all data items, we may have to add a + * length field to UDataInfo, but that complicates data generation + * and may be overkill. + * + * @param pData The data item. + * @return the length of the data item, or -1 if not known + * @internal Currently used only in cintltst/udatatst.c + */ +U_CAPI int32_t U_EXPORT2 +udata_getLength(const UDataMemory *pData) { + if(pData!=NULL && pData->pHeader!=NULL && pData->length>=0) { + /* + * subtract the header size, + * return only the size of the actual data starting at udata_getMemory() + */ + return pData->length-udata_getHeaderSize(pData->pHeader); + } else { + return -1; + } +} + +/** + * Get the memory including the data header. 
+ * Used in cintltst/udatatst.c + * @internal + */ +U_CAPI const void * U_EXPORT2 +udata_getRawMemory(const UDataMemory *pData) { + if(pData!=NULL && pData->pHeader!=NULL) { + return pData->pHeader; + } else { + return NULL; + } +} + +U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This) { + return This->pHeader != NULL; +} diff --git a/deps/icu-small/source/common/udatamem.h b/deps/icu-small/source/common/udatamem.h index 385a77722c..a05dd69756 100644 --- a/deps/icu-small/source/common/udatamem.h +++ b/deps/icu-small/source/common/udatamem.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/udataswp.c b/deps/icu-small/source/common/udataswp.c deleted file mode 100644 index f47ac1f5e0..0000000000 --- a/deps/icu-small/source/common/udataswp.c +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: udataswp.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun05 -* created by: Markus W. Scherer -* -* Definitions for ICU data transformations for different platforms, -* changing between big- and little-endian data and/or between -* charset families (ASCII<->EBCDIC). 
-*/ - -#include -#include "unicode/utypes.h" -#include "unicode/udata.h" /* UDataInfo */ -#include "ucmndata.h" /* DataHeader */ -#include "cmemory.h" -#include "udataswp.h" - -/* swapping primitives ------------------------------------------------------ */ - -static int32_t U_CALLCONV -uprv_swapArray16(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint16_t *p; - uint16_t *q; - int32_t count; - uint16_t x; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - p=(const uint16_t *)inData; - q=(uint16_t *)outData; - count=length/2; - while(count>0) { - x=*p++; - *q++=(uint16_t)((x<<8)|(x>>8)); - --count; - } - - return length; -} - -static int32_t U_CALLCONV -uprv_copyArray16(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - return length; -} - -static int32_t U_CALLCONV -uprv_swapArray32(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint32_t *p; - uint32_t *q; - int32_t count; - uint32_t x; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - p=(const uint32_t *)inData; - q=(uint32_t *)outData; - count=length/4; - while(count>0) { - x=*p++; - *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); - --count; - } - - return length; -} 
- -static int32_t U_CALLCONV -uprv_copyArray32(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - return length; -} - -static int32_t U_CALLCONV -uprv_swapArray64(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint64_t *p; - uint64_t *q; - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - p=(const uint64_t *)inData; - q=(uint64_t *)outData; - count=length/8; - while(count>0) { - uint64_t x=*p++; - x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)| - ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56); - *q++=x; - --count; - } - - return length; -} - -static int32_t U_CALLCONV -uprv_copyArray64(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - return length; -} - -static uint16_t U_CALLCONV -uprv_readSwapUInt16(uint16_t x) { - return (uint16_t)((x<<8)|(x>>8)); -} - -static uint16_t U_CALLCONV -uprv_readDirectUInt16(uint16_t x) { - return x; -} - -static uint32_t U_CALLCONV -uprv_readSwapUInt32(uint32_t x) { - return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); -} - -static uint32_t U_CALLCONV 
-uprv_readDirectUInt32(uint32_t x) { - return x; -} - -static void U_CALLCONV -uprv_writeSwapUInt16(uint16_t *p, uint16_t x) { - *p=(uint16_t)((x<<8)|(x>>8)); -} - -static void U_CALLCONV -uprv_writeDirectUInt16(uint16_t *p, uint16_t x) { - *p=x; -} - -static void U_CALLCONV -uprv_writeSwapUInt32(uint32_t *p, uint32_t x) { - *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); -} - -static void U_CALLCONV -uprv_writeDirectUInt32(uint32_t *p, uint32_t x) { - *p=x; -} - -U_CAPI int16_t U_EXPORT2 -udata_readInt16(const UDataSwapper *ds, int16_t x) { - return (int16_t)ds->readUInt16((uint16_t)x); -} - -U_CAPI int32_t U_EXPORT2 -udata_readInt32(const UDataSwapper *ds, int32_t x) { - return (int32_t)ds->readUInt32((uint32_t)x); -} - -/** - * Swap a block of invariant, NUL-terminated strings, but not padding - * bytes after the last string. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -udata_swapInvStringBlock(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const char *inChars; - int32_t stringsLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* reduce the strings length to not include bytes after the last NUL */ - inChars=(const char *)inData; - stringsLength=length; - while(stringsLength>0 && inChars[stringsLength-1]!=0) { - --stringsLength; - } - - /* swap up to the last NUL */ - ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode); - - /* copy the bytes after the last NUL */ - if(inData!=outData && length>stringsLength) { - uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength); - } - - /* return the length including padding bytes */ - if(U_SUCCESS(*pErrorCode)) { - return length; - } else { - return 0; - } -} - -U_CAPI void U_EXPORT2 -udata_printError(const UDataSwapper *ds, - const char 
*fmt, - ...) { - va_list args; - - if(ds->printError!=NULL) { - va_start(args, fmt); - ds->printError(ds->printErrorContext, fmt, args); - va_end(args); - } -} - -/* swap a data header ------------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -udata_swapDataHeader(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const DataHeader *pHeader; - uint16_t headerSize, infoSize; - - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* check minimum length and magic bytes */ - pHeader=(const DataHeader *)inData; - if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || - pHeader->dataHeader.magic1!=0xda || - pHeader->dataHeader.magic2!=0x27 || - pHeader->info.sizeofUChar!=2 - ) { - udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n"); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - headerSize=ds->readUInt16(pHeader->dataHeader.headerSize); - infoSize=ds->readUInt16(pHeader->info.size); - - if( headerSizedataHeader)+infoSize) || - (length>=0 && length0) { - DataHeader *outHeader; - const char *s; - int32_t maxLength; - - /* Most of the fields are just bytes and need no swapping. 
*/ - if(inData!=outData) { - uprv_memcpy(outData, inData, headerSize); - } - outHeader=(DataHeader *)outData; - - outHeader->info.isBigEndian = ds->outIsBigEndian; - outHeader->info.charsetFamily = ds->outCharset; - - /* swap headerSize */ - ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode); - - /* swap UDataInfo size and reservedWord */ - ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode); - - /* swap copyright statement after the UDataInfo */ - infoSize+=sizeof(pHeader->dataHeader); - s=(const char *)inData+infoSize; - maxLength=headerSize-infoSize; - /* get the length of the string */ - for(length=0; lengthswapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode); - } - - return headerSize; -} - -/* API functions ------------------------------------------------------------ */ - -U_CAPI UDataSwapper * U_EXPORT2 -udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, - UBool outIsBigEndian, uint8_t outCharset, - UErrorCode *pErrorCode) { - UDataSwapper *swapper; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - /* allocate the swapper */ - swapper=uprv_malloc(sizeof(UDataSwapper)); - if(swapper==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(swapper, 0, sizeof(UDataSwapper)); - - /* set values and functions pointers according to in/out parameters */ - swapper->inIsBigEndian=inIsBigEndian; - swapper->inCharset=inCharset; - swapper->outIsBigEndian=outIsBigEndian; - swapper->outCharset=outCharset; - - swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16; - swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32; - - swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? 
uprv_writeDirectUInt16 : uprv_writeSwapUInt16; - swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32; - - swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic; - - if(inIsBigEndian==outIsBigEndian) { - swapper->swapArray16=uprv_copyArray16; - swapper->swapArray32=uprv_copyArray32; - swapper->swapArray64=uprv_copyArray64; - } else { - swapper->swapArray16=uprv_swapArray16; - swapper->swapArray32=uprv_swapArray32; - swapper->swapArray64=uprv_swapArray64; - } - - if(inCharset==U_ASCII_FAMILY) { - swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii; - } else /* U_EBCDIC_FAMILY */ { - swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic; - } - - return swapper; -} - -U_CAPI UDataSwapper * U_EXPORT2 -udata_openSwapperForInputData(const void *data, int32_t length, - UBool outIsBigEndian, uint8_t outCharset, - UErrorCode *pErrorCode) { - const DataHeader *pHeader; - uint16_t headerSize, infoSize; - UBool inIsBigEndian; - int8_t inCharset; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - if( data==NULL || - (length>=0 && length<(int32_t)sizeof(DataHeader)) || - outCharset>U_EBCDIC_FAMILY - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - pHeader=(const DataHeader *)data; - if( (length>=0 && lengthdataHeader.magic1!=0xda || - pHeader->dataHeader.magic2!=0x27 || - pHeader->info.sizeofUChar!=2 - ) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inIsBigEndian=(UBool)pHeader->info.isBigEndian; - inCharset=pHeader->info.charsetFamily; - - if(inIsBigEndian==U_IS_BIG_ENDIAN) { - headerSize=pHeader->dataHeader.headerSize; - infoSize=pHeader->info.size; - } else { - headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize); - infoSize=uprv_readSwapUInt16(pHeader->info.size); - } - - if( headerSizedataHeader)+infoSize) || - (length>=0 && lengthEBCDIC). 
+*/ + +#include +#include "unicode/utypes.h" +#include "unicode/udata.h" /* UDataInfo */ +#include "ucmndata.h" /* DataHeader */ +#include "cmemory.h" +#include "udataswp.h" + +/* swapping primitives ------------------------------------------------------ */ + +static int32_t U_CALLCONV +uprv_swapArray16(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint16_t *p; + uint16_t *q; + int32_t count; + uint16_t x; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + p=(const uint16_t *)inData; + q=(uint16_t *)outData; + count=length/2; + while(count>0) { + x=*p++; + *q++=(uint16_t)((x<<8)|(x>>8)); + --count; + } + + return length; +} + +static int32_t U_CALLCONV +uprv_copyArray16(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + return length; +} + +static int32_t U_CALLCONV +uprv_swapArray32(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint32_t *p; + uint32_t *q; + int32_t count; + uint32_t x; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + p=(const uint32_t *)inData; + q=(uint32_t *)outData; + count=length/4; + while(count>0) { + x=*p++; + *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); + --count; + } + + return length; +} 
+ +static int32_t U_CALLCONV +uprv_copyArray32(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + return length; +} + +static int32_t U_CALLCONV +uprv_swapArray64(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint64_t *p; + uint64_t *q; + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + p=(const uint64_t *)inData; + q=(uint64_t *)outData; + count=length/8; + while(count>0) { + uint64_t x=*p++; + x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)| + ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56); + *q++=x; + --count; + } + + return length; +} + +static int32_t U_CALLCONV +uprv_copyArray64(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + return length; +} + +static uint16_t U_CALLCONV +uprv_readSwapUInt16(uint16_t x) { + return (uint16_t)((x<<8)|(x>>8)); +} + +static uint16_t U_CALLCONV +uprv_readDirectUInt16(uint16_t x) { + return x; +} + +static uint32_t U_CALLCONV +uprv_readSwapUInt32(uint32_t x) { + return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); +} + +static uint32_t U_CALLCONV 
+uprv_readDirectUInt32(uint32_t x) { + return x; +} + +static void U_CALLCONV +uprv_writeSwapUInt16(uint16_t *p, uint16_t x) { + *p=(uint16_t)((x<<8)|(x>>8)); +} + +static void U_CALLCONV +uprv_writeDirectUInt16(uint16_t *p, uint16_t x) { + *p=x; +} + +static void U_CALLCONV +uprv_writeSwapUInt32(uint32_t *p, uint32_t x) { + *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); +} + +static void U_CALLCONV +uprv_writeDirectUInt32(uint32_t *p, uint32_t x) { + *p=x; +} + +U_CAPI int16_t U_EXPORT2 +udata_readInt16(const UDataSwapper *ds, int16_t x) { + return (int16_t)ds->readUInt16((uint16_t)x); +} + +U_CAPI int32_t U_EXPORT2 +udata_readInt32(const UDataSwapper *ds, int32_t x) { + return (int32_t)ds->readUInt32((uint32_t)x); +} + +/** + * Swap a block of invariant, NUL-terminated strings, but not padding + * bytes after the last string. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +udata_swapInvStringBlock(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const char *inChars; + int32_t stringsLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* reduce the strings length to not include bytes after the last NUL */ + inChars=(const char *)inData; + stringsLength=length; + while(stringsLength>0 && inChars[stringsLength-1]!=0) { + --stringsLength; + } + + /* swap up to the last NUL */ + ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode); + + /* copy the bytes after the last NUL */ + if(inData!=outData && length>stringsLength) { + uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength); + } + + /* return the length including padding bytes */ + if(U_SUCCESS(*pErrorCode)) { + return length; + } else { + return 0; + } +} + +U_CAPI void U_EXPORT2 +udata_printError(const UDataSwapper *ds, + const char 
*fmt, + ...) { + va_list args; + + if(ds->printError!=NULL) { + va_start(args, fmt); + ds->printError(ds->printErrorContext, fmt, args); + va_end(args); + } +} + +/* swap a data header ------------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +udata_swapDataHeader(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const DataHeader *pHeader; + uint16_t headerSize, infoSize; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* check minimum length and magic bytes */ + pHeader=(const DataHeader *)inData; + if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || + pHeader->dataHeader.magic1!=0xda || + pHeader->dataHeader.magic2!=0x27 || + pHeader->info.sizeofUChar!=2 + ) { + udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n"); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + headerSize=ds->readUInt16(pHeader->dataHeader.headerSize); + infoSize=ds->readUInt16(pHeader->info.size); + + if( headerSizedataHeader)+infoSize) || + (length>=0 && length0) { + DataHeader *outHeader; + const char *s; + int32_t maxLength; + + /* Most of the fields are just bytes and need no swapping. 
*/ + if(inData!=outData) { + uprv_memcpy(outData, inData, headerSize); + } + outHeader=(DataHeader *)outData; + + outHeader->info.isBigEndian = ds->outIsBigEndian; + outHeader->info.charsetFamily = ds->outCharset; + + /* swap headerSize */ + ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode); + + /* swap UDataInfo size and reservedWord */ + ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode); + + /* swap copyright statement after the UDataInfo */ + infoSize+=sizeof(pHeader->dataHeader); + s=(const char *)inData+infoSize; + maxLength=headerSize-infoSize; + /* get the length of the string */ + for(length=0; lengthswapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode); + } + + return headerSize; +} + +/* API functions ------------------------------------------------------------ */ + +U_CAPI UDataSwapper * U_EXPORT2 +udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, + UBool outIsBigEndian, uint8_t outCharset, + UErrorCode *pErrorCode) { + UDataSwapper *swapper; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + /* allocate the swapper */ + swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper)); + if(swapper==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(swapper, 0, sizeof(UDataSwapper)); + + /* set values and functions pointers according to in/out parameters */ + swapper->inIsBigEndian=inIsBigEndian; + swapper->inCharset=inCharset; + swapper->outIsBigEndian=outIsBigEndian; + swapper->outCharset=outCharset; + + swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16; + swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32; + + swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? 
uprv_writeDirectUInt16 : uprv_writeSwapUInt16; + swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32; + + swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic; + + if(inIsBigEndian==outIsBigEndian) { + swapper->swapArray16=uprv_copyArray16; + swapper->swapArray32=uprv_copyArray32; + swapper->swapArray64=uprv_copyArray64; + } else { + swapper->swapArray16=uprv_swapArray16; + swapper->swapArray32=uprv_swapArray32; + swapper->swapArray64=uprv_swapArray64; + } + + if(inCharset==U_ASCII_FAMILY) { + swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii; + } else /* U_EBCDIC_FAMILY */ { + swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic; + } + + return swapper; +} + +U_CAPI UDataSwapper * U_EXPORT2 +udata_openSwapperForInputData(const void *data, int32_t length, + UBool outIsBigEndian, uint8_t outCharset, + UErrorCode *pErrorCode) { + const DataHeader *pHeader; + uint16_t headerSize, infoSize; + UBool inIsBigEndian; + int8_t inCharset; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + if( data==NULL || + (length>=0 && length<(int32_t)sizeof(DataHeader)) || + outCharset>U_EBCDIC_FAMILY + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + pHeader=(const DataHeader *)data; + if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || + pHeader->dataHeader.magic1!=0xda || + pHeader->dataHeader.magic2!=0x27 || + pHeader->info.sizeofUChar!=2 + ) { + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inIsBigEndian=(UBool)pHeader->info.isBigEndian; + inCharset=pHeader->info.charsetFamily; + + if(inIsBigEndian==U_IS_BIG_ENDIAN) { + headerSize=pHeader->dataHeader.headerSize; + infoSize=pHeader->info.size; + } else { + headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize); + infoSize=uprv_readSwapUInt16(pHeader->info.size); + } + + if( 
headerSizedataHeader)+infoSize) || + (length>=0 && lengthbaseContext != NULL) { - if (((_UEnumBuffer*) en->baseContext)->len < capacity) { - capacity += PAD; - en->baseContext = uprv_realloc(en->baseContext, - sizeof(int32_t) + capacity); - if (en->baseContext == NULL) { - return NULL; - } - ((_UEnumBuffer*) en->baseContext)->len = capacity; - } - } else { - capacity += PAD; - en->baseContext = uprv_malloc(sizeof(int32_t) + capacity); - if (en->baseContext == NULL) { - return NULL; - } - ((_UEnumBuffer*) en->baseContext)->len = capacity; - } - - return (void*) & ((_UEnumBuffer*) en->baseContext)->data; -} - -U_CAPI void U_EXPORT2 -uenum_close(UEnumeration* en) -{ - if (en) { - if (en->close != NULL) { - if (en->baseContext) { - uprv_free(en->baseContext); - } - en->close(en); - } else { /* this seems dangerous, but we better kill the object */ - uprv_free(en); - } - } -} - -U_CAPI int32_t U_EXPORT2 -uenum_count(UEnumeration* en, UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return -1; - } - if (en->count != NULL) { - return en->count(en, status); - } else { - *status = U_UNSUPPORTED_ERROR; - return -1; - } -} - -/* Don't call this directly. Only uenum_unext should be calling this. */ -U_CAPI const UChar* U_EXPORT2 -uenum_unextDefault(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - UChar *ustr = NULL; - int32_t len = 0; - if (en->next != NULL) { - const char *cstr = en->next(en, &len, status); - if (cstr != NULL) { - ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar)); - if (ustr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - u_charsToUChars(cstr, ustr, len+1); - } - } - } else { - *status = U_UNSUPPORTED_ERROR; - } - if (resultLength) { - *resultLength = len; - } - return ustr; -} - -/* Don't call this directly. Only uenum_next should be calling this. 
*/ -U_CAPI const char* U_EXPORT2 -uenum_nextDefault(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - if (en->uNext != NULL) { - char *tempCharVal; - const UChar *tempUCharVal = en->uNext(en, resultLength, status); - if (tempUCharVal == NULL) { - return NULL; - } - tempCharVal = (char*) - _getBuffer(en, (*resultLength+1) * sizeof(char)); - if (!tempCharVal) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1); - return tempCharVal; - } else { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } -} - -U_CAPI const UChar* U_EXPORT2 -uenum_unext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return NULL; - } - if (en->uNext != NULL) { - return en->uNext(en, resultLength, status); - } else { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } -} - -U_CAPI const char* U_EXPORT2 -uenum_next(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return NULL; - } - if (en->next != NULL) { - if (resultLength != NULL) { - return en->next(en, resultLength, status); - } - else { - int32_t dummyLength=0; - return en->next(en, &dummyLength, status); - } - } else { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } -} - -U_CAPI void U_EXPORT2 -uenum_reset(UEnumeration* en, UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return; - } - if (en->reset != NULL) { - en->reset(en, status); - } else { - *status = U_UNSUPPORTED_ERROR; - } -} diff --git a/deps/icu-small/source/common/uenum.cpp b/deps/icu-small/source/common/uenum.cpp new file mode 100644 index 0000000000..f75cfb7ac3 --- /dev/null +++ b/deps/icu-small/source/common/uenum.cpp @@ -0,0 +1,189 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uenum.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2002jul08 +* created by: Vladimir Weinstein +*/ + +#include "unicode/putil.h" +#include "uenumimp.h" +#include "cmemory.h" + +/* Layout of the baseContext buffer. */ +typedef struct { + int32_t len; /* number of bytes available starting at 'data' */ + char data; /* actual data starts here */ +} _UEnumBuffer; + +/* Extra bytes to allocate in the baseContext buffer. */ +static const int32_t PAD = 8; + +/* Return a pointer to the baseContext buffer, possibly allocating + or reallocating it if at least 'capacity' bytes are not available. */ +static void* _getBuffer(UEnumeration* en, int32_t capacity) { + + if (en->baseContext != NULL) { + if (((_UEnumBuffer*) en->baseContext)->len < capacity) { + capacity += PAD; + en->baseContext = uprv_realloc(en->baseContext, + sizeof(int32_t) + capacity); + if (en->baseContext == NULL) { + return NULL; + } + ((_UEnumBuffer*) en->baseContext)->len = capacity; + } + } else { + capacity += PAD; + en->baseContext = uprv_malloc(sizeof(int32_t) + capacity); + if (en->baseContext == NULL) { + return NULL; + } + ((_UEnumBuffer*) en->baseContext)->len = capacity; + } + + return (void*) & ((_UEnumBuffer*) en->baseContext)->data; +} + +U_CAPI void U_EXPORT2 +uenum_close(UEnumeration* en) +{ + if (en) { + if (en->close != NULL) { + if (en->baseContext) { + uprv_free(en->baseContext); + } + en->close(en); + } else { /* this seems dangerous, but we better kill the object */ + uprv_free(en); + } + } +} + +U_CAPI int32_t U_EXPORT2 +uenum_count(UEnumeration* en, UErrorCode* status) +{ + if (!en || 
U_FAILURE(*status)) { + return -1; + } + if (en->count != NULL) { + return en->count(en, status); + } else { + *status = U_UNSUPPORTED_ERROR; + return -1; + } +} + +/* Don't call this directly. Only uenum_unext should be calling this. */ +U_CAPI const UChar* U_EXPORT2 +uenum_unextDefault(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + UChar *ustr = NULL; + int32_t len = 0; + if (en->next != NULL) { + const char *cstr = en->next(en, &len, status); + if (cstr != NULL) { + ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar)); + if (ustr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + u_charsToUChars(cstr, ustr, len+1); + } + } + } else { + *status = U_UNSUPPORTED_ERROR; + } + if (resultLength) { + *resultLength = len; + } + return ustr; +} + +/* Don't call this directly. Only uenum_next should be calling this. */ +U_CAPI const char* U_EXPORT2 +uenum_nextDefault(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + if (en->uNext != NULL) { + char *tempCharVal; + const UChar *tempUCharVal = en->uNext(en, resultLength, status); + if (tempUCharVal == NULL) { + return NULL; + } + tempCharVal = (char*) + _getBuffer(en, (*resultLength+1) * sizeof(char)); + if (!tempCharVal) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1); + return tempCharVal; + } else { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } +} + +U_CAPI const UChar* U_EXPORT2 +uenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return NULL; + } + if (en->uNext != NULL) { + return en->uNext(en, resultLength, status); + } else { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } +} + +U_CAPI const char* U_EXPORT2 +uenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return NULL; + } + if (en->next != NULL) { + if (resultLength != NULL) { + return 
en->next(en, resultLength, status); + } + else { + int32_t dummyLength=0; + return en->next(en, &dummyLength, status); + } + } else { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } +} + +U_CAPI void U_EXPORT2 +uenum_reset(UEnumeration* en, UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return; + } + if (en->reset != NULL) { + en->reset(en, status); + } else { + *status = U_UNSUPPORTED_ERROR; + } +} diff --git a/deps/icu-small/source/common/uenumimp.h b/deps/icu-small/source/common/uenumimp.h index 04baac5dd1..9385440640 100644 --- a/deps/icu-small/source/common/uenumimp.h +++ b/deps/icu-small/source/common/uenumimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uenumimp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * diff --git a/deps/icu-small/source/common/uhash.c b/deps/icu-small/source/common/uhash.c deleted file mode 100644 index 02572c80ea..0000000000 --- a/deps/icu-small/source/common/uhash.c +++ /dev/null @@ -1,975 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* Date Name Description -* 03/22/00 aliu Adapted from original C++ ICU Hashtable. -* 07/06/01 aliu Modified to support int32_t keys on -* platforms with sizeof(void*) < 32. 
-****************************************************************************** -*/ - -#include "uhash.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cmemory.h" -#include "uassert.h" -#include "ustr_imp.h" - -/* This hashtable is implemented as a double hash. All elements are - * stored in a single array with no secondary storage for collision - * resolution (no linked list, etc.). When there is a hash collision - * (when two unequal keys have the same hashcode) we resolve this by - * using a secondary hash. The secondary hash is an increment - * computed as a hash function (a different one) of the primary - * hashcode. This increment is added to the initial hash value to - * obtain further slots assigned to the same hash code. For this to - * work, the length of the array and the increment must be relatively - * prime. The easiest way to achieve this is to have the length of - * the array be prime, and the increment be any value from - * 1..length-1. - * - * Hashcodes are 32-bit integers. We make sure all hashcodes are - * non-negative by masking off the top bit. This has two effects: (1) - * modulo arithmetic is simplified. If we allowed negative hashcodes, - * then when we computed hashcode % length, we could get a negative - * result, which we would then have to adjust back into range. It's - * simpler to just make hashcodes non-negative. (2) It makes it easy - * to check for empty vs. occupied slots in the table. We just mark - * empty or deleted slots with a negative hashcode. - * - * The central function is _uhash_find(). This function looks for a - * slot matching the given key and hashcode. If one is found, it - * returns a pointer to that slot. If the table is full, and no match - * is found, it returns NULL -- in theory. This would make the code - * more complicated, since all callers of _uhash_find() would then - * have to check for a NULL result. To keep this from happening, we - * don't allow the table to fill. 
When there is only one - * empty/deleted slot left, uhash_put() will refuse to increase the - * count, and fail. This simplifies the code. In practice, one will - * seldom encounter this using default UHashtables. However, if a - * hashtable is set to a U_FIXED resize policy, or if memory is - * exhausted, then the table may fill. - * - * High and low water ratios control rehashing. They establish levels - * of fullness (from 0 to 1) outside of which the data array is - * reallocated and repopulated. Setting the low water ratio to zero - * means the table will never shrink. Setting the high water ratio to - * one means the table will never grow. The ratios should be - * coordinated with the ratio between successive elements of the - * PRIMES table, so that when the primeIndex is incremented or - * decremented during rehashing, it brings the ratio of count / length - * back into the desired range (between low and high water ratios). - */ - -/******************************************************************** - * PRIVATE Constants, Macros - ********************************************************************/ - -/* This is a list of non-consecutive primes chosen such that - * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 - * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this - * ratio is changed, the low and high water ratios should also be - * adjusted to suit. - * - * These prime numbers were also chosen so that they are the largest - * prime number while being less than a power of two. 
- */ -static const int32_t PRIMES[] = { - 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, - 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, - 16777213, 33554393, 67108859, 134217689, 268435399, 536870909, - 1073741789, 2147483647 /*, 4294967291 */ -}; - -#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) -#define DEFAULT_PRIME_INDEX 3 - -/* These ratios are tuned to the PRIMES array such that a resize - * places the table back into the zone of non-resizing. That is, - * after a call to _uhash_rehash(), a subsequent call to - * _uhash_rehash() should do nothing (should not churn). This is only - * a potential problem with U_GROW_AND_SHRINK. - */ -static const float RESIZE_POLICY_RATIO_TABLE[6] = { - /* low, high water ratio */ - 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */ - 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */ - 0.0F, 1.0F /* U_FIXED: Never change size */ -}; - -/* - Invariants for hashcode values: - - * DELETED < 0 - * EMPTY < 0 - * Real hashes >= 0 - - Hashcodes may not start out this way, but internally they are - adjusted so that they are always positive. We assume 32-bit - hashcodes; adjust these constants for other hashcode sizes. -*/ -#define HASH_DELETED ((int32_t) 0x80000000) -#define HASH_EMPTY ((int32_t) HASH_DELETED + 1) - -#define IS_EMPTY_OR_DELETED(x) ((x) < 0) - -/* This macro expects a UHashTok.pointer as its keypointer and - valuepointer parameters */ -#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \ - if (hash->keyDeleter != NULL && keypointer != NULL) { \ - (*hash->keyDeleter)(keypointer); \ - } \ - if (hash->valueDeleter != NULL && valuepointer != NULL) { \ - (*hash->valueDeleter)(valuepointer); \ - } - -/* - * Constants for hinting whether a key or value is an integer - * or a pointer. If a hint bit is zero, then the associated - * token is assumed to be an integer. 
- */ -#define HINT_KEY_POINTER (1) -#define HINT_VALUE_POINTER (2) - -/******************************************************************** - * PRIVATE Implementation - ********************************************************************/ - -static UHashTok -_uhash_setElement(UHashtable *hash, UHashElement* e, - int32_t hashcode, - UHashTok key, UHashTok value, int8_t hint) { - - UHashTok oldValue = e->value; - if (hash->keyDeleter != NULL && e->key.pointer != NULL && - e->key.pointer != key.pointer) { /* Avoid double deletion */ - (*hash->keyDeleter)(e->key.pointer); - } - if (hash->valueDeleter != NULL) { - if (oldValue.pointer != NULL && - oldValue.pointer != value.pointer) { /* Avoid double deletion */ - (*hash->valueDeleter)(oldValue.pointer); - } - oldValue.pointer = NULL; - } - /* Compilers should copy the UHashTok union correctly, but even if - * they do, memory heap tools (e.g. BoundsChecker) can get - * confused when a pointer is cloaked in a union and then copied. - * TO ALLEVIATE THIS, we use hints (based on what API the user is - * calling) to copy pointers when we know the user thinks - * something is a pointer. */ - if (hint & HINT_KEY_POINTER) { - e->key.pointer = key.pointer; - } else { - e->key = key; - } - if (hint & HINT_VALUE_POINTER) { - e->value.pointer = value.pointer; - } else { - e->value = value; - } - e->hashcode = hashcode; - return oldValue; -} - -/** - * Assumes that the given element is not empty or deleted. 
- */ -static UHashTok -_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { - UHashTok empty; - U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode)); - --hash->count; - empty.pointer = NULL; empty.integer = 0; - return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0); -} - -static void -_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { - U_ASSERT(hash != NULL); - U_ASSERT(((int32_t)policy) >= 0); - U_ASSERT(((int32_t)policy) < 3); - hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2]; - hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1]; -} - -/** - * Allocate internal data array of a size determined by the given - * prime index. If the index is out of range it is pinned into range. - * If the allocation fails the status is set to - * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In - * either case the previous array pointer is overwritten. - * - * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. - */ -static void -_uhash_allocate(UHashtable *hash, - int32_t primeIndex, - UErrorCode *status) { - - UHashElement *p, *limit; - UHashTok emptytok; - - if (U_FAILURE(*status)) return; - - U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); - - hash->primeIndex = primeIndex; - hash->length = PRIMES[primeIndex]; - - p = hash->elements = (UHashElement*) - uprv_malloc(sizeof(UHashElement) * hash->length); - - if (hash->elements == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - emptytok.pointer = NULL; /* Only one of these two is needed */ - emptytok.integer = 0; /* but we don't know which one. 
*/ - - limit = p + hash->length; - while (p < limit) { - p->key = emptytok; - p->value = emptytok; - p->hashcode = HASH_EMPTY; - ++p; - } - - hash->count = 0; - hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); - hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); -} - -static UHashtable* -_uhash_init(UHashtable *result, - UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t primeIndex, - UErrorCode *status) -{ - if (U_FAILURE(*status)) return NULL; - U_ASSERT(keyHash != NULL); - U_ASSERT(keyComp != NULL); - - result->keyHasher = keyHash; - result->keyComparator = keyComp; - result->valueComparator = valueComp; - result->keyDeleter = NULL; - result->valueDeleter = NULL; - result->allocated = FALSE; - _uhash_internalSetResizePolicy(result, U_GROW); - - _uhash_allocate(result, primeIndex, status); - - if (U_FAILURE(*status)) { - return NULL; - } - - return result; -} - -static UHashtable* -_uhash_create(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t primeIndex, - UErrorCode *status) { - UHashtable *result; - - if (U_FAILURE(*status)) return NULL; - - result = (UHashtable*) uprv_malloc(sizeof(UHashtable)); - if (result == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status); - result->allocated = TRUE; - - if (U_FAILURE(*status)) { - uprv_free(result); - return NULL; - } - - return result; -} - -/** - * Look for a key in the table, or if no such key exists, the first - * empty slot matching the given hashcode. Keys are compared using - * the keyComparator function. - * - * First find the start position, which is the hashcode modulo - * the length. Test it to see if it is: - * - * a. identical: First check the hash values for a quick check, - * then compare keys for equality using keyComparator. - * b. deleted - * c. 
empty - * - * Stop if it is identical or empty, otherwise continue by adding a - * "jump" value (moduloing by the length again to keep it within - * range) and retesting. For efficiency, there need enough empty - * values so that the searchs stop within a reasonable amount of time. - * This can be changed by changing the high/low water marks. - * - * In theory, this function can return NULL, if it is full (no empty - * or deleted slots) and if no matching key is found. In practice, we - * prevent this elsewhere (in uhash_put) by making sure the last slot - * in the table is never filled. - * - * The size of the table should be prime for this algorithm to work; - * otherwise we are not guaranteed that the jump value (the secondary - * hash) is relatively prime to the table length. - */ -static UHashElement* -_uhash_find(const UHashtable *hash, UHashTok key, - int32_t hashcode) { - - int32_t firstDeleted = -1; /* assume invalid index */ - int32_t theIndex, startIndex; - int32_t jump = 0; /* lazy evaluate */ - int32_t tableHash; - UHashElement *elements = hash->elements; - - hashcode &= 0x7FFFFFFF; /* must be positive */ - startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length; - - do { - tableHash = elements[theIndex].hashcode; - if (tableHash == hashcode) { /* quick check */ - if ((*hash->keyComparator)(key, elements[theIndex].key)) { - return &(elements[theIndex]); - } - } else if (!IS_EMPTY_OR_DELETED(tableHash)) { - /* We have hit a slot which contains a key-value pair, - * but for which the hash code does not match. Keep - * looking. - */ - } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */ - break; - } else if (firstDeleted < 0) { /* remember first deleted */ - firstDeleted = theIndex; - } - if (jump == 0) { /* lazy compute jump */ - /* The jump value must be relatively prime to the table - * length. As long as the length is prime, then any value - * 1..length-1 will be relatively prime to it. 
- */ - jump = (hashcode % (hash->length - 1)) + 1; - } - theIndex = (theIndex + jump) % hash->length; - } while (theIndex != startIndex); - - if (firstDeleted >= 0) { - theIndex = firstDeleted; /* reset if had deleted slot */ - } else if (tableHash != HASH_EMPTY) { - /* We get to this point if the hashtable is full (no empty or - * deleted slots), and we've failed to find a match. THIS - * WILL NEVER HAPPEN as long as uhash_put() makes sure that - * count is always < length. - */ - U_ASSERT(FALSE); - return NULL; /* Never happens if uhash_put() behaves */ - } - return &(elements[theIndex]); -} - -/** - * Attempt to grow or shrink the data arrays in order to make the - * count fit between the high and low water marks. hash_put() and - * hash_remove() call this method when the count exceeds the high or - * low water marks. This method may do nothing, if memory allocation - * fails, or if the count is already in range, or if the length is - * already at the low or high limit. In any case, upon return the - * arrays will be valid. 
- */ -static void -_uhash_rehash(UHashtable *hash, UErrorCode *status) { - - UHashElement *old = hash->elements; - int32_t oldLength = hash->length; - int32_t newPrimeIndex = hash->primeIndex; - int32_t i; - - if (hash->count > hash->highWaterMark) { - if (++newPrimeIndex >= PRIMES_LENGTH) { - return; - } - } else if (hash->count < hash->lowWaterMark) { - if (--newPrimeIndex < 0) { - return; - } - } else { - return; - } - - _uhash_allocate(hash, newPrimeIndex, status); - - if (U_FAILURE(*status)) { - hash->elements = old; - hash->length = oldLength; - return; - } - - for (i = oldLength - 1; i >= 0; --i) { - if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) { - UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode); - U_ASSERT(e != NULL); - U_ASSERT(e->hashcode == HASH_EMPTY); - e->key = old[i].key; - e->value = old[i].value; - e->hashcode = old[i].hashcode; - ++hash->count; - } - } - - uprv_free(old); -} - -static UHashTok -_uhash_remove(UHashtable *hash, - UHashTok key) { - /* First find the position of the key in the table. If the object - * has not been removed already, remove it. If the user wanted - * keys deleted, then delete it also. We have to put a special - * hashcode in that position that means that something has been - * deleted, since when we do a find, we have to continue PAST any - * deleted values. - */ - UHashTok result; - UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key)); - U_ASSERT(e != NULL); - result.pointer = NULL; - result.integer = 0; - if (!IS_EMPTY_OR_DELETED(e->hashcode)) { - result = _uhash_internalRemoveElement(hash, e); - if (hash->count < hash->lowWaterMark) { - UErrorCode status = U_ZERO_ERROR; - _uhash_rehash(hash, &status); - } - } - return result; -} - -static UHashTok -_uhash_put(UHashtable *hash, - UHashTok key, - UHashTok value, - int8_t hint, - UErrorCode *status) { - - /* Put finds the position in the table for the new value. 
If the - * key is already in the table, it is deleted, if there is a - * non-NULL keyDeleter. Then the key, the hash and the value are - * all put at the position in their respective arrays. - */ - int32_t hashcode; - UHashElement* e; - UHashTok emptytok; - - if (U_FAILURE(*status)) { - goto err; - } - U_ASSERT(hash != NULL); - /* Cannot always check pointer here or iSeries sees NULL every time. */ - if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { - /* Disallow storage of NULL values, since NULL is returned by - * get() to indicate an absent key. Storing NULL == removing. - */ - return _uhash_remove(hash, key); - } - if (hash->count > hash->highWaterMark) { - _uhash_rehash(hash, status); - if (U_FAILURE(*status)) { - goto err; - } - } - - hashcode = (*hash->keyHasher)(key); - e = _uhash_find(hash, key, hashcode); - U_ASSERT(e != NULL); - - if (IS_EMPTY_OR_DELETED(e->hashcode)) { - /* Important: We must never actually fill the table up. If we - * do so, then _uhash_find() will return NULL, and we'll have - * to check for NULL after every call to _uhash_find(). To - * avoid this we make sure there is always at least one empty - * or deleted slot in the table. This only is a problem if we - * are out of memory and rehash isn't working. - */ - ++hash->count; - if (hash->count == hash->length) { - /* Don't allow count to reach length */ - --hash->count; - *status = U_MEMORY_ALLOCATION_ERROR; - goto err; - } - } - - /* We must in all cases handle storage properly. If there was an - * old key, then it must be deleted (if the deleter != NULL). - * Make hashcodes stored in table positive. - */ - return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint); - - err: - /* If the deleters are non-NULL, this method adopts its key and/or - * value arguments, and we must be sure to delete the key and/or - * value in all cases, even upon failure. 
- */ - HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer); - emptytok.pointer = NULL; emptytok.integer = 0; - return emptytok; -} - - -/******************************************************************** - * PUBLIC API - ********************************************************************/ - -U_CAPI UHashtable* U_EXPORT2 -uhash_open(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - UErrorCode *status) { - - return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); -} - -U_CAPI UHashtable* U_EXPORT2 -uhash_openSize(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t size, - UErrorCode *status) { - - /* Find the smallest index i for which PRIMES[i] >= size. */ - int32_t i = 0; - while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) { - ++i; - } - - return _uhash_create(keyHash, keyComp, valueComp, i, status); -} - -U_CAPI UHashtable* U_EXPORT2 -uhash_init(UHashtable *fillinResult, - UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - UErrorCode *status) { - - return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); -} - -U_CAPI void U_EXPORT2 -uhash_close(UHashtable *hash) { - if (hash == NULL) { - return; - } - if (hash->elements != NULL) { - if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) { - int32_t pos=UHASH_FIRST; - UHashElement *e; - while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) { - HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer); - } - } - uprv_free(hash->elements); - hash->elements = NULL; - } - if (hash->allocated) { - uprv_free(hash); - } -} - -U_CAPI UHashFunction *U_EXPORT2 -uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { - UHashFunction *result = hash->keyHasher; - hash->keyHasher = fn; - return result; -} - -U_CAPI UKeyComparator *U_EXPORT2 -uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { - UKeyComparator *result = hash->keyComparator; - hash->keyComparator = fn; - return result; -} -U_CAPI UValueComparator *U_EXPORT2 -uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ - UValueComparator *result = hash->valueComparator; - hash->valueComparator = fn; - return result; -} - -U_CAPI UObjectDeleter *U_EXPORT2 -uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { - UObjectDeleter *result = hash->keyDeleter; - hash->keyDeleter = fn; - return result; -} - -U_CAPI
UObjectDeleter *U_EXPORT2 -uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { - UObjectDeleter *result = hash->valueDeleter; - hash->valueDeleter = fn; - return result; -} - -U_CAPI void U_EXPORT2 -uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { - UErrorCode status = U_ZERO_ERROR; - _uhash_internalSetResizePolicy(hash, policy); - hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); - hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); - _uhash_rehash(hash, &status); -} - -U_CAPI int32_t U_EXPORT2 -uhash_count(const UHashtable *hash) { - return hash->count; -} - -U_CAPI void* U_EXPORT2 -uhash_get(const UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; -} - -U_CAPI void* U_EXPORT2 -uhash_iget(const UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_geti(const UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_igeti(const UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; -} - -U_CAPI void* U_EXPORT2 -uhash_put(UHashtable *hash, - void* key, - void* value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.pointer = key; - valueholder.pointer = value; - return _uhash_put(hash, keyholder, valueholder, - HINT_KEY_POINTER | HINT_VALUE_POINTER, - status).pointer; -} - -U_CAPI void* U_EXPORT2 -uhash_iput(UHashtable *hash, - int32_t key, - void* value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.integer = key; 
- valueholder.pointer = value; - return _uhash_put(hash, keyholder, valueholder, - HINT_VALUE_POINTER, - status).pointer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_puti(UHashtable *hash, - void* key, - int32_t value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.pointer = key; - valueholder.integer = value; - return _uhash_put(hash, keyholder, valueholder, - HINT_KEY_POINTER, - status).integer; -} - - -U_CAPI int32_t U_EXPORT2 -uhash_iputi(UHashtable *hash, - int32_t key, - int32_t value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.integer = key; - valueholder.integer = value; - return _uhash_put(hash, keyholder, valueholder, - 0, /* neither is a ptr */ - status).integer; -} - -U_CAPI void* U_EXPORT2 -uhash_remove(UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_remove(hash, keyholder).pointer; -} - -U_CAPI void* U_EXPORT2 -uhash_iremove(UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_remove(hash, keyholder).pointer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_removei(UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_remove(hash, keyholder).integer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_iremovei(UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_remove(hash, keyholder).integer; -} - -U_CAPI void U_EXPORT2 -uhash_removeAll(UHashtable *hash) { - int32_t pos = UHASH_FIRST; - const UHashElement *e; - U_ASSERT(hash != NULL); - if (hash->count != 0) { - while ((e = uhash_nextElement(hash, &pos)) != NULL) { - uhash_removeElement(hash, e); - } - } - U_ASSERT(hash->count == 0); -} - -U_CAPI const UHashElement* U_EXPORT2 -uhash_find(const UHashtable *hash, const void* key) { - UHashTok keyholder; - const UHashElement *e; - keyholder.pointer = (void*) key; - e = _uhash_find(hash, keyholder, 
hash->keyHasher(keyholder)); - return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e; -} - -U_CAPI const UHashElement* U_EXPORT2 -uhash_nextElement(const UHashtable *hash, int32_t *pos) { - /* Walk through the array until we find an element that is not - * EMPTY and not DELETED. - */ - int32_t i; - U_ASSERT(hash != NULL); - for (i = *pos + 1; i < hash->length; ++i) { - if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) { - *pos = i; - return &(hash->elements[i]); - } - } - - /* No more elements */ - return NULL; -} - -U_CAPI void* U_EXPORT2 -uhash_removeElement(UHashtable *hash, const UHashElement* e) { - U_ASSERT(hash != NULL); - U_ASSERT(e != NULL); - if (!IS_EMPTY_OR_DELETED(e->hashcode)) { - UHashElement *nce = (UHashElement *)e; - return _uhash_internalRemoveElement(hash, nce).pointer; - } - return NULL; -} - -/******************************************************************** - * UHashTok convenience - ********************************************************************/ - -/** - * Return a UHashTok for an integer. - */ -/*U_CAPI UHashTok U_EXPORT2 -uhash_toki(int32_t i) { - UHashTok tok; - tok.integer = i; - return tok; -}*/ - -/** - * Return a UHashTok for a pointer. - */ -/*U_CAPI UHashTok U_EXPORT2 -uhash_tokp(void* p) { - UHashTok tok; - tok.pointer = p; - return tok; -}*/ - -/******************************************************************** - * PUBLIC Key Hash Functions - ********************************************************************/ - -U_CAPI int32_t U_EXPORT2 -uhash_hashUChars(const UHashTok key) { - const UChar *s = (const UChar *)key.pointer; - return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s)); -} - -U_CAPI int32_t U_EXPORT2 -uhash_hashChars(const UHashTok key) { - const char *s = (const char *)key.pointer; - return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s)); -} - -U_CAPI int32_t U_EXPORT2 -uhash_hashIChars(const UHashTok key) { - const char *s = (const char *)key.pointer; - return s == NULL ? 
0 : ustr_hashICharsN(s, uprv_strlen(s)); -} - -U_CAPI UBool U_EXPORT2 -uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ - int32_t count1, count2, pos, i; - - if(hash1==hash2){ - return TRUE; - } - - /* - * Make sure that we are comparing 2 valid hashes of the same type - * with valid comparison functions. - * Without valid comparison functions, a binary comparison - * of the hash values will yield random results on machines - * with 64-bit pointers and 32-bit integer hashes. - * A valueComparator is normally optional. - */ - if (hash1==NULL || hash2==NULL || - hash1->keyComparator != hash2->keyComparator || - hash1->valueComparator != hash2->valueComparator || - hash1->valueComparator == NULL) - { - /* - Normally we would return an error here about incompatible hash tables, - but we return FALSE instead. - */ - return FALSE; - } - - count1 = uhash_count(hash1); - count2 = uhash_count(hash2); - if(count1!=count2){ - return FALSE; - } - - pos=UHASH_FIRST; - for(i=0; i<count1; i++){ - const UHashElement* elem1 = uhash_nextElement(hash1, &pos); - const UHashTok key1 = elem1->key; - const UHashTok val1 = elem1->value; - /* here the keys are not compared, instead the key form hash1 is used to fetch - * value from hash2. If the hashes are equal then then both hashes should - * contain equal values for the same key!
- */ - const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); - const UHashTok val2 = elem2->value; - if(hash1->valueComparator(val1, val2)==FALSE){ - return FALSE; - } - } - return TRUE; -} - -/******************************************************************** - * PUBLIC Comparator Functions - ********************************************************************/ - -U_CAPI UBool U_EXPORT2 -uhash_compareUChars(const UHashTok key1, const UHashTok key2) { - const UChar *p1 = (const UChar*) key1.pointer; - const UChar *p2 = (const UChar*) key2.pointer; - if (p1 == p2) { - return TRUE; - } - if (p1 == NULL || p2 == NULL) { - return FALSE; - } - while (*p1 != 0 && *p1 == *p2) { - ++p1; - ++p2; - } - return (UBool)(*p1 == *p2); -} - -U_CAPI UBool U_EXPORT2 -uhash_compareChars(const UHashTok key1, const UHashTok key2) { - const char *p1 = (const char*) key1.pointer; - const char *p2 = (const char*) key2.pointer; - if (p1 == p2) { - return TRUE; - } - if (p1 == NULL || p2 == NULL) { - return FALSE; - } - while (*p1 != 0 && *p1 == *p2) { - ++p1; - ++p2; - } - return (UBool)(*p1 == *p2); -} - -U_CAPI UBool U_EXPORT2 -uhash_compareIChars(const UHashTok key1, const UHashTok key2) { - const char *p1 = (const char*) key1.pointer; - const char *p2 = (const char*) key2.pointer; - if (p1 == p2) { - return TRUE; - } - if (p1 == NULL || p2 == NULL) { - return FALSE; - } - while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) { - ++p1; - ++p2; - } - return (UBool)(*p1 == *p2); -} - -/******************************************************************** - * PUBLIC int32_t Support Functions - ********************************************************************/ - -U_CAPI int32_t U_EXPORT2 -uhash_hashLong(const UHashTok key) { - return key.integer; -} - -U_CAPI UBool U_EXPORT2 -uhash_compareLong(const UHashTok key1, const UHashTok key2) { - return (UBool)(key1.integer == key2.integer); -} diff --git a/deps/icu-small/source/common/uhash.cpp 
b/deps/icu-small/source/common/uhash.cpp new file mode 100644 index 0000000000..0e2a3c03c6 --- /dev/null +++ b/deps/icu-small/source/common/uhash.cpp @@ -0,0 +1,975 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 03/22/00 aliu Adapted from original C++ ICU Hashtable. +* 07/06/01 aliu Modified to support int32_t keys on +* platforms with sizeof(void*) < 32. +****************************************************************************** +*/ + +#include "uhash.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "cmemory.h" +#include "uassert.h" +#include "ustr_imp.h" + +/* This hashtable is implemented as a double hash. All elements are + * stored in a single array with no secondary storage for collision + * resolution (no linked list, etc.). When there is a hash collision + * (when two unequal keys have the same hashcode) we resolve this by + * using a secondary hash. The secondary hash is an increment + * computed as a hash function (a different one) of the primary + * hashcode. This increment is added to the initial hash value to + * obtain further slots assigned to the same hash code. For this to + * work, the length of the array and the increment must be relatively + * prime. The easiest way to achieve this is to have the length of + * the array be prime, and the increment be any value from + * 1..length-1. + * + * Hashcodes are 32-bit integers. We make sure all hashcodes are + * non-negative by masking off the top bit. This has two effects: (1) + * modulo arithmetic is simplified. 
If we allowed negative hashcodes, + * then when we computed hashcode % length, we could get a negative + * result, which we would then have to adjust back into range. It's + * simpler to just make hashcodes non-negative. (2) It makes it easy + * to check for empty vs. occupied slots in the table. We just mark + * empty or deleted slots with a negative hashcode. + * + * The central function is _uhash_find(). This function looks for a + * slot matching the given key and hashcode. If one is found, it + * returns a pointer to that slot. If the table is full, and no match + * is found, it returns NULL -- in theory. This would make the code + * more complicated, since all callers of _uhash_find() would then + * have to check for a NULL result. To keep this from happening, we + * don't allow the table to fill. When there is only one + * empty/deleted slot left, uhash_put() will refuse to increase the + * count, and fail. This simplifies the code. In practice, one will + * seldom encounter this using default UHashtables. However, if a + * hashtable is set to a U_FIXED resize policy, or if memory is + * exhausted, then the table may fill. + * + * High and low water ratios control rehashing. They establish levels + * of fullness (from 0 to 1) outside of which the data array is + * reallocated and repopulated. Setting the low water ratio to zero + * means the table will never shrink. Setting the high water ratio to + * one means the table will never grow. The ratios should be + * coordinated with the ratio between successive elements of the + * PRIMES table, so that when the primeIndex is incremented or + * decremented during rehashing, it brings the ratio of count / length + * back into the desired range (between low and high water ratios). 
+ */ + +/******************************************************************** + * PRIVATE Constants, Macros + ********************************************************************/ + +/* This is a list of non-consecutive primes chosen such that + * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 + * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this + * ratio is changed, the low and high water ratios should also be + * adjusted to suit. + * + * These prime numbers were also chosen so that they are the largest + * prime number while being less than a power of two. + */ +static const int32_t PRIMES[] = { + 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, + 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, + 16777213, 33554393, 67108859, 134217689, 268435399, 536870909, + 1073741789, 2147483647 /*, 4294967291 */ +}; + +#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) +#define DEFAULT_PRIME_INDEX 3 + +/* These ratios are tuned to the PRIMES array such that a resize + * places the table back into the zone of non-resizing. That is, + * after a call to _uhash_rehash(), a subsequent call to + * _uhash_rehash() should do nothing (should not churn). This is only + * a potential problem with U_GROW_AND_SHRINK. + */ +static const float RESIZE_POLICY_RATIO_TABLE[6] = { + /* low, high water ratio */ + 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */ + 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */ + 0.0F, 1.0F /* U_FIXED: Never change size */ +}; + +/* + Invariants for hashcode values: + + * DELETED < 0 + * EMPTY < 0 + * Real hashes >= 0 + + Hashcodes may not start out this way, but internally they are + adjusted so that they are always positive. We assume 32-bit + hashcodes; adjust these constants for other hashcode sizes. 
+*/ +#define HASH_DELETED ((int32_t) 0x80000000) +#define HASH_EMPTY ((int32_t) HASH_DELETED + 1) + +#define IS_EMPTY_OR_DELETED(x) ((x) < 0) + +/* This macro expects a UHashTok.pointer as its keypointer and + valuepointer parameters */ +#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \ + if (hash->keyDeleter != NULL && keypointer != NULL) { \ + (*hash->keyDeleter)(keypointer); \ + } \ + if (hash->valueDeleter != NULL && valuepointer != NULL) { \ + (*hash->valueDeleter)(valuepointer); \ + } + +/* + * Constants for hinting whether a key or value is an integer + * or a pointer. If a hint bit is zero, then the associated + * token is assumed to be an integer. + */ +#define HINT_KEY_POINTER (1) +#define HINT_VALUE_POINTER (2) + +/******************************************************************** + * PRIVATE Implementation + ********************************************************************/ + +static UHashTok +_uhash_setElement(UHashtable *hash, UHashElement* e, + int32_t hashcode, + UHashTok key, UHashTok value, int8_t hint) { + + UHashTok oldValue = e->value; + if (hash->keyDeleter != NULL && e->key.pointer != NULL && + e->key.pointer != key.pointer) { /* Avoid double deletion */ + (*hash->keyDeleter)(e->key.pointer); + } + if (hash->valueDeleter != NULL) { + if (oldValue.pointer != NULL && + oldValue.pointer != value.pointer) { /* Avoid double deletion */ + (*hash->valueDeleter)(oldValue.pointer); + } + oldValue.pointer = NULL; + } + /* Compilers should copy the UHashTok union correctly, but even if + * they do, memory heap tools (e.g. BoundsChecker) can get + * confused when a pointer is cloaked in a union and then copied. + * TO ALLEVIATE THIS, we use hints (based on what API the user is + * calling) to copy pointers when we know the user thinks + * something is a pointer. 
*/ + if (hint & HINT_KEY_POINTER) { + e->key.pointer = key.pointer; + } else { + e->key = key; + } + if (hint & HINT_VALUE_POINTER) { + e->value.pointer = value.pointer; + } else { + e->value = value; + } + e->hashcode = hashcode; + return oldValue; +} + +/** + * Assumes that the given element is not empty or deleted. + */ +static UHashTok +_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { + UHashTok empty; + U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode)); + --hash->count; + empty.pointer = NULL; empty.integer = 0; + return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0); +} + +static void +_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { + U_ASSERT(hash != NULL); + U_ASSERT(((int32_t)policy) >= 0); + U_ASSERT(((int32_t)policy) < 3); + hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2]; + hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1]; +} + +/** + * Allocate internal data array of a size determined by the given + * prime index. If the index is out of range it is pinned into range. + * If the allocation fails the status is set to + * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In + * either case the previous array pointer is overwritten. + * + * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. + */ +static void +_uhash_allocate(UHashtable *hash, + int32_t primeIndex, + UErrorCode *status) { + + UHashElement *p, *limit; + UHashTok emptytok; + + if (U_FAILURE(*status)) return; + + U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); + + hash->primeIndex = primeIndex; + hash->length = PRIMES[primeIndex]; + + p = hash->elements = (UHashElement*) + uprv_malloc(sizeof(UHashElement) * hash->length); + + if (hash->elements == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + emptytok.pointer = NULL; /* Only one of these two is needed */ + emptytok.integer = 0; /* but we don't know which one. 
*/ + + limit = p + hash->length; + while (p < limit) { + p->key = emptytok; + p->value = emptytok; + p->hashcode = HASH_EMPTY; + ++p; + } + + hash->count = 0; + hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); + hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); +} + +static UHashtable* +_uhash_init(UHashtable *result, + UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t primeIndex, + UErrorCode *status) +{ + if (U_FAILURE(*status)) return NULL; + U_ASSERT(keyHash != NULL); + U_ASSERT(keyComp != NULL); + + result->keyHasher = keyHash; + result->keyComparator = keyComp; + result->valueComparator = valueComp; + result->keyDeleter = NULL; + result->valueDeleter = NULL; + result->allocated = FALSE; + _uhash_internalSetResizePolicy(result, U_GROW); + + _uhash_allocate(result, primeIndex, status); + + if (U_FAILURE(*status)) { + return NULL; + } + + return result; +} + +static UHashtable* +_uhash_create(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t primeIndex, + UErrorCode *status) { + UHashtable *result; + + if (U_FAILURE(*status)) return NULL; + + result = (UHashtable*) uprv_malloc(sizeof(UHashtable)); + if (result == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status); + result->allocated = TRUE; + + if (U_FAILURE(*status)) { + uprv_free(result); + return NULL; + } + + return result; +} + +/** + * Look for a key in the table, or if no such key exists, the first + * empty slot matching the given hashcode. Keys are compared using + * the keyComparator function. + * + * First find the start position, which is the hashcode modulo + * the length. Test it to see if it is: + * + * a. identical: First check the hash values for a quick check, + * then compare keys for equality using keyComparator. + * b. deleted + * c. 
empty + * + * Stop if it is identical or empty, otherwise continue by adding a + * "jump" value (moduloing by the length again to keep it within + * range) and retesting. For efficiency, there need enough empty + * values so that the searchs stop within a reasonable amount of time. + * This can be changed by changing the high/low water marks. + * + * In theory, this function can return NULL, if it is full (no empty + * or deleted slots) and if no matching key is found. In practice, we + * prevent this elsewhere (in uhash_put) by making sure the last slot + * in the table is never filled. + * + * The size of the table should be prime for this algorithm to work; + * otherwise we are not guaranteed that the jump value (the secondary + * hash) is relatively prime to the table length. + */ +static UHashElement* +_uhash_find(const UHashtable *hash, UHashTok key, + int32_t hashcode) { + + int32_t firstDeleted = -1; /* assume invalid index */ + int32_t theIndex, startIndex; + int32_t jump = 0; /* lazy evaluate */ + int32_t tableHash; + UHashElement *elements = hash->elements; + + hashcode &= 0x7FFFFFFF; /* must be positive */ + startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length; + + do { + tableHash = elements[theIndex].hashcode; + if (tableHash == hashcode) { /* quick check */ + if ((*hash->keyComparator)(key, elements[theIndex].key)) { + return &(elements[theIndex]); + } + } else if (!IS_EMPTY_OR_DELETED(tableHash)) { + /* We have hit a slot which contains a key-value pair, + * but for which the hash code does not match. Keep + * looking. + */ + } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */ + break; + } else if (firstDeleted < 0) { /* remember first deleted */ + firstDeleted = theIndex; + } + if (jump == 0) { /* lazy compute jump */ + /* The jump value must be relatively prime to the table + * length. As long as the length is prime, then any value + * 1..length-1 will be relatively prime to it. 
+ */ + jump = (hashcode % (hash->length - 1)) + 1; + } + theIndex = (theIndex + jump) % hash->length; + } while (theIndex != startIndex); + + if (firstDeleted >= 0) { + theIndex = firstDeleted; /* reset if had deleted slot */ + } else if (tableHash != HASH_EMPTY) { + /* We get to this point if the hashtable is full (no empty or + * deleted slots), and we've failed to find a match. THIS + * WILL NEVER HAPPEN as long as uhash_put() makes sure that + * count is always < length. + */ + U_ASSERT(FALSE); + return NULL; /* Never happens if uhash_put() behaves */ + } + return &(elements[theIndex]); +} + +/** + * Attempt to grow or shrink the data arrays in order to make the + * count fit between the high and low water marks. hash_put() and + * hash_remove() call this method when the count exceeds the high or + * low water marks. This method may do nothing, if memory allocation + * fails, or if the count is already in range, or if the length is + * already at the low or high limit. In any case, upon return the + * arrays will be valid. 
+ */ +static void +_uhash_rehash(UHashtable *hash, UErrorCode *status) { + + UHashElement *old = hash->elements; + int32_t oldLength = hash->length; + int32_t newPrimeIndex = hash->primeIndex; + int32_t i; + + if (hash->count > hash->highWaterMark) { + if (++newPrimeIndex >= PRIMES_LENGTH) { + return; + } + } else if (hash->count < hash->lowWaterMark) { + if (--newPrimeIndex < 0) { + return; + } + } else { + return; + } + + _uhash_allocate(hash, newPrimeIndex, status); + + if (U_FAILURE(*status)) { + hash->elements = old; + hash->length = oldLength; + return; + } + + for (i = oldLength - 1; i >= 0; --i) { + if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) { + UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode); + U_ASSERT(e != NULL); + U_ASSERT(e->hashcode == HASH_EMPTY); + e->key = old[i].key; + e->value = old[i].value; + e->hashcode = old[i].hashcode; + ++hash->count; + } + } + + uprv_free(old); +} + +static UHashTok +_uhash_remove(UHashtable *hash, + UHashTok key) { + /* First find the position of the key in the table. If the object + * has not been removed already, remove it. If the user wanted + * keys deleted, then delete it also. We have to put a special + * hashcode in that position that means that something has been + * deleted, since when we do a find, we have to continue PAST any + * deleted values. + */ + UHashTok result; + UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key)); + U_ASSERT(e != NULL); + result.pointer = NULL; + result.integer = 0; + if (!IS_EMPTY_OR_DELETED(e->hashcode)) { + result = _uhash_internalRemoveElement(hash, e); + if (hash->count < hash->lowWaterMark) { + UErrorCode status = U_ZERO_ERROR; + _uhash_rehash(hash, &status); + } + } + return result; +} + +static UHashTok +_uhash_put(UHashtable *hash, + UHashTok key, + UHashTok value, + int8_t hint, + UErrorCode *status) { + + /* Put finds the position in the table for the new value. 
If the + * key is already in the table, it is deleted, if there is a + * non-NULL keyDeleter. Then the key, the hash and the value are + * all put at the position in their respective arrays. + */ + int32_t hashcode; + UHashElement* e; + UHashTok emptytok; + + if (U_FAILURE(*status)) { + goto err; + } + U_ASSERT(hash != NULL); + /* Cannot always check pointer here or iSeries sees NULL every time. */ + if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { + /* Disallow storage of NULL values, since NULL is returned by + * get() to indicate an absent key. Storing NULL == removing. + */ + return _uhash_remove(hash, key); + } + if (hash->count > hash->highWaterMark) { + _uhash_rehash(hash, status); + if (U_FAILURE(*status)) { + goto err; + } + } + + hashcode = (*hash->keyHasher)(key); + e = _uhash_find(hash, key, hashcode); + U_ASSERT(e != NULL); + + if (IS_EMPTY_OR_DELETED(e->hashcode)) { + /* Important: We must never actually fill the table up. If we + * do so, then _uhash_find() will return NULL, and we'll have + * to check for NULL after every call to _uhash_find(). To + * avoid this we make sure there is always at least one empty + * or deleted slot in the table. This only is a problem if we + * are out of memory and rehash isn't working. + */ + ++hash->count; + if (hash->count == hash->length) { + /* Don't allow count to reach length */ + --hash->count; + *status = U_MEMORY_ALLOCATION_ERROR; + goto err; + } + } + + /* We must in all cases handle storage properly. If there was an + * old key, then it must be deleted (if the deleter != NULL). + * Make hashcodes stored in table positive. + */ + return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint); + + err: + /* If the deleters are non-NULL, this method adopts its key and/or + * value arguments, and we must be sure to delete the key and/or + * value in all cases, even upon failure. 
+ */ + HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer); + emptytok.pointer = NULL; emptytok.integer = 0; + return emptytok; +} + + +/******************************************************************** + * PUBLIC API + ********************************************************************/ + +U_CAPI UHashtable* U_EXPORT2 +uhash_open(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + UErrorCode *status) { + + return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); +} + +U_CAPI UHashtable* U_EXPORT2 +uhash_openSize(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t size, + UErrorCode *status) { + + /* Find the smallest index i for which PRIMES[i] >= size. */ + int32_t i = 0; + while (i<(PRIMES_LENGTH-1) && PRIMES[i]elements != NULL) { + if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) { + int32_t pos=UHASH_FIRST; + UHashElement *e; + while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) { + HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer); + } + } + uprv_free(hash->elements); + hash->elements = NULL; + } + if (hash->allocated) { + uprv_free(hash); + } +} + +U_CAPI UHashFunction *U_EXPORT2 +uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { + UHashFunction *result = hash->keyHasher; + hash->keyHasher = fn; + return result; +} + +U_CAPI UKeyComparator *U_EXPORT2 +uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { + UKeyComparator *result = hash->keyComparator; + hash->keyComparator = fn; + return result; +} +U_CAPI UValueComparator *U_EXPORT2 +uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ + UValueComparator *result = hash->valueComparator; + hash->valueComparator = fn; + return result; +} + +U_CAPI UObjectDeleter *U_EXPORT2 +uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { + UObjectDeleter *result = hash->keyDeleter; + hash->keyDeleter = fn; + return result; +} + +U_CAPI 
UObjectDeleter *U_EXPORT2 +uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { + UObjectDeleter *result = hash->valueDeleter; + hash->valueDeleter = fn; + return result; +} + +U_CAPI void U_EXPORT2 +uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { + UErrorCode status = U_ZERO_ERROR; + _uhash_internalSetResizePolicy(hash, policy); + hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); + hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); + _uhash_rehash(hash, &status); +} + +U_CAPI int32_t U_EXPORT2 +uhash_count(const UHashtable *hash) { + return hash->count; +} + +U_CAPI void* U_EXPORT2 +uhash_get(const UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; +} + +U_CAPI void* U_EXPORT2 +uhash_iget(const UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_geti(const UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_igeti(const UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; +} + +U_CAPI void* U_EXPORT2 +uhash_put(UHashtable *hash, + void* key, + void* value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.pointer = key; + valueholder.pointer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_KEY_POINTER | HINT_VALUE_POINTER, + status).pointer; +} + +U_CAPI void* U_EXPORT2 +uhash_iput(UHashtable *hash, + int32_t key, + void* value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.integer = key; 
+ valueholder.pointer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_VALUE_POINTER, + status).pointer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_puti(UHashtable *hash, + void* key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.pointer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_KEY_POINTER, + status).integer; +} + + +U_CAPI int32_t U_EXPORT2 +uhash_iputi(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.integer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + 0, /* neither is a ptr */ + status).integer; +} + +U_CAPI void* U_EXPORT2 +uhash_remove(UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_remove(hash, keyholder).pointer; +} + +U_CAPI void* U_EXPORT2 +uhash_iremove(UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_remove(hash, keyholder).pointer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_removei(UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_remove(hash, keyholder).integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_iremovei(UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_remove(hash, keyholder).integer; +} + +U_CAPI void U_EXPORT2 +uhash_removeAll(UHashtable *hash) { + int32_t pos = UHASH_FIRST; + const UHashElement *e; + U_ASSERT(hash != NULL); + if (hash->count != 0) { + while ((e = uhash_nextElement(hash, &pos)) != NULL) { + uhash_removeElement(hash, e); + } + } + U_ASSERT(hash->count == 0); +} + +U_CAPI const UHashElement* U_EXPORT2 +uhash_find(const UHashtable *hash, const void* key) { + UHashTok keyholder; + const UHashElement *e; + keyholder.pointer = (void*) key; + e = _uhash_find(hash, keyholder, 
hash->keyHasher(keyholder)); + return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e; +} + +U_CAPI const UHashElement* U_EXPORT2 +uhash_nextElement(const UHashtable *hash, int32_t *pos) { + /* Walk through the array until we find an element that is not + * EMPTY and not DELETED. + */ + int32_t i; + U_ASSERT(hash != NULL); + for (i = *pos + 1; i < hash->length; ++i) { + if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) { + *pos = i; + return &(hash->elements[i]); + } + } + + /* No more elements */ + return NULL; +} + +U_CAPI void* U_EXPORT2 +uhash_removeElement(UHashtable *hash, const UHashElement* e) { + U_ASSERT(hash != NULL); + U_ASSERT(e != NULL); + if (!IS_EMPTY_OR_DELETED(e->hashcode)) { + UHashElement *nce = (UHashElement *)e; + return _uhash_internalRemoveElement(hash, nce).pointer; + } + return NULL; +} + +/******************************************************************** + * UHashTok convenience + ********************************************************************/ + +/** + * Return a UHashTok for an integer. + */ +/*U_CAPI UHashTok U_EXPORT2 +uhash_toki(int32_t i) { + UHashTok tok; + tok.integer = i; + return tok; +}*/ + +/** + * Return a UHashTok for a pointer. + */ +/*U_CAPI UHashTok U_EXPORT2 +uhash_tokp(void* p) { + UHashTok tok; + tok.pointer = p; + return tok; +}*/ + +/******************************************************************** + * PUBLIC Key Hash Functions + ********************************************************************/ + +U_CAPI int32_t U_EXPORT2 +uhash_hashUChars(const UHashTok key) { + const UChar *s = (const UChar *)key.pointer; + return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s)); +} + +U_CAPI int32_t U_EXPORT2 +uhash_hashChars(const UHashTok key) { + const char *s = (const char *)key.pointer; + return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s)); +} + +U_CAPI int32_t U_EXPORT2 +uhash_hashIChars(const UHashTok key) { + const char *s = (const char *)key.pointer; + return s == NULL ? 
0 : ustr_hashICharsN(s, uprv_strlen(s)); +} + +U_CAPI UBool U_EXPORT2 +uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ + int32_t count1, count2, pos, i; + + if(hash1==hash2){ + return TRUE; + } + + /* + * Make sure that we are comparing 2 valid hashes of the same type + * with valid comparison functions. + * Without valid comparison functions, a binary comparison + * of the hash values will yield random results on machines + * with 64-bit pointers and 32-bit integer hashes. + * A valueComparator is normally optional. + */ + if (hash1==NULL || hash2==NULL || + hash1->keyComparator != hash2->keyComparator || + hash1->valueComparator != hash2->valueComparator || + hash1->valueComparator == NULL) + { + /* + Normally we would return an error here about incompatible hash tables, + but we return FALSE instead. + */ + return FALSE; + } + + count1 = uhash_count(hash1); + count2 = uhash_count(hash2); + if(count1!=count2){ + return FALSE; + } + + pos=UHASH_FIRST; + for(i=0; ikey; + const UHashTok val1 = elem1->value; + /* here the keys are not compared, instead the key form hash1 is used to fetch + * value from hash2. If the hashes are equal then then both hashes should + * contain equal values for the same key! 
+ */ + const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); + const UHashTok val2 = elem2->value; + if(hash1->valueComparator(val1, val2)==FALSE){ + return FALSE; + } + } + return TRUE; +} + +/******************************************************************** + * PUBLIC Comparator Functions + ********************************************************************/ + +U_CAPI UBool U_EXPORT2 +uhash_compareUChars(const UHashTok key1, const UHashTok key2) { + const UChar *p1 = (const UChar*) key1.pointer; + const UChar *p2 = (const UChar*) key2.pointer; + if (p1 == p2) { + return TRUE; + } + if (p1 == NULL || p2 == NULL) { + return FALSE; + } + while (*p1 != 0 && *p1 == *p2) { + ++p1; + ++p2; + } + return (UBool)(*p1 == *p2); +} + +U_CAPI UBool U_EXPORT2 +uhash_compareChars(const UHashTok key1, const UHashTok key2) { + const char *p1 = (const char*) key1.pointer; + const char *p2 = (const char*) key2.pointer; + if (p1 == p2) { + return TRUE; + } + if (p1 == NULL || p2 == NULL) { + return FALSE; + } + while (*p1 != 0 && *p1 == *p2) { + ++p1; + ++p2; + } + return (UBool)(*p1 == *p2); +} + +U_CAPI UBool U_EXPORT2 +uhash_compareIChars(const UHashTok key1, const UHashTok key2) { + const char *p1 = (const char*) key1.pointer; + const char *p2 = (const char*) key2.pointer; + if (p1 == p2) { + return TRUE; + } + if (p1 == NULL || p2 == NULL) { + return FALSE; + } + while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) { + ++p1; + ++p2; + } + return (UBool)(*p1 == *p2); +} + +/******************************************************************** + * PUBLIC int32_t Support Functions + ********************************************************************/ + +U_CAPI int32_t U_EXPORT2 +uhash_hashLong(const UHashTok key) { + return key.integer; +} + +U_CAPI UBool U_EXPORT2 +uhash_compareLong(const UHashTok key1, const UHashTok key2) { + return (UBool)(key1.integer == key2.integer); +} diff --git a/deps/icu-small/source/common/uhash.h 
b/deps/icu-small/source/common/uhash.h index 4aa50e4085..2e7cf6a394 100644 --- a/deps/icu-small/source/common/uhash.h +++ b/deps/icu-small/source/common/uhash.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/uhash_us.cpp b/deps/icu-small/source/common/uhash_us.cpp index ac76c1b978..ef482c2746 100644 --- a/deps/icu-small/source/common/uhash_us.cpp +++ b/deps/icu-small/source/common/uhash_us.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/uidna.cpp b/deps/icu-small/source/common/uidna.cpp index 2f13319849..6d56fcb8f5 100644 --- a/deps/icu-small/source/common/uidna.cpp +++ b/deps/icu-small/source/common/uidna.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uidna.cpp - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uinit.cpp b/deps/icu-small/source/common/uinit.cpp index 34e82a3db4..624431be02 100644 --- a/deps/icu-small/source/common/uinit.cpp +++ b/deps/icu-small/source/common/uinit.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: uinit.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uinvchar.c b/deps/icu-small/source/common/uinvchar.c deleted file mode 100644 index f1dbe4fe33..0000000000 --- a/deps/icu-small/source/common/uinvchar.c +++ /dev/null @@ -1,612 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uinvchar.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2004sep14 -* created by: Markus W. Scherer -* -* Functions for handling invariant characters, moved here from putil.c -* for better modularization. -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "udataswp.h" -#include "cstring.h" -#include "cmemory.h" -#include "uassert.h" -#include "uinvchar.h" - -/* invariant-character handling --------------------------------------------- */ - -/* - * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) - * appropriately for most EBCDIC codepages. - * - * They currently also map most other ASCII graphic characters, - * appropriately for codepages 37 and 1047. - * Exceptions: The characters for []^ have different codes in 37 & 1047. - * Both versions are mapped to ASCII. 
- * - * ASCII 37 1047 - * [ 5B BA AD - * ] 5D BB BD - * ^ 5E B0 5F - * - * There are no mappings for variant characters from Unicode to EBCDIC. - * - * Currently, C0 control codes are also included in these maps. - * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other - * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), - * but there is no mapping for ASCII LF back to EBCDIC. - * - * ASCII EBCDIC S/390-OE - * LF 0A 25 15 - * NEL 85 15 25 - * - * The maps below explicitly exclude the variant - * control and graphical characters that are in ASCII-based - * codepages at 0x80 and above. - * "No mapping" is expressed by mapping to a 00 byte. - * - * These tables do not establish a converter or a codepage. - */ - -static const uint8_t asciiFromEbcdic[256]={ - 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, - 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, - - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, - 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, - - 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, - 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, - - 0x7b, 
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -static const uint8_t ebcdicFromAscii[256]={ - 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, - 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, - - 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, - 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, - 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Same as asciiFromEbcdic[] except maps all letters to lowercase. 
*/ -static const uint8_t lowercaseAsciiFromEbcdic[256]={ - 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, - 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, - - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, - 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, - - 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, - 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, - - 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -/* - * Bit sets indicating which characters of the ASCII repertoire - * (by ASCII/Unicode code) are "invariant". - * See utypes.h for more details. - * - * As invariant are considered the characters of the ASCII repertoire except - * for the following: - * 21 '!' 
- * 23 '#' - * 24 '$' - * - * 40 '@' - * - * 5b '[' - * 5c '\' - * 5d ']' - * 5e '^' - * - * 60 '`' - * - * 7b '{' - * 7c '|' - * 7d '}' - * 7e '~' - */ -static const uint32_t invariantChars[4]={ - 0xfffffbff, /* 00..1f but not 0a */ - 0xffffffe5, /* 20..3f but not 21 23 24 */ - 0x87fffffe, /* 40..5f but not 40 5b..5e */ - 0x87fffffe /* 60..7f but not 60 7b..7e */ -}; - -/* - * test unsigned types (or values known to be non-negative) for invariant characters, - * tests ASCII-family character values - */ -#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) - -/* test signed types for invariant characters, adds test for positive values */ -#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -#define CHAR_TO_UCHAR(c) c -#define UCHAR_TO_CHAR(c) c -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] -#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] -#else -# error U_CHARSET_FAMILY is not valid -#endif - - -U_CAPI void U_EXPORT2 -u_charsToUChars(const char *cs, UChar *us, int32_t length) { - UChar u; - uint8_t c; - - /* - * Allow the entire ASCII repertoire to be mapped _to_ Unicode. - * For EBCDIC systems, this works for characters with codes from - * codepages 37 and 1047 or compatible. - */ - while(length>0) { - c=(uint8_t)(*cs++); - u=(UChar)CHAR_TO_UCHAR(c); - U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ - *us++=u; - --length; - } -} - -U_CAPI void U_EXPORT2 -u_UCharsToChars(const UChar *us, char *cs, int32_t length) { - UChar u; - - while(length>0) { - u=*us++; - if(!UCHAR_IS_INVARIANT(u)) { - U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. 
*/ - u=0; - } - *cs++=(char)UCHAR_TO_CHAR(u); - --length; - } -} - -U_CAPI UBool U_EXPORT2 -uprv_isInvariantString(const char *s, int32_t length) { - uint8_t c; - - for(;;) { - if(length<0) { - /* NUL-terminated */ - c=(uint8_t)*s++; - if(c==0) { - break; - } - } else { - /* count length */ - if(length==0) { - break; - } - --length; - c=(uint8_t)*s++; - if(c==0) { - continue; /* NUL is invariant */ - } - } - /* c!=0 now, one branch below checks c==0 for variant characters */ - - /* - * no assertions here because these functions are legitimately called - * for strings with variant characters - */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY - if(!UCHAR_IS_INVARIANT(c)) { - return FALSE; /* found a variant char */ - } -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY - c=CHAR_TO_UCHAR(c); - if(c==0 || !UCHAR_IS_INVARIANT(c)) { - return FALSE; /* found a variant char */ - } -#else -# error U_CHARSET_FAMILY is not valid -#endif - } - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -uprv_isInvariantUString(const UChar *s, int32_t length) { - UChar c; - - for(;;) { - if(length<0) { - /* NUL-terminated */ - c=*s++; - if(c==0) { - break; - } - } else { - /* count length */ - if(length==0) { - break; - } - --length; - c=*s++; - } - - /* - * no assertions here because these functions are legitimately called - * for strings with variant characters - */ - if(!UCHAR_IS_INVARIANT(c)) { - return FALSE; /* found a variant char */ - } - } - return TRUE; -} - -/* UDataSwapFn implementations used in udataswp.c ------- */ - -/* convert ASCII to EBCDIC and verify that all characters are invariant */ -U_CAPI int32_t U_EXPORT2 -uprv_ebcdicFromAscii(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t *t; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; 
- } - - /* setup and swapping */ - s=(const uint8_t *)inData; - t=(uint8_t *)outData; - count=length; - while(count>0) { - c=*s++; - if(!UCHAR_IS_INVARIANT(c)) { - udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - *t++=ebcdicFromAscii[c]; - --count; - } - - return length; -} - -/* this function only checks and copies ASCII strings without conversion */ -U_CFUNC int32_t -uprv_copyAscii(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and checking */ - s=(const uint8_t *)inData; - count=length; - while(count>0) { - c=*s++; - if(!UCHAR_IS_INVARIANT(c)) { - udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - --count; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - - return length; -} - -/* convert EBCDIC to ASCII and verify that all characters are invariant */ -U_CFUNC int32_t -uprv_asciiFromEbcdic(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t *t; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - s=(const uint8_t *)inData; - t=(uint8_t *)outData; - count=length; - while(count>0) { - c=*s++; - if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 
- udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - *t++=c; - --count; - } - - return length; -} - -/* this function only checks and copies EBCDIC strings without conversion */ -U_CFUNC int32_t -uprv_copyEbcdic(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and checking */ - s=(const uint8_t *)inData; - count=length; - while(count>0) { - c=*s++; - if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { - udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - --count; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - - return length; -} - -/* compare invariant strings; variant characters compare less than others and unlike each other */ -U_CFUNC int32_t -uprv_compareInvAscii(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength) { - int32_t minLength; - UChar32 c1, c2; - uint8_t c; - - if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { - return 0; - } - - if(outLength<0) { - outLength=(int32_t)uprv_strlen(outString); - } - if(localLength<0) { - localLength=u_strlen(localString); - } - - minLength= outLength0) { - c=(uint8_t)*outString++; - if(UCHAR_IS_INVARIANT(c)) { - c1=c; - } else { - c1=-1; - } - - c2=*localString++; - if(!UCHAR_IS_INVARIANT(c2)) { - c2=-2; - } - - if((c1-=c2)!=0) { - return c1; - } - - --minLength; - } - - /* strings start with same prefix, 
compare lengths */ - return outLength-localLength; -} - -U_CFUNC int32_t -uprv_compareInvEbcdic(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength) { - int32_t minLength; - UChar32 c1, c2; - uint8_t c; - - if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { - return 0; - } - - if(outLength<0) { - outLength=(int32_t)uprv_strlen(outString); - } - if(localLength<0) { - localLength=u_strlen(localString); - } - - minLength= outLength0) { - c=(uint8_t)*outString++; - if(c==0) { - c1=0; - } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { - /* c1 is set */ - } else { - c1=-1; - } - - c2=*localString++; - if(!UCHAR_IS_INVARIANT(c2)) { - c2=-2; - } - - if((c1-=c2)!=0) { - return c1; - } - - --minLength; - } - - /* strings start with same prefix, compare lengths */ - return outLength-localLength; -} - -U_CAPI int32_t U_EXPORT2 -uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { - int32_t c1, c2; - - for(;; ++s1, ++s2) { - c1=(uint8_t)*s1; - c2=(uint8_t)*s2; - if(c1!=c2) { - if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { - c1=-(int32_t)(uint8_t)*s1; - } - if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { - c2=-(int32_t)(uint8_t)*s2; - } - return c1-c2; - } else if(c1==0) { - return 0; - } - } -} - -U_CAPI char U_EXPORT2 -uprv_ebcdicToLowercaseAscii(char c) { - return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; -} - -U_INTERNAL uint8_t* U_EXPORT2 -uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) -{ - uint8_t *orig_dst = dst; - - if(n==-1) { - n = uprv_strlen((const char*)src)+1; /* copy NUL */ - } - /* copy non-null */ - while(*src && n>0) { - *(dst++) = asciiFromEbcdic[*(src++)]; - n--; - } - /* pad */ - while(n>0) { - *(dst++) = 0; - n--; - } - return orig_dst; -} - -U_INTERNAL uint8_t* U_EXPORT2 -uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) -{ - uint8_t *orig_dst = dst; - - if(n==-1) { - 
n = uprv_strlen((const char*)src)+1; /* copy NUL */ - } - /* copy non-null */ - while(*src && n>0) { - char ch = ebcdicFromAscii[*(src++)]; - if(ch == 0) { - ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ - } - *(dst++) = ch; - n--; - } - /* pad */ - while(n>0) { - *(dst++) = 0; - n--; - } - return orig_dst; -} diff --git a/deps/icu-small/source/common/uinvchar.cpp b/deps/icu-small/source/common/uinvchar.cpp new file mode 100644 index 0000000000..ed1ab8e761 --- /dev/null +++ b/deps/icu-small/source/common/uinvchar.cpp @@ -0,0 +1,614 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uinvchar.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2004sep14 +* created by: Markus W. Scherer +* +* Functions for handling invariant characters, moved here from putil.c +* for better modularization. +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "udataswp.h" +#include "cstring.h" +#include "cmemory.h" +#include "uassert.h" +#include "uinvchar.h" + +/* invariant-character handling --------------------------------------------- */ + +/* + * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) + * appropriately for most EBCDIC codepages. + * + * They currently also map most other ASCII graphic characters, + * appropriately for codepages 37 and 1047. + * Exceptions: The characters for []^ have different codes in 37 & 1047. + * Both versions are mapped to ASCII. + * + * ASCII 37 1047 + * [ 5B BA AD + * ] 5D BB BD + * ^ 5E B0 5F + * + * There are no mappings for variant characters from Unicode to EBCDIC. 
+ * + * Currently, C0 control codes are also included in these maps. + * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other + * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), + * but there is no mapping for ASCII LF back to EBCDIC. + * + * ASCII EBCDIC S/390-OE + * LF 0A 25 15 + * NEL 85 15 25 + * + * The maps below explicitly exclude the variant + * control and graphical characters that are in ASCII-based + * codepages at 0x80 and above. + * "No mapping" is expressed by mapping to a 00 byte. + * + * These tables do not establish a converter or a codepage. + */ + +static const uint8_t asciiFromEbcdic[256]={ + 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, + 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, + + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, + 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + + 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, + 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, + + 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 
0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const uint8_t ebcdicFromAscii[256]={ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, + 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, + + 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, + 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, + 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Same as asciiFromEbcdic[] except maps all letters to lowercase. 
*/ +static const uint8_t lowercaseAsciiFromEbcdic[256]={ + 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, + 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, + + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, + 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + + 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, + 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, + + 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* + * Bit sets indicating which characters of the ASCII repertoire + * (by ASCII/Unicode code) are "invariant". + * See utypes.h for more details. + * + * As invariant are considered the characters of the ASCII repertoire except + * for the following: + * 21 '!' 
+ * 23 '#' + * 24 '$' + * + * 40 '@' + * + * 5b '[' + * 5c '\' + * 5d ']' + * 5e '^' + * + * 60 '`' + * + * 7b '{' + * 7c '|' + * 7d '}' + * 7e '~' + */ +static const uint32_t invariantChars[4]={ + 0xfffffbff, /* 00..1f but not 0a */ + 0xffffffe5, /* 20..3f but not 21 23 24 */ + 0x87fffffe, /* 40..5f but not 40 5b..5e */ + 0x87fffffe /* 60..7f but not 60 7b..7e */ +}; + +/* + * test unsigned types (or values known to be non-negative) for invariant characters, + * tests ASCII-family character values + */ +#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) + +/* test signed types for invariant characters, adds test for positive values */ +#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +#define CHAR_TO_UCHAR(c) c +#define UCHAR_TO_CHAR(c) c +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] +#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] +#else +# error U_CHARSET_FAMILY is not valid +#endif + + +U_CAPI void U_EXPORT2 +u_charsToUChars(const char *cs, UChar *us, int32_t length) { + UChar u; + uint8_t c; + + /* + * Allow the entire ASCII repertoire to be mapped _to_ Unicode. + * For EBCDIC systems, this works for characters with codes from + * codepages 37 and 1047 or compatible. + */ + while(length>0) { + c=(uint8_t)(*cs++); + u=(UChar)CHAR_TO_UCHAR(c); + U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ + *us++=u; + --length; + } +} + +U_CAPI void U_EXPORT2 +u_UCharsToChars(const UChar *us, char *cs, int32_t length) { + UChar u; + + while(length>0) { + u=*us++; + if(!UCHAR_IS_INVARIANT(u)) { + U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. 
*/ + u=0; + } + *cs++=(char)UCHAR_TO_CHAR(u); + --length; + } +} + +U_CAPI UBool U_EXPORT2 +uprv_isInvariantString(const char *s, int32_t length) { + uint8_t c; + + for(;;) { + if(length<0) { + /* NUL-terminated */ + c=(uint8_t)*s++; + if(c==0) { + break; + } + } else { + /* count length */ + if(length==0) { + break; + } + --length; + c=(uint8_t)*s++; + if(c==0) { + continue; /* NUL is invariant */ + } + } + /* c!=0 now, one branch below checks c==0 for variant characters */ + + /* + * no assertions here because these functions are legitimately called + * for strings with variant characters + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + if(!UCHAR_IS_INVARIANT(c)) { + return FALSE; /* found a variant char */ + } +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + c=CHAR_TO_UCHAR(c); + if(c==0 || !UCHAR_IS_INVARIANT(c)) { + return FALSE; /* found a variant char */ + } +#else +# error U_CHARSET_FAMILY is not valid +#endif + } + return TRUE; +} + +U_CAPI UBool U_EXPORT2 +uprv_isInvariantUString(const UChar *s, int32_t length) { + UChar c; + + for(;;) { + if(length<0) { + /* NUL-terminated */ + c=*s++; + if(c==0) { + break; + } + } else { + /* count length */ + if(length==0) { + break; + } + --length; + c=*s++; + } + + /* + * no assertions here because these functions are legitimately called + * for strings with variant characters + */ + if(!UCHAR_IS_INVARIANT(c)) { + return FALSE; /* found a variant char */ + } + } + return TRUE; +} + +/* UDataSwapFn implementations used in udataswp.c ------- */ + +/* convert ASCII to EBCDIC and verify that all characters are invariant */ +U_CAPI int32_t U_EXPORT2 +uprv_ebcdicFromAscii(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t *t; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; 
+ } + + /* setup and swapping */ + s=(const uint8_t *)inData; + t=(uint8_t *)outData; + count=length; + while(count>0) { + c=*s++; + if(!UCHAR_IS_INVARIANT(c)) { + udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + *t++=ebcdicFromAscii[c]; + --count; + } + + return length; +} + +/* this function only checks and copies ASCII strings without conversion */ +U_CFUNC int32_t +uprv_copyAscii(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and checking */ + s=(const uint8_t *)inData; + count=length; + while(count>0) { + c=*s++; + if(!UCHAR_IS_INVARIANT(c)) { + udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + --count; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + + return length; +} + +/* convert EBCDIC to ASCII and verify that all characters are invariant */ +U_CFUNC int32_t +uprv_asciiFromEbcdic(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t *t; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + s=(const uint8_t *)inData; + t=(uint8_t *)outData; + count=length; + while(count>0) { + c=*s++; + if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 
+ udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + *t++=c; + --count; + } + + return length; +} + +/* this function only checks and copies EBCDIC strings without conversion */ +U_CFUNC int32_t +uprv_copyEbcdic(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and checking */ + s=(const uint8_t *)inData; + count=length; + while(count>0) { + c=*s++; + if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { + udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + --count; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + + return length; +} + +/* compare invariant strings; variant characters compare less than others and unlike each other */ +U_CFUNC int32_t +uprv_compareInvAscii(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength) { + (void)ds; + int32_t minLength; + UChar32 c1, c2; + uint8_t c; + + if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { + return 0; + } + + if(outLength<0) { + outLength=(int32_t)uprv_strlen(outString); + } + if(localLength<0) { + localLength=u_strlen(localString); + } + + minLength= outLength0) { + c=(uint8_t)*outString++; + if(UCHAR_IS_INVARIANT(c)) { + c1=c; + } else { + c1=-1; + } + + c2=*localString++; + if(!UCHAR_IS_INVARIANT(c2)) { + c2=-2; + } + + if((c1-=c2)!=0) { + return c1; + } + + --minLength; + } + + /* strings start with 
same prefix, compare lengths */ + return outLength-localLength; +} + +U_CFUNC int32_t +uprv_compareInvEbcdic(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength) { + (void)ds; + int32_t minLength; + UChar32 c1, c2; + uint8_t c; + + if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { + return 0; + } + + if(outLength<0) { + outLength=(int32_t)uprv_strlen(outString); + } + if(localLength<0) { + localLength=u_strlen(localString); + } + + minLength= outLength0) { + c=(uint8_t)*outString++; + if(c==0) { + c1=0; + } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { + /* c1 is set */ + } else { + c1=-1; + } + + c2=*localString++; + if(!UCHAR_IS_INVARIANT(c2)) { + c2=-2; + } + + if((c1-=c2)!=0) { + return c1; + } + + --minLength; + } + + /* strings start with same prefix, compare lengths */ + return outLength-localLength; +} + +U_CAPI int32_t U_EXPORT2 +uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { + int32_t c1, c2; + + for(;; ++s1, ++s2) { + c1=(uint8_t)*s1; + c2=(uint8_t)*s2; + if(c1!=c2) { + if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { + c1=-(int32_t)(uint8_t)*s1; + } + if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { + c2=-(int32_t)(uint8_t)*s2; + } + return c1-c2; + } else if(c1==0) { + return 0; + } + } +} + +U_CAPI char U_EXPORT2 +uprv_ebcdicToLowercaseAscii(char c) { + return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; +} + +U_INTERNAL uint8_t* U_EXPORT2 +uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) +{ + uint8_t *orig_dst = dst; + + if(n==-1) { + n = uprv_strlen((const char*)src)+1; /* copy NUL */ + } + /* copy non-null */ + while(*src && n>0) { + *(dst++) = asciiFromEbcdic[*(src++)]; + n--; + } + /* pad */ + while(n>0) { + *(dst++) = 0; + n--; + } + return orig_dst; +} + +U_INTERNAL uint8_t* U_EXPORT2 +uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) +{ + uint8_t *orig_dst 
= dst; + + if(n==-1) { + n = uprv_strlen((const char*)src)+1; /* copy NUL */ + } + /* copy non-null */ + while(*src && n>0) { + char ch = ebcdicFromAscii[*(src++)]; + if(ch == 0) { + ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ + } + *(dst++) = ch; + n--; + } + /* pad */ + while(n>0) { + *(dst++) = 0; + n--; + } + return orig_dst; +} diff --git a/deps/icu-small/source/common/uinvchar.h b/deps/icu-small/source/common/uinvchar.h index 19a3b2696e..c4f9f88b9a 100644 --- a/deps/icu-small/source/common/uinvchar.h +++ b/deps/icu-small/source/common/uinvchar.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uinvchar.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * @@ -64,7 +64,7 @@ uprv_isInvariantUString(const UChar *s, int32_t length); */ U_INTERNAL inline UBool U_EXPORT2 uprv_isInvariantUnicodeString(const icu::UnicodeString &s) { - return uprv_isInvariantUString(s.getBuffer(), s.length()); + return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length()); } #endif /* __cplusplus */ diff --git a/deps/icu-small/source/common/uiter.cpp b/deps/icu-small/source/common/uiter.cpp index 26ca877814..b9252d81c2 100644 --- a/deps/icu-small/source/common/uiter.cpp +++ b/deps/icu-small/source/common/uiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uiter.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ulist.c b/deps/icu-small/source/common/ulist.c deleted file mode 100644 index 3b8e6c53ed..0000000000 --- a/deps/icu-small/source/common/ulist.c +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2009-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -*/ - -#include "ulist.h" -#include "cmemory.h" -#include "cstring.h" -#include "uenumimp.h" - -typedef struct UListNode UListNode; -struct UListNode { - void *data; - - UListNode *next; - UListNode *previous; - - /* When data is created with uprv_malloc, needs to be freed during deleteList function. 
*/ - UBool forceDelete; -}; - -struct UList { - UListNode *curr; - UListNode *head; - UListNode *tail; - - int32_t size; - int32_t currentIndex; -}; - -static void ulist_addFirstItem(UList *list, UListNode *newItem); - -U_CAPI UList *U_EXPORT2 ulist_createEmptyList(UErrorCode *status) { - UList *newList = NULL; - - if (U_FAILURE(*status)) { - return NULL; - } - - newList = (UList *)uprv_malloc(sizeof(UList)); - if (newList == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - newList->curr = NULL; - newList->head = NULL; - newList->tail = NULL; - newList->size = 0; - newList->currentIndex = -1; - - return newList; -} - -/* - * Function called by addItemEndList or addItemBeginList when the first item is added to the list. - * This function properly sets the pointers for the first item added. - */ -static void ulist_addFirstItem(UList *list, UListNode *newItem) { - newItem->next = NULL; - newItem->previous = NULL; - list->head = newItem; - list->tail = newItem; -} - -static void ulist_removeItem(UList *list, UListNode *p) { - if (p->previous == NULL) { - // p is the list head. - list->head = p->next; - } else { - p->previous->next = p->next; - } - if (p->next == NULL) { - // p is the list tail. 
- list->tail = p->previous; - } else { - p->next->previous = p->previous; - } - list->curr = NULL; - list->currentIndex = 0; - --list->size; - if (p->forceDelete) { - uprv_free(p->data); - } - uprv_free(p); -} - -U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { - UListNode *newItem = NULL; - - if (U_FAILURE(*status) || list == NULL || data == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - return; - } - - newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); - if (newItem == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - newItem->data = (void *)(data); - newItem->forceDelete = forceDelete; - - if (list->size == 0) { - ulist_addFirstItem(list, newItem); - } else { - newItem->next = NULL; - newItem->previous = list->tail; - list->tail->next = newItem; - list->tail = newItem; - } - - list->size++; -} - -U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { - UListNode *newItem = NULL; - - if (U_FAILURE(*status) || list == NULL || data == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - return; - } - - newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); - if (newItem == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - newItem->data = (void *)(data); - newItem->forceDelete = forceDelete; - - if (list->size == 0) { - ulist_addFirstItem(list, newItem); - } else { - newItem->previous = NULL; - newItem->next = list->head; - list->head->previous = newItem; - list->head = newItem; - list->currentIndex++; - } - - list->size++; -} - -U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length) { - if (list != NULL) { - const UListNode *pointer; - for (pointer = list->head; pointer != NULL; pointer = pointer->next) { - if (length == 
uprv_strlen(pointer->data)) { - if (uprv_memcmp(data, pointer->data, length) == 0) { - return TRUE; - } - } - } - } - return FALSE; -} - -U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data) { - if (list != NULL) { - UListNode *pointer; - for (pointer = list->head; pointer != NULL; pointer = pointer->next) { - if (uprv_strcmp(data, pointer->data) == 0) { - ulist_removeItem(list, pointer); - // Remove only the first occurrence, like Java LinkedList.remove(Object). - return TRUE; - } - } - } - return FALSE; -} - -U_CAPI void *U_EXPORT2 ulist_getNext(UList *list) { - UListNode *curr = NULL; - - if (list == NULL || list->curr == NULL) { - return NULL; - } - - curr = list->curr; - list->curr = curr->next; - list->currentIndex++; - - return curr->data; -} - -U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list) { - if (list != NULL) { - return list->size; - } - - return -1; -} - -U_CAPI void U_EXPORT2 ulist_resetList(UList *list) { - if (list != NULL) { - list->curr = list->head; - list->currentIndex = 0; - } -} - -U_CAPI void U_EXPORT2 ulist_deleteList(UList *list) { - UListNode *listHead = NULL; - - if (list != NULL) { - listHead = list->head; - while (listHead != NULL) { - UListNode *listPointer = listHead->next; - - if (listHead->forceDelete) { - uprv_free(listHead->data); - } - - uprv_free(listHead); - listHead = listPointer; - } - uprv_free(list); - list = NULL; - } -} - -U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en) { - if (en != NULL) { - ulist_deleteList((UList *)(en->context)); - uprv_free(en); - } -} - -U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status) { - if (U_FAILURE(*status)) { - return -1; - } - - return ulist_getListSize((UList *)(en->context)); -} - -U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t *resultLength, UErrorCode *status) { - const char *s; - if (U_FAILURE(*status)) { - return NULL; - } - - s = (const char 
*)ulist_getNext((UList *)(en->context)); - if (s != NULL && resultLength != NULL) { - *resultLength = uprv_strlen(s); - } - return s; -} - -U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration *en, UErrorCode *status) { - if (U_FAILURE(*status)) { - return ; - } - - ulist_resetList((UList *)(en->context)); -} - -U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en) { - return (UList *)(en->context); -} diff --git a/deps/icu-small/source/common/ulist.cpp b/deps/icu-small/source/common/ulist.cpp new file mode 100644 index 0000000000..d4549328ea --- /dev/null +++ b/deps/icu-small/source/common/ulist.cpp @@ -0,0 +1,270 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2009-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +*/ + +#include "ulist.h" +#include "cmemory.h" +#include "cstring.h" +#include "uenumimp.h" + +typedef struct UListNode UListNode; +struct UListNode { + void *data; + + UListNode *next; + UListNode *previous; + + /* When data is created with uprv_malloc, needs to be freed during deleteList function. 
*/ + UBool forceDelete; +}; + +struct UList { + UListNode *curr; + UListNode *head; + UListNode *tail; + + int32_t size; +}; + +static void ulist_addFirstItem(UList *list, UListNode *newItem); + +U_CAPI UList *U_EXPORT2 ulist_createEmptyList(UErrorCode *status) { + UList *newList = NULL; + + if (U_FAILURE(*status)) { + return NULL; + } + + newList = (UList *)uprv_malloc(sizeof(UList)); + if (newList == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + newList->curr = NULL; + newList->head = NULL; + newList->tail = NULL; + newList->size = 0; + + return newList; +} + +/* + * Function called by addItemEndList or addItemBeginList when the first item is added to the list. + * This function properly sets the pointers for the first item added. + */ +static void ulist_addFirstItem(UList *list, UListNode *newItem) { + newItem->next = NULL; + newItem->previous = NULL; + list->head = newItem; + list->tail = newItem; +} + +static void ulist_removeItem(UList *list, UListNode *p) { + if (p->previous == NULL) { + // p is the list head. + list->head = p->next; + } else { + p->previous->next = p->next; + } + if (p->next == NULL) { + // p is the list tail. 
+ list->tail = p->previous; + } else { + p->next->previous = p->previous; + } + if (p == list->curr) { + list->curr = p->next; + } + --list->size; + if (p->forceDelete) { + uprv_free(p->data); + } + uprv_free(p); +} + +U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { + UListNode *newItem = NULL; + + if (U_FAILURE(*status) || list == NULL || data == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + return; + } + + newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); + if (newItem == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + newItem->data = (void *)(data); + newItem->forceDelete = forceDelete; + + if (list->size == 0) { + ulist_addFirstItem(list, newItem); + } else { + newItem->next = NULL; + newItem->previous = list->tail; + list->tail->next = newItem; + list->tail = newItem; + } + + list->size++; +} + +U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { + UListNode *newItem = NULL; + + if (U_FAILURE(*status) || list == NULL || data == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + return; + } + + newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); + if (newItem == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + newItem->data = (void *)(data); + newItem->forceDelete = forceDelete; + + if (list->size == 0) { + ulist_addFirstItem(list, newItem); + } else { + newItem->previous = NULL; + newItem->next = list->head; + list->head->previous = newItem; + list->head = newItem; + } + + list->size++; +} + +U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length) { + if (list != NULL) { + const UListNode *pointer; + for (pointer = list->head; pointer != NULL; pointer = pointer->next) { + if (length == (int32_t)uprv_strlen((const char 
*)pointer->data)) { + if (uprv_memcmp(data, pointer->data, length) == 0) { + return TRUE; + } + } + } + } + return FALSE; +} + +U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data) { + if (list != NULL) { + UListNode *pointer; + for (pointer = list->head; pointer != NULL; pointer = pointer->next) { + if (uprv_strcmp(data, (const char *)pointer->data) == 0) { + ulist_removeItem(list, pointer); + // Remove only the first occurrence, like Java LinkedList.remove(Object). + return TRUE; + } + } + } + return FALSE; +} + +U_CAPI void *U_EXPORT2 ulist_getNext(UList *list) { + UListNode *curr = NULL; + + if (list == NULL || list->curr == NULL) { + return NULL; + } + + curr = list->curr; + list->curr = curr->next; + + return curr->data; +} + +U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list) { + if (list != NULL) { + return list->size; + } + + return -1; +} + +U_CAPI void U_EXPORT2 ulist_resetList(UList *list) { + if (list != NULL) { + list->curr = list->head; + } +} + +U_CAPI void U_EXPORT2 ulist_deleteList(UList *list) { + UListNode *listHead = NULL; + + if (list != NULL) { + listHead = list->head; + while (listHead != NULL) { + UListNode *listPointer = listHead->next; + + if (listHead->forceDelete) { + uprv_free(listHead->data); + } + + uprv_free(listHead); + listHead = listPointer; + } + uprv_free(list); + list = NULL; + } +} + +U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en) { + if (en != NULL) { + ulist_deleteList((UList *)(en->context)); + uprv_free(en); + } +} + +U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status) { + if (U_FAILURE(*status)) { + return -1; + } + + return ulist_getListSize((UList *)(en->context)); +} + +U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t *resultLength, UErrorCode *status) { + const char *s; + if (U_FAILURE(*status)) { + return NULL; + } + + s = (const char *)ulist_getNext((UList *)(en->context)); + if (s != 
NULL && resultLength != NULL) { + *resultLength = uprv_strlen(s); + } + return s; +} + +U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration *en, UErrorCode *status) { + if (U_FAILURE(*status)) { + return ; + } + + ulist_resetList((UList *)(en->context)); +} + +U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en) { + return (UList *)(en->context); +} diff --git a/deps/icu-small/source/common/ulist.h b/deps/icu-small/source/common/ulist.h index 6f292bf9dc..de58a4ad02 100644 --- a/deps/icu-small/source/common/ulist.h +++ b/deps/icu-small/source/common/ulist.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/ulistformatter.cpp b/deps/icu-small/source/common/ulistformatter.cpp index 98aa50e25f..c140c784b5 100644 --- a/deps/icu-small/source/common/ulistformatter.cpp +++ b/deps/icu-small/source/common/ulistformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/common/uloc.cpp b/deps/icu-small/source/common/uloc.cpp index 2a02b27c5b..4d854bbcca 100644 --- a/deps/icu-small/source/common/uloc.cpp +++ b/deps/icu-small/source/common/uloc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -44,10 +44,11 @@ #include "uarrsort.h" #include "uenumimp.h" #include "uassert.h" +#include "charstr.h" #include /* for sprintf */ -using namespace icu; +U_NAMESPACE_USE /* ### Declarations **************************************************/ @@ -101,7 +102,7 @@ static const char * const LANGUAGES[] = { "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", "aln", "alt", "am", "an", "ang", "anp", "ar", "arc", - "arn", "aro", "arp", "arq", "arw", "ary", "arz", "as", + "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as", "asa", "ase", "ast", "av", "avk", "awa", "ay", "az", "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg", @@ -216,7 +217,7 @@ static const char * const LANGUAGES_3[] = { "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc", - "arn", "aro", "arp", "arq", "arw", "ary", "arz", "asm", + "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm", "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze", "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul", @@ -560,6 +561,10 @@ static int32_t getShortestSubtagLength(const char *localeID) { } /* ### Keywords **************************************************/ +#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) +#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) +/* Punctuation/symbols allowed in legacy key values */ +#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/') #define ULOC_KEYWORD_BUFFER_LEN 25 #define ULOC_MAX_NO_KEYWORDS 25 @@ -596,20 +601,26 @@ locale_getKeywordsStart(const char *localeID) { */ static int32_t 
locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) { - int32_t i; - int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); + int32_t keywordNameLen = 0; - if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; + for (; *keywordName != 0; keywordName++) { + if (!UPRV_ISALPHANUM(*keywordName)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + buf[keywordNameLen++] = uprv_tolower(*keywordName); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } } - - /* normalize the keyword name */ - for(i = 0; i < keywordNameLen; i++) { - buf[i] = uprv_tolower(keywordName[i]); + if (keywordNameLen == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */ + return 0; } - buf[i] = 0; + buf[keywordNameLen] = 0; /* terminate */ return keywordNameLen; } @@ -837,87 +848,108 @@ uloc_getKeywordValue(const char* localeID, const char* nextSeparator = NULL; char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - int32_t i = 0; int32_t result = 0; if(status && U_SUCCESS(*status) && localeID) { char tempBuffer[ULOC_FULLNAME_CAPACITY]; const char* tmpLocaleID; + if (keywordName == NULL || keywordName[0] == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + locale_canonKeywordName(keywordNameBuffer, keywordName, status); + if(U_FAILURE(*status)) { + return 0; + } + if (_hasBCP47Extension(localeID)) { _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); } else { tmpLocaleID=localeID; } - startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? 
*/ + startSearchHere = locale_getKeywordsStart(tmpLocaleID); if(startSearchHere == NULL) { /* no keywords, return at once */ return 0; } - locale_canonKeywordName(keywordNameBuffer, keywordName, status); - if(U_FAILURE(*status)) { - return 0; - } - /* find the first keyword */ while(startSearchHere) { - startSearchHere++; - /* skip leading spaces (allowed?) */ + const char* keyValueTail; + int32_t keyValueLen; + + startSearchHere++; /* skip @ or ; */ + nextSeparator = uprv_strchr(startSearchHere, '='); + if(!nextSeparator) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ + return 0; + } + /* strip leading & trailing spaces (TC decided to tolerate these) */ while(*startSearchHere == ' ') { startSearchHere++; } - nextSeparator = uprv_strchr(startSearchHere, '='); - /* need to normalize both keyword and keyword name */ - if(!nextSeparator) { - break; + keyValueTail = nextSeparator; + while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + /* now keyValueTail points to first char after the keyName */ + /* copy & normalize keyName from locale */ + if (startSearchHere == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ + return 0; } - if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { + keyValueLen = 0; + while (startSearchHere < keyValueTail) { + if (!UPRV_ISALPHANUM(*startSearchHere)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++); + } else { /* keyword name too long for internal buffer */ *status = U_INTERNAL_PROGRAM_ERROR; return 0; + } } - for(i = 0; i < nextSeparator - startSearchHere; i++) { - localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); - } - /* trim trailing spaces */ - while(startSearchHere[i-1] == ' ') { - i--; - U_ASSERT(i>=0); - } - localeKeywordNameBuffer[i] = 
0; + localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ startSearchHere = uprv_strchr(nextSeparator, ';'); if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { - nextSeparator++; + /* current entry matches the keyword. */ + nextSeparator++; /* skip '=' */ + /* First strip leading & trailing spaces (TC decided to tolerate these) */ while(*nextSeparator == ' ') { - nextSeparator++; + nextSeparator++; + } + keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator); + while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + /* Now copy the value, but check well-formedness */ + if (nextSeparator == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */ + return 0; } - /* we actually found the keyword. Copy the value */ - if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { - while(*(startSearchHere-1) == ' ') { - startSearchHere--; - } - uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); - result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); - } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ - i = (int32_t)uprv_strlen(nextSeparator); - while(nextSeparator[i - 1] == ' ') { - i--; - } - uprv_strncpy(buffer, nextSeparator, i); - result = u_terminateChars(buffer, bufferCapacity, i, status); - } else { - /* give a bigger buffer, please */ - *status = U_BUFFER_OVERFLOW_ERROR; - if(startSearchHere) { - result = (int32_t)(startSearchHere - nextSeparator); - } else { - result = (int32_t)uprv_strlen(nextSeparator); - } + keyValueLen = 0; + while (nextSeparator < keyValueTail) { + if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ + return 0; + } + if (keyValueLen < bufferCapacity) { + /* Should we 
lowercase value to return here? Tests expect as-is. */ + buffer[keyValueLen++] = *nextSeparator++; + } else { /* keep advancing so we return correct length in case of overflow */ + keyValueLen++; + nextSeparator++; + } } + result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status); return result; } } @@ -936,46 +968,59 @@ uloc_setKeywordValue(const char* keywordName, int32_t keywordValueLen; int32_t bufLen; int32_t needLen = 0; - int32_t foundValueLen; - int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; + char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1]; char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - int32_t i = 0; int32_t rc; char* nextSeparator = NULL; char* nextEqualsign = NULL; char* startSearchHere = NULL; char* keywordStart = NULL; - char *insertHere = NULL; + CharString updatedKeysAndValues; + int32_t updatedKeysAndValuesLen; + UBool handledInputKeyAndValue = FALSE; + char keyValuePrefix = '@'; + if(U_FAILURE(*status)) { return -1; } - if(bufferCapacity>1) { - bufLen = (int32_t)uprv_strlen(buffer); - } else { + if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } + bufLen = (int32_t)uprv_strlen(buffer); if(bufferCapacity= ULOC_KEYWORD_BUFFER_LEN) { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; + /* now keyValueTail points to first char after the keyName */ + /* copy & normalize keyName from locale */ + if (keywordStart == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ return 0; } - for(i = 0; i < nextEqualsign - keywordStart; i++) { - localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); - } - /* trim trailing spaces */ - while(keywordStart[i-1] == ' ') { - i--; + keyValueLen = 0; + while (keywordStart < keyValueTail) { + if (!UPRV_ISALPHANUM(*keywordStart)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* 
malformed keyword name */ + return 0; + } + if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } } - U_ASSERT(i>=0 && i nextEqualsign && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + if (nextEqualsign == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */ + return 0; + } + rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); if(rc == 0) { - nextEqualsign++; - while(*nextEqualsign == ' ') { - nextEqualsign++; - } - /* we actually found the keyword. Change the value */ - if (nextSeparator) { - keywordAtEnd = 0; - foundValueLen = (int32_t)(nextSeparator - nextEqualsign); - } else { - keywordAtEnd = 1; - foundValueLen = (int32_t)uprv_strlen(nextEqualsign); - } - if(keywordValue) { /* adding a value - not removing */ - if(foundValueLen == keywordValueLen) { - uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); - return bufLen; /* no change in size */ - } else if(foundValueLen > keywordValueLen) { - int32_t delta = foundValueLen - keywordValueLen; - if(nextSeparator) { /* RH side */ - uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); - } - uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); - bufLen -= delta; - buffer[bufLen]=0; - return bufLen; - } else { /* FVL < KVL */ - int32_t delta = keywordValueLen - foundValueLen; - if((bufLen+delta) >= bufferCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return bufLen+delta; - } - if(nextSeparator) { /* RH side */ - uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); - } - uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); - bufLen += delta; - buffer[bufLen]=0; - return bufLen; - } - } else { /* removing a keyword */ - if(keywordAtEnd) { - /* zero out the ';' or '@' just before startSearchhere */ - 
keywordStart[-1] = 0; - return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ - } else { - uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); - keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; - return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); - } + /* Current entry matches the input keyword. Update the entry */ + if(keywordValueLen > 0) { /* updating a value */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + } /* else removing this entry, don't emit anything */ + handledInputKeyAndValue = TRUE; + } else { + /* input keyword sorts earlier than current entry, add before current entry */ + if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) { + /* insert new entry at this location */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + handledInputKeyAndValue = TRUE; } - } else if(rc<0){ /* end match keyword */ - /* could insert at this location. 
*/ - insertHere = keywordStart; + /* copy the current entry */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status); + } + if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) { + /* append new entry at the end, it sorts later than existing entries */ + updatedKeysAndValues.append(keyValuePrefix, *status); + /* skip keyValuePrefix update, no subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + handledInputKeyAndValue = TRUE; } keywordStart = nextSeparator; } /* end loop searching */ - if(!keywordValue) { - return bufLen; /* removal of non-extant keyword - no change */ - } - - /* we know there is at least one keyword. */ - needLen = bufLen+1+keywordNameLen+1+keywordValueLen; + /* Any error from updatedKeysAndValues.append above would be internal and not due to + * problems with the passed-in locale. So if we did encounter problems with the + * passed-in locale above, those errors took precedence and overrode any error + * status from updatedKeysAndValues.append, and also caused a return of 0. If there + * are errors here they are from updatedKeysAndValues.append; they do cause an + * error return but the passed-in locale is unmodified and the original bufLen is + * returned. + */ + if (!handledInputKeyAndValue || U_FAILURE(*status)) { + /* if input key/value specified removal of a keyword not present in locale, or + * there was an error in CharString.append, leave original locale alone. 
*/ + return bufLen; + } + + updatedKeysAndValuesLen = updatedKeysAndValues.length(); + /* needLen = length of the part before '@' + length of updated key-value part including '@' */ + needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen; if(needLen >= bufferCapacity) { *status = U_BUFFER_OVERFLOW_ERROR; return needLen; /* no change */ } - - if(insertHere) { - uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); - keywordStart = insertHere; - } else { - keywordStart = buffer+bufLen; - *keywordStart = ';'; - keywordStart++; - } - uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); - keywordStart += keywordNameLen; - *keywordStart = '='; - keywordStart++; - uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ - keywordStart+=keywordValueLen; - if(insertHere) { - *keywordStart = ';'; - keywordStart++; + if (updatedKeysAndValuesLen > 0) { + uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen); } buffer[needLen]=0; return needLen; @@ -2119,6 +2168,20 @@ uloc_getLCID(const char* localeID) { UErrorCode status = U_ZERO_ERROR; char langID[ULOC_FULLNAME_CAPACITY]; + uint32_t lcid = 0; + + /* Check for incomplete id. 
*/ + if (!localeID || uprv_strlen(localeID) < 2) { + return 0; + } + + // Attempt platform lookup if available + lcid = uprv_convertToLCIDPlatform(localeID); + if (lcid > 0) + { + // Windows found an LCID, return that + return lcid; + } uloc_getLanguage(localeID, langID, sizeof(langID), &status); if (U_FAILURE(status)) { @@ -2529,9 +2592,6 @@ uloc_toUnicodeLocaleType(const char* keyword, const char* value) return bcpType; } -#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) -#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) - static UBool isWellFormedLegacyKey(const char* legacyKey) { @@ -2574,11 +2634,10 @@ uloc_toLegacyKey(const char* keyword) // Checks if the specified locale key is well-formed with the legacy locale syntax. // // Note: - // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. - // However, a key should not contain '=' obviously. For now, all existing - // keys are using ASCII alphabetic letters only. We won't add any new key - // that is not compatible with the BCP 47 syntax. Therefore, we assume - // a valid key consist from [0-9a-zA-Z], no symbols. + // LDML/CLDR provides some definition of keyword syntax in + // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and + // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax + // Keys can only consist of [0-9a-zA-Z]. if (isWellFormedLegacyKey(keyword)) { return keyword; } @@ -2594,12 +2653,11 @@ uloc_toLegacyType(const char* keyword, const char* value) // Checks if the specified locale type is well-formed with the legacy locale syntax. // // Note: - // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. - // However, a type should not contain '=' obviously. For now, all existing - // types are using ASCII alphabetic letters with a few symbol letters. We won't - // add any new type that is not compatible with the BCP 47 syntax except timezone - // IDs. 
For now, we assume a valid type start with [0-9a-zA-Z], but may contain - // '-' '_' '/' in the middle. + // LDML/CLDR provides some definition of keyword syntax in + // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and + // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax + // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values + // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv") if (isWellFormedLegacyType(value)) { return value; } diff --git a/deps/icu-small/source/common/uloc_keytype.cpp b/deps/icu-small/source/common/uloc_keytype.cpp index 0bb337b1ff..04b566a5d6 100644 --- a/deps/icu-small/source/common/uloc_keytype.cpp +++ b/deps/icu-small/source/common/uloc_keytype.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/uloc_tag.c b/deps/icu-small/source/common/uloc_tag.c deleted file mode 100644 index 168b71256f..0000000000 --- a/deps/icu-small/source/common/uloc_tag.c +++ /dev/null @@ -1,2529 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2009-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/ures.h" -#include "unicode/putil.h" -#include "unicode/uloc.h" -#include "ustr_imp.h" -#include "cmemory.h" -#include "cstring.h" -#include "putilimp.h" -#include "uinvchar.h" -#include "ulocimp.h" -#include "uassert.h" - - -/* struct holding a single variant */ -typedef struct VariantListEntry { - const char *variant; - struct VariantListEntry *next; -} VariantListEntry; - -/* struct holding a single attribute value */ -typedef struct AttributeListEntry { - const char *attribute; - struct AttributeListEntry *next; -} AttributeListEntry; - -/* struct holding a single extension */ -typedef struct ExtensionListEntry { - const char *key; - const char *value; - struct ExtensionListEntry *next; -} ExtensionListEntry; - -#define MAXEXTLANG 3 -typedef struct ULanguageTag { - char *buf; /* holding parsed subtags */ - const char *language; - const char *extlang[MAXEXTLANG]; - const char *script; - const char *region; - VariantListEntry *variants; - ExtensionListEntry *extensions; - const char *privateuse; - const char *grandfathered; -} ULanguageTag; - -#define MINLEN 2 -#define SEP '-' -#define PRIVATEUSE 'x' -#define LDMLEXT 'u' - -#define LOCALE_SEP '_' -#define LOCALE_EXT_SEP '@' -#define LOCALE_KEYWORD_SEP ';' -#define LOCALE_KEY_TYPE_SEP '=' - -#define ISALPHA(c) uprv_isASCIILetter(c) -#define ISNUMERIC(c) ((c)>='0' && (c)<='9') - -static const char EMPTY[] = ""; -static const char LANG_UND[] = "und"; -static const char PRIVATEUSE_KEY[] = "x"; -static const char _POSIX[] = "_POSIX"; -static const char POSIX_KEY[] = "va"; -static const char POSIX_VALUE[] = "posix"; -static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; -static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; -static const char LOCALE_TYPE_YES[] = "yes"; - -#define LANG_UND_LEN 3 - -static const char* const GRANDFATHERED[] = { -/* grandfathered preferred */ - "art-lojban", 
"jbo", - "cel-gaulish", "xtg-x-cel-gaulish", - "en-GB-oed", "en-GB-x-oed", - "i-ami", "ami", - "i-bnn", "bnn", - "i-default", "en-x-i-default", - "i-enochian", "und-x-i-enochian", - "i-hak", "hak", - "i-klingon", "tlh", - "i-lux", "lb", - "i-mingo", "see-x-i-mingo", - "i-navajo", "nv", - "i-pwn", "pwn", - "i-tao", "tao", - "i-tay", "tay", - "i-tsu", "tsu", - "no-bok", "nb", - "no-nyn", "nn", - "sgn-be-fr", "sfb", - "sgn-be-nl", "vgt", - "sgn-ch-de", "sgg", - "zh-guoyu", "cmn", - "zh-hakka", "hak", - "zh-min", "nan-x-zh-min", - "zh-min-nan", "nan", - "zh-xiang", "hsn", - NULL, NULL -}; - -static const char DEPRECATEDLANGS[][4] = { -/* deprecated new */ - "iw", "he", - "ji", "yi", - "in", "id" -}; - -/* -* ------------------------------------------------- -* -* These ultag_ functions may be exposed as APIs later -* -* ------------------------------------------------- -*/ - -static ULanguageTag* -ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); - -static void -ultag_close(ULanguageTag* langtag); - -static const char* -ultag_getLanguage(const ULanguageTag* langtag); - -#if 0 -static const char* -ultag_getJDKLanguage(const ULanguageTag* langtag); -#endif - -static const char* -ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getExtlangSize(const ULanguageTag* langtag); - -static const char* -ultag_getScript(const ULanguageTag* langtag); - -static const char* -ultag_getRegion(const ULanguageTag* langtag); - -static const char* -ultag_getVariant(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getVariantsSize(const ULanguageTag* langtag); - -static const char* -ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); - -static const char* -ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getExtensionsSize(const ULanguageTag* langtag); - -static const char* -ultag_getPrivateUse(const ULanguageTag* langtag); - -#if 0 -static const 
char* -ultag_getGrandfathered(const ULanguageTag* langtag); -#endif - -/* -* ------------------------------------------------- -* -* Language subtag syntax validation functions -* -* ------------------------------------------------- -*/ - -static UBool -_isAlphaString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISALPHA(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isNumericString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISNUMERIC(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isAlphaNumericString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isLanguageSubtag(const char* s, int32_t len) { - /* - * language = 2*3ALPHA ; shortest ISO 639 code - * ["-" extlang] ; sometimes followed by - * ; extended language subtags - * / 4ALPHA ; or reserved for future use - * / 5*8ALPHA ; or registered language subtag - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtlangSubtag(const char* s, int32_t len) { - /* - * extlang = 3ALPHA ; selected ISO 639 codes - * *2("-" 3ALPHA) ; permanently reserved - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 3 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isScriptSubtag(const char* s, int32_t len) { - /* - * script = 4ALPHA ; ISO 15924 code - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 4 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isRegionSubtag(const char* s, int32_t len) { - /* - * region = 2ALPHA ; ISO 3166-1 code - * / 3DIGIT ; UN M.49 code - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 2 && 
_isAlphaString(s, len)) { - return TRUE; - } - if (len == 3 && _isNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isVariantSubtag(const char* s, int32_t len) { - /* - * variant = 5*8alphanum ; registered variants - * / (DIGIT 3alphanum) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isPrivateuseVariantSubtag(const char* s, int32_t len) { - /* - * variant = 1*8alphanum ; registered variants - * / (DIGIT 3alphanum) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSingleton(const char* s, int32_t len) { - /* - * extension = singleton 1*("-" (2*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSubtag(const char* s, int32_t len) { - /* - * extension = singleton 1*("-" (2*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSubtags(const char* s, int32_t len) { - const char *p = s; - const char *pSubtag = NULL; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - while ((p - s) < len) { - if (*p == SEP) { - if (pSubtag == NULL) { - return FALSE; - } - if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { - return FALSE; - } - pSubtag = NULL; - } else if (pSubtag == NULL) { - pSubtag = p; - } - p++; - } - if (pSubtag == NULL) { - return FALSE; - } - return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); -} - -static UBool -_isPrivateuseValueSubtag(const char* s, int32_t 
len) { - /* - * privateuse = "x" 1*("-" (1*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isPrivateuseValueSubtags(const char* s, int32_t len) { - const char *p = s; - const char *pSubtag = NULL; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - while ((p - s) < len) { - if (*p == SEP) { - if (pSubtag == NULL) { - return FALSE; - } - if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { - return FALSE; - } - pSubtag = NULL; - } else if (pSubtag == NULL) { - pSubtag = p; - } - p++; - } - if (pSubtag == NULL) { - return FALSE; - } - return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); -} - -U_CFUNC UBool -ultag_isUnicodeLocaleKey(const char* s, int32_t len) { - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 2 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -U_CFUNC UBool -ultag_isUnicodeLocaleType(const char*s, int32_t len) { - const char* p; - int32_t subtagLen = 0; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - for (p = s; len > 0; p++, len--) { - if (*p == SEP) { - if (subtagLen < 3) { - return FALSE; - } - subtagLen = 0; - } else if (ISALPHA(*p) || ISNUMERIC(*p)) { - subtagLen++; - if (subtagLen > 8) { - return FALSE; - } - } else { - return FALSE; - } - } - - return (subtagLen >= 3); -} -/* -* ------------------------------------------------- -* -* Helper functions -* -* ------------------------------------------------- -*/ - -static UBool -_addVariantToList(VariantListEntry **first, VariantListEntry *var) { - UBool bAdded = TRUE; - - if (*first == NULL) { - var->next = NULL; - *first = var; - } else { - VariantListEntry *prev, *cur; - int32_t cmp; - - /* variants order should be preserved */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = var; - var->next = NULL; - break; - } - - /* Checking 
for duplicate variant */ - cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); - if (cmp == 0) { - /* duplicated variant */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - -static UBool -_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { - UBool bAdded = TRUE; - - if (*first == NULL) { - attr->next = NULL; - *first = attr; - } else { - AttributeListEntry *prev, *cur; - int32_t cmp; - - /* reorder variants in alphabetical order */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = attr; - attr->next = NULL; - break; - } - cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); - if (cmp < 0) { - if (prev == NULL) { - *first = attr; - } else { - prev->next = attr; - } - attr->next = cur; - break; - } - if (cmp == 0) { - /* duplicated variant */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - - -static UBool -_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { - UBool bAdded = TRUE; - - if (*first == NULL) { - ext->next = NULL; - *first = ext; - } else { - ExtensionListEntry *prev, *cur; - int32_t cmp; - - /* reorder variants in alphabetical order */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = ext; - ext->next = NULL; - break; - } - if (localeToBCP) { - /* special handling for locale to bcp conversion */ - int32_t len, curlen; - - len = (int32_t)uprv_strlen(ext->key); - curlen = (int32_t)uprv_strlen(cur->key); - - if (len == 1 && curlen == 1) { - if (*(ext->key) == *(cur->key)) { - cmp = 0; - } else if (*(ext->key) == PRIVATEUSE) { - cmp = 1; - } else if (*(cur->key) == PRIVATEUSE) { - cmp = -1; - } else { - cmp = *(ext->key) - *(cur->key); - } - } else if (len == 1) { - cmp = *(ext->key) - LDMLEXT; - } else if (curlen == 1) { - cmp = LDMLEXT - *(cur->key); - } else { - cmp = uprv_compareInvCharsAsAscii(ext->key, 
cur->key); - /* Both are u extension keys - we need special handling for 'attribute' */ - if (cmp != 0) { - if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { - cmp = 1; - } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { - cmp = -1; - } - } - } - } else { - cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); - } - if (cmp < 0) { - if (prev == NULL) { - *first = ext; - } else { - prev->next = ext; - } - ext->next = cur; - break; - } - if (cmp == 0) { - /* duplicated extension key */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - -static void -_initializeULanguageTag(ULanguageTag* langtag) { - int32_t i; - - langtag->buf = NULL; - - langtag->language = EMPTY; - for (i = 0; i < MAXEXTLANG; i++) { - langtag->extlang[i] = NULL; - } - - langtag->script = EMPTY; - langtag->region = EMPTY; - - langtag->variants = NULL; - langtag->extensions = NULL; - - langtag->grandfathered = EMPTY; - langtag->privateuse = EMPTY; -} - -static int32_t -_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_LANG_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - len = 0; - } - - /* Note: returned language code is in lower case letters */ - - if (len == 0) { - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); - } - reslen += LANG_UND_LEN; - } else if (!_isLanguageSubtag(buf, len)) { - /* invalid language code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - 
reslen)); - } - reslen += LANG_UND_LEN; - } else { - /* resolve deprecated */ - for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { - if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { - uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); - len = (int32_t)uprv_strlen(buf); - break; - } - } - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_SCRIPT_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - if (!_isScriptSubtag(buf, len)) { - /* invalid script code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_COUNTRY_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - if 
(!_isRegionSubtag(buf, len)) { - /* invalid region code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { - char buf[ULOC_FULLNAME_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - char *p, *pVar; - UBool bNext = TRUE; - VariantListEntry *var; - VariantListEntry *varFirst = NULL; - - pVar = NULL; - p = buf; - while (bNext) { - if (*p == SEP || *p == LOCALE_SEP || *p == 0) { - if (*p == 0) { - bNext = FALSE; - } else { - *p = 0; /* terminate */ - } - if (pVar == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - /* ignore empty variant */ - } else { - /* ICU uses upper case letters for variants, but - the canonical format is lowercase in BCP47 */ - for (i = 0; *(pVar + i) != 0; i++) { - *(pVar + i) = uprv_tolower(*(pVar + i)); - } - - /* validate */ - if (_isVariantSubtag(pVar, -1)) { - if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) { - /* emit the variant to the list */ - var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); - if (var == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - var->variant = pVar; - if (!_addVariantToList(&varFirst, var)) { - /* duplicated variant */ - uprv_free(var); - if (strict) { - 
*status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } else { - /* Special handling for POSIX variant, need to remember that we had it and then */ - /* treat it like an extension later. */ - *hadPosix = TRUE; - } - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } else if (_isPrivateuseValueSubtag(pVar, -1)) { - /* Handle private use subtags separately */ - break; - } - } - /* reset variant starting position */ - pVar = NULL; - } else if (pVar == NULL) { - pVar = p; - } - p++; - } - - if (U_SUCCESS(*status)) { - if (varFirst != NULL) { - int32_t varLen; - - /* write out validated/normalized variants to the target */ - var = varFirst; - while (var != NULL) { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - varLen = (int32_t)uprv_strlen(var->variant); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); - } - reslen += varLen; - var = var->next; - } - } - } - - /* clean up */ - var = varFirst; - while (var != NULL) { - VariantListEntry *tmpVar = var->next; - uprv_free(var); - var = tmpVar; - } - - if (U_FAILURE(*status)) { - return 0; - } - } - - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; - int32_t attrBufLength = 0; - UEnumeration *keywordEnum = NULL; - int32_t reslen = 0; - - keywordEnum = uloc_openKeywords(localeID, status); - if (U_FAILURE(*status) && !hadPosix) { - uenum_close(keywordEnum); - return 0; - } - if (keywordEnum != NULL || hadPosix) { - /* reorder extensions */ - int32_t len; - const char *key; - ExtensionListEntry *firstExt = NULL; - ExtensionListEntry *ext; - AttributeListEntry *firstAttr = NULL; - AttributeListEntry *attr; - char *attrValue; - char 
extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - char *pExtBuf = extBuf; - int32_t extBufCapacity = sizeof(extBuf); - const char *bcpKey, *bcpValue; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t keylen; - UBool isBcpUExt; - - while (TRUE) { - key = uenum_next(keywordEnum, NULL, status); - if (key == NULL) { - break; - } - len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); - /* buf must be null-terminated */ - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - /* ignore this keyword */ - tmpStatus = U_ZERO_ERROR; - continue; - } - - keylen = (int32_t)uprv_strlen(key); - isBcpUExt = (keylen > 1); - - /* special keyword used for representing Unicode locale attributes */ - if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { - if (len > 0) { - int32_t i = 0; - while (TRUE) { - attrBufLength = 0; - for (; i < len; i++) { - if (buf[i] != '-') { - attrBuf[attrBufLength++] = buf[i]; - } else { - i++; - break; - } - } - if (attrBufLength > 0) { - attrBuf[attrBufLength] = 0; - - } else if (i >= len){ - break; - } - - /* create AttributeListEntry */ - attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); - if (attr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - attrValue = (char*)uprv_malloc(attrBufLength + 1); - if (attrValue == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - uprv_strcpy(attrValue, attrBuf); - attr->attribute = attrValue; - - if (!_addAttributeToList(&firstAttr, attr)) { - uprv_free(attr); - uprv_free(attrValue); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - /* for a place holder ExtensionListEntry */ - bcpKey = LOCALE_ATTRIBUTE_KEY; - bcpValue = NULL; - } - } else if (isBcpUExt) { - bcpKey = uloc_toUnicodeLocaleKey(key); - if (bcpKey == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - - /* we've checked buf is null-terminated 
above */ - bcpValue = uloc_toUnicodeLocaleType(key, buf); - if (bcpValue == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - if (bcpValue == buf) { - /* - When uloc_toUnicodeLocaleType(key, buf) returns the - input value as is, the value is well-formed, but has - no known mapping. This implementation normalizes the - the value to lower case - */ - int32_t bcpValueLen = uprv_strlen(bcpValue); - if (bcpValueLen < extBufCapacity) { - uprv_strcpy(pExtBuf, bcpValue); - T_CString_toLowerCase(pExtBuf); - - bcpValue = pExtBuf; - - pExtBuf += (bcpValueLen + 1); - extBufCapacity -= (bcpValueLen + 1); - } else { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } - } else { - if (*key == PRIVATEUSE) { - if (!_isPrivateuseValueSubtags(buf, len)) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } else { - if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } - bcpKey = key; - if ((len + 1) < extBufCapacity) { - uprv_memcpy(pExtBuf, buf, len); - bcpValue = pExtBuf; - - pExtBuf += len; - - *pExtBuf = 0; - pExtBuf++; - - extBufCapacity -= (len + 1); - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - - /* create ExtensionListEntry */ - ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (ext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - ext->key = bcpKey; - ext->value = bcpValue; - - if (!_addExtensionToList(&firstExt, ext, TRUE)) { - uprv_free(ext); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - - /* Special handling for POSIX variant - add the keywords for POSIX */ - if (hadPosix) { - /* create ExtensionListEntry for POSIX */ - ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (ext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - 
} - ext->key = POSIX_KEY; - ext->value = POSIX_VALUE; - - if (!_addExtensionToList(&firstExt, ext, TRUE)) { - uprv_free(ext); - } - } - - if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { - UBool startLDMLExtension = FALSE; - for (ext = firstExt; ext; ext = ext->next) { - if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { - /* first LDML u singlton extension */ - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - if (reslen < capacity) { - *(appendAt + reslen) = LDMLEXT; - } - reslen++; - - startLDMLExtension = TRUE; - } - - /* write out the sorted BCP47 attributes, extensions and private use */ - if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { - /* write the value for the attributes */ - for (attr = firstAttr; attr; attr = attr->next) { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(attr->attribute); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(ext->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(ext->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - } -cleanup: - /* clean up */ - ext = firstExt; - while (ext != NULL) { - ExtensionListEntry *tmpExt = ext->next; - uprv_free(ext); - ext = tmpExt; - } - - attr = firstAttr; - while (attr != NULL) { - AttributeListEntry *tmpAttr = attr->next; - char *pValue = (char *)attr->attribute; - uprv_free(pValue); - uprv_free(attr); - attr = tmpAttr; - } - - uenum_close(keywordEnum); - - if (U_FAILURE(*status)) { - return 0; - } - } - - return 
u_terminateChars(appendAt, capacity, reslen, status); -} - -/** - * Append keywords parsed from LDML extension value - * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} - * Note: char* buf is used for storing keywords - */ -static void -_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { - const char *pTag; /* beginning of current subtag */ - const char *pKwds; /* beginning of key-type pairs */ - UBool variantExists = *posixVariant; - - ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ - ExtensionListEntry *kwd, *nextKwd; - - AttributeListEntry *attrFirst = NULL; /* first attribute */ - AttributeListEntry *attr, *nextAttr; - - int32_t len; - int32_t bufIdx = 0; - - char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - int32_t attrBufIdx = 0; - - /* Reset the posixVariant value */ - *posixVariant = FALSE; - - pTag = ldmlext; - pKwds = NULL; - - /* Iterate through u extension attributes */ - while (*pTag) { - /* locate next separator char */ - for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); - - if (ultag_isUnicodeLocaleKey(pTag, len)) { - pKwds = pTag; - break; - } - - /* add this attribute to the list */ - attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); - if (attr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { - uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); - attrBuf[attrBufIdx + len] = 0; - attr->attribute = &attrBuf[attrBufIdx]; - attrBufIdx += (len + 1); - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - if (!_addAttributeToList(&attrFirst, attr)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(attr); - goto cleanup; - } - - /* next tag */ - pTag += len; - if (*pTag) { - /* next to the separator */ - pTag++; - } - } - - if (attrFirst) { - /* emit attributes as an LDML keyword, e.g. 
attribute=attr1-attr2 */ - - if (attrBufIdx > bufSize) { - /* attrBufIdx == + 1 */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - kwd->key = LOCALE_ATTRIBUTE_KEY; - kwd->value = buf; - - /* attribute subtags sorted in alphabetical order as type */ - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - - /* buffer size check is done above */ - if (attr != attrFirst) { - *(buf + bufIdx) = SEP; - bufIdx++; - } - - len = uprv_strlen(attr->attribute); - uprv_memcpy(buf + bufIdx, attr->attribute, len); - bufIdx += len; - - attr = nextAttr; - } - *(buf + bufIdx) = 0; - bufIdx++; - - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(kwd); - goto cleanup; - } - - /* once keyword entry is created, delete the attribute list */ - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - uprv_free(attr); - attr = nextAttr; - } - attrFirst = NULL; - } - - if (pKwds) { - const char *pBcpKey = NULL; /* u extenstion key subtag */ - const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ - int32_t bcpKeyLen = 0; - int32_t bcpTypeLen = 0; - UBool isDone = FALSE; - - pTag = pKwds; - /* BCP47 representation of LDML key/type pairs */ - while (!isDone) { - const char *pNextBcpKey = NULL; - int32_t nextBcpKeyLen = 0; - UBool emitKeyword = FALSE; - - if (*pTag) { - /* locate next separator char */ - for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); - - if (ultag_isUnicodeLocaleKey(pTag, len)) { - if (pBcpKey) { - emitKeyword = TRUE; - pNextBcpKey = pTag; - nextBcpKeyLen = len; - } else { - pBcpKey = pTag; - bcpKeyLen = len; - } - } else { - U_ASSERT(pBcpKey != NULL); - /* within LDML type subtags */ - if (pBcpType) { - bcpTypeLen += (len + 1); - } else { - pBcpType = pTag; - bcpTypeLen = len; - } - } - - /* next tag 
*/ - pTag += len; - if (*pTag) { - /* next to the separator */ - pTag++; - } - } else { - /* processing last one */ - emitKeyword = TRUE; - isDone = TRUE; - } - - if (emitKeyword) { - const char *pKey = NULL; /* LDML key */ - const char *pType = NULL; /* LDML type */ - - char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ - - U_ASSERT(pBcpKey != NULL); - - if (bcpKeyLen >= sizeof(bcpKeyBuf)) { - /* the BCP key is invalid */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); - bcpKeyBuf[bcpKeyLen] = 0; - - /* u extension key to LDML key */ - pKey = uloc_toLegacyKey(bcpKeyBuf); - if (pKey == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - if (pKey == bcpKeyBuf) { - /* - The key returned by toLegacyKey points to the input buffer. - We normalize the result key to lower case. - */ - T_CString_toLowerCase(bcpKeyBuf); - if (bufSize - bufIdx - 1 >= bcpKeyLen) { - uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); - pKey = buf + bufIdx; - bufIdx += bcpKeyLen; - *(buf + bufIdx) = 0; - bufIdx++; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - goto cleanup; - } - } - - if (pBcpType) { - char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ - if (bcpTypeLen >= sizeof(bcpTypeBuf)) { - /* the BCP type is too long */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); - bcpTypeBuf[bcpTypeLen] = 0; - - /* BCP type to locale type */ - pType = uloc_toLegacyType(pKey, bcpTypeBuf); - if (pType == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - if (pType == bcpTypeBuf) { - /* - The type returned by toLegacyType points to the input buffer. - We normalize the result type to lower case. 
- */ - /* normalize to lower case */ - T_CString_toLowerCase(bcpTypeBuf); - if (bufSize - bufIdx - 1 >= bcpTypeLen) { - uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); - pType = buf + bufIdx; - bufIdx += bcpTypeLen; - *(buf + bufIdx) = 0; - bufIdx++; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - goto cleanup; - } - } - } else { - /* typeless - default type value is "yes" */ - pType = LOCALE_TYPE_YES; - } - - /* Special handling for u-va-posix, since we want to treat this as a variant, - not as a keyword */ - if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { - *posixVariant = TRUE; - } else { - /* create an ExtensionListEntry for this keyword */ - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - kwd->key = pKey; - kwd->value = pType; - - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(kwd); - goto cleanup; - } - } - - pBcpKey = pNextBcpKey; - bcpKeyLen = pNextBcpKey != NULL ? 
nextBcpKeyLen : 0; - pBcpType = NULL; - bcpTypeLen = 0; - } - } - } - - kwd = kwdFirst; - while (kwd != NULL) { - nextKwd = kwd->next; - _addExtensionToList(appendTo, kwd, FALSE); - kwd = nextKwd; - } - - return; - -cleanup: - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - uprv_free(attr); - attr = nextAttr; - } - - kwd = kwdFirst; - while (kwd != NULL) { - nextKwd = kwd->next; - uprv_free(kwd); - kwd = nextKwd; - } -} - - -static int32_t -_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { - int32_t reslen = 0; - int32_t i, n; - int32_t len; - ExtensionListEntry *kwdFirst = NULL; - ExtensionListEntry *kwd; - const char *key, *type; - char *kwdBuf = NULL; - int32_t kwdBufLength = capacity; - UBool posixVariant = FALSE; - - if (U_FAILURE(*status)) { - return 0; - } - - kwdBuf = (char*)uprv_malloc(kwdBufLength); - if (kwdBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - /* Determine if variants already exists */ - if (ultag_getVariantsSize(langtag)) { - posixVariant = TRUE; - } - - n = ultag_getExtensionsSize(langtag); - - /* resolve locale keywords and reordering keys */ - for (i = 0; i < n; i++) { - key = ultag_getExtensionKey(langtag, i); - type = ultag_getExtensionValue(langtag, i); - if (*key == LDMLEXT) { - _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); - if (U_FAILURE(*status)) { - break; - } - } else { - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - kwd->key = key; - kwd->value = type; - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - uprv_free(kwd); - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - - if (U_SUCCESS(*status)) { - type = ultag_getPrivateUse(langtag); - if ((int32_t)uprv_strlen(type) > 0) { - /* add private use as a keyword */ - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if 
(kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - kwd->key = PRIVATEUSE_KEY; - kwd->value = type; - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - uprv_free(kwd); - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - } - } - } - - /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ - - if (U_SUCCESS(*status) && posixVariant) { - len = (int32_t) uprv_strlen(_POSIX); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - - if (U_SUCCESS(*status) && kwdFirst != NULL) { - /* write out the sorted keywords */ - UBool firstValue = TRUE; - kwd = kwdFirst; - do { - if (reslen < capacity) { - if (firstValue) { - /* '@' */ - *(appendAt + reslen) = LOCALE_EXT_SEP; - firstValue = FALSE; - } else { - /* ';' */ - *(appendAt + reslen) = LOCALE_KEYWORD_SEP; - } - } - reslen++; - - /* key */ - len = (int32_t)uprv_strlen(kwd->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - - /* '=' */ - if (reslen < capacity) { - *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; - } - reslen++; - - /* type */ - len = (int32_t)uprv_strlen(kwd->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); - } - reslen += len; - - kwd = kwd->next; - } while (kwd); - } - - /* clean up */ - kwd = kwdFirst; - while (kwd != NULL) { - ExtensionListEntry *tmpKwd = kwd->next; - uprv_free(kwd); - kwd = tmpKwd; - } - - uprv_free(kwdBuf); - - if (U_FAILURE(*status)) { - return 0; - } - - return u_terminateChars(appendAt, capacity, reslen, status); -} - -static int32_t -_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - char buf[ULOC_FULLNAME_CAPACITY]; - char tmpAppend[ULOC_FULLNAME_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 
0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - char *p, *pPriv; - UBool bNext = TRUE; - UBool firstValue = TRUE; - UBool writeValue; - - pPriv = NULL; - p = buf; - while (bNext) { - writeValue = FALSE; - if (*p == SEP || *p == LOCALE_SEP || *p == 0) { - if (*p == 0) { - bNext = FALSE; - } else { - *p = 0; /* terminate */ - } - if (pPriv != NULL) { - /* Private use in the canonical format is lowercase in BCP47 */ - for (i = 0; *(pPriv + i) != 0; i++) { - *(pPriv + i) = uprv_tolower(*(pPriv + i)); - } - - /* validate */ - if (_isPrivateuseValueSubtag(pPriv, -1)) { - if (firstValue) { - if (!_isVariantSubtag(pPriv, -1)) { - writeValue = TRUE; - } - } else { - writeValue = TRUE; - } - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } else { - break; - } - - if (writeValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - if (firstValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = *PRIVATEUSE_KEY; - } - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - firstValue = FALSE; - } - - len = (int32_t)uprv_strlen(pPriv); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - /* reset private use starting position */ - pPriv = NULL; - } else if (pPriv == NULL) { - pPriv = p; - } - p++; - } - - if (U_FAILURE(*status)) { - return 0; - } - } - - if (U_SUCCESS(*status)) { - len = reslen; - if (reslen < capacity) { - uprv_memcpy(appendAt, tmpAppend, 
uprv_min(len, capacity - reslen)); - } - } - - u_terminateChars(appendAt, capacity, reslen, status); - - return reslen; -} - -/* -* ------------------------------------------------- -* -* ultag_ functions -* -* ------------------------------------------------- -*/ - -/* Bit flags used by the parser */ -#define LANG 0x0001 -#define EXTL 0x0002 -#define SCRT 0x0004 -#define REGN 0x0008 -#define VART 0x0010 -#define EXTS 0x0020 -#define EXTV 0x0040 -#define PRIV 0x0080 - -/** - * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing - * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) - * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. - */ -#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) -#pragma optimize( "", off ) -#endif - -static ULanguageTag* -ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { - ULanguageTag *t; - char *tagBuf; - int16_t next; - char *pSubtag, *pNext, *pLastGoodPosition; - int32_t subtagLen; - int32_t extlangIdx; - ExtensionListEntry *pExtension; - char *pExtValueSubtag, *pExtValueSubtagEnd; - int32_t i; - UBool privateuseVar = FALSE; - int32_t grandfatheredLen = 0; - - if (parsedLen != NULL) { - *parsedLen = 0; - } - - if (U_FAILURE(*status)) { - return NULL; - } - - if (tagLen < 0) { - tagLen = (int32_t)uprv_strlen(tag); - } - - /* copy the entire string */ - tagBuf = (char*)uprv_malloc(tagLen + 1); - if (tagBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(tagBuf, tag, tagLen); - *(tagBuf + tagLen) = 0; - - /* create a ULanguageTag */ - t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); - if (t == NULL) { - uprv_free(tagBuf); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - _initializeULanguageTag(t); - t->buf = tagBuf; - - if (tagLen < MINLEN) { - /* the 
input tag is too short - return empty ULanguageTag */ - return t; - } - - /* check if the tag is grandfathered */ - for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { - if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { - int32_t newTagLength; - - grandfatheredLen = tagLen; /* back up for output parsedLen */ - newTagLength = uprv_strlen(GRANDFATHERED[i+1]); - if (tagLen < newTagLength) { - uprv_free(tagBuf); - tagBuf = (char*)uprv_malloc(newTagLength + 1); - if (tagBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - ultag_close(t); - return NULL; - } - t->buf = tagBuf; - tagLen = newTagLength; - } - uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); - break; - } - } - - /* - * langtag = language - * ["-" script] - * ["-" region] - * *("-" variant) - * *("-" extension) - * ["-" privateuse] - */ - - next = LANG | PRIV; - pNext = pLastGoodPosition = tagBuf; - extlangIdx = 0; - pExtension = NULL; - pExtValueSubtag = NULL; - pExtValueSubtagEnd = NULL; - - while (pNext) { - char *pSep; - - pSubtag = pNext; - - /* locate next separator char */ - pSep = pSubtag; - while (*pSep) { - if (*pSep == SEP) { - break; - } - pSep++; - } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; - } - subtagLen = (int32_t)(pSep - pSubtag); - - if (next & LANG) { - if (_isLanguageSubtag(pSubtag, subtagLen)) { - *pSep = 0; /* terminate */ - t->language = T_CString_toLowerCase(pSubtag); - - pLastGoodPosition = pSep; - next = EXTL | SCRT | REGN | VART | EXTS | PRIV; - continue; - } - } - if (next & EXTL) { - if (_isExtlangSubtag(pSubtag, subtagLen)) { - *pSep = 0; - t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); - - pLastGoodPosition = pSep; - if (extlangIdx < 3) { - next = EXTL | SCRT | REGN | VART | EXTS | PRIV; - } else { - next = SCRT | REGN | VART | EXTS | PRIV; - } - continue; - } - } - if (next & SCRT) { - if (_isScriptSubtag(pSubtag, subtagLen)) { - char *p = pSubtag; - - *pSep = 0; - - /* to title case */ - *p = uprv_toupper(*p); - p++; - for (; 
*p; p++) { - *p = uprv_tolower(*p); - } - - t->script = pSubtag; - - pLastGoodPosition = pSep; - next = REGN | VART | EXTS | PRIV; - continue; - } - } - if (next & REGN) { - if (_isRegionSubtag(pSubtag, subtagLen)) { - *pSep = 0; - t->region = T_CString_toUpperCase(pSubtag); - - pLastGoodPosition = pSep; - next = VART | EXTS | PRIV; - continue; - } - } - if (next & VART) { - if (_isVariantSubtag(pSubtag, subtagLen) || - (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { - VariantListEntry *var; - UBool isAdded; - - var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); - if (var == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - *pSep = 0; - var->variant = T_CString_toUpperCase(pSubtag); - isAdded = _addVariantToList(&(t->variants), var); - if (!isAdded) { - /* duplicated variant entry */ - uprv_free(var); - break; - } - pLastGoodPosition = pSep; - next = VART | EXTS | PRIV; - continue; - } - } - if (next & EXTS) { - if (_isExtensionSingleton(pSubtag, subtagLen)) { - if (pExtension != NULL) { - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - } else { - /* stop parsing here */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - } - - /* create a new extension */ - pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (pExtension == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - *pSep = 0; - pExtension->key = T_CString_toLowerCase(pSubtag); - pExtension->value = NULL; /* will be set later */ - - /* - * reset the start and the end location of extension value - * 
subtags for this extension - */ - pExtValueSubtag = NULL; - pExtValueSubtagEnd = NULL; - - next = EXTV; - continue; - } - } - if (next & EXTV) { - if (_isExtensionSubtag(pSubtag, subtagLen)) { - if (pExtValueSubtag == NULL) { - /* if the start postion of this extension's value is not yet, - this one is the first value subtag */ - pExtValueSubtag = pSubtag; - } - - /* Mark the end of this subtag */ - pExtValueSubtagEnd = pSep; - next = EXTS | EXTV | PRIV; - - continue; - } - } - if (next & PRIV) { - if (uprv_tolower(*pSubtag) == PRIVATEUSE) { - char *pPrivuseVal; - - if (pExtension != NULL) { - /* Process the last extension */ - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - pExtension = NULL; - break; - } else { - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - pExtension = NULL; - } else { - /* stop parsing here */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - } - } - - /* The rest of part will be private use value subtags */ - if (pNext == NULL) { - /* empty private use subtag */ - break; - } - /* back up the private use value start position */ - pPrivuseVal = pNext; - - /* validate private use value subtags */ - while (pNext) { - pSubtag = pNext; - pSep = pSubtag; - while (*pSep) { - if (*pSep == SEP) { - break; - } - pSep++; - } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; - } - subtagLen = (int32_t)(pSep - pSubtag); - - if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { - *pSep = 0; - next = VART; - privateuseVar = TRUE; - break; - } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { - pLastGoodPosition = pSep; - } else { - break; - } - 
} - - if (next == VART) { - continue; - } - - if (pLastGoodPosition - pPrivuseVal > 0) { - *pLastGoodPosition = 0; - t->privateuse = T_CString_toLowerCase(pPrivuseVal); - } - /* No more subtags, exiting the parse loop */ - break; - } - break; - } - - /* If we fell through here, it means this subtag is illegal - quit parsing */ - break; - } - - if (pExtension != NULL) { - /* Process the last extension */ - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - } else { - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - } else { - uprv_free(pExtension); - } - } - } - - if (parsedLen != NULL) { - *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); - } - - return t; - -error: - ultag_close(t); - return NULL; -} - -/** -* Ticket #12705 - Turn optimization back on. 
-*/ -#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) -#pragma optimize( "", on ) -#endif - -static void -ultag_close(ULanguageTag* langtag) { - - if (langtag == NULL) { - return; - } - - uprv_free(langtag->buf); - - if (langtag->variants) { - VariantListEntry *curVar = langtag->variants; - while (curVar) { - VariantListEntry *nextVar = curVar->next; - uprv_free(curVar); - curVar = nextVar; - } - } - - if (langtag->extensions) { - ExtensionListEntry *curExt = langtag->extensions; - while (curExt) { - ExtensionListEntry *nextExt = curExt->next; - uprv_free(curExt); - curExt = nextExt; - } - } - - uprv_free(langtag); -} - -static const char* -ultag_getLanguage(const ULanguageTag* langtag) { - return langtag->language; -} - -#if 0 -static const char* -ultag_getJDKLanguage(const ULanguageTag* langtag) { - int32_t i; - for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { - if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { - return DEPRECATEDLANGS[i + 1]; - } - } - return langtag->language; -} -#endif - -static const char* -ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { - if (idx >= 0 && idx < MAXEXTLANG) { - return langtag->extlang[idx]; - } - return NULL; -} - -static int32_t -ultag_getExtlangSize(const ULanguageTag* langtag) { - int32_t size = 0; - int32_t i; - for (i = 0; i < MAXEXTLANG; i++) { - if (langtag->extlang[i]) { - size++; - } - } - return size; -} - -static const char* -ultag_getScript(const ULanguageTag* langtag) { - return langtag->script; -} - -static const char* -ultag_getRegion(const ULanguageTag* langtag) { - return langtag->region; -} - -static const char* -ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { - const char *var = NULL; - VariantListEntry *cur = langtag->variants; - int32_t i = 0; - while (cur) { - if (i == idx) { - var = cur->variant; - break; - } - cur = cur->next; - i++; - } - return var; -} - -static int32_t 
-ultag_getVariantsSize(const ULanguageTag* langtag) { - int32_t size = 0; - VariantListEntry *cur = langtag->variants; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - -static const char* -ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { - const char *key = NULL; - ExtensionListEntry *cur = langtag->extensions; - int32_t i = 0; - while (cur) { - if (i == idx) { - key = cur->key; - break; - } - cur = cur->next; - i++; - } - return key; -} - -static const char* -ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { - const char *val = NULL; - ExtensionListEntry *cur = langtag->extensions; - int32_t i = 0; - while (cur) { - if (i == idx) { - val = cur->value; - break; - } - cur = cur->next; - i++; - } - return val; -} - -static int32_t -ultag_getExtensionsSize(const ULanguageTag* langtag) { - int32_t size = 0; - ExtensionListEntry *cur = langtag->extensions; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - -static const char* -ultag_getPrivateUse(const ULanguageTag* langtag) { - return langtag->privateuse; -} - -#if 0 -static const char* -ultag_getGrandfathered(const ULanguageTag* langtag) { - return langtag->grandfathered; -} -#endif - - -/* -* ------------------------------------------------- -* -* Locale/BCP47 conversion APIs, exposed as uloc_* -* -* ------------------------------------------------- -*/ -U_CAPI int32_t U_EXPORT2 -uloc_toLanguageTag(const char* localeID, - char* langtag, - int32_t langtagCapacity, - UBool strict, - UErrorCode* status) { - /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ - char canonical[256]; - int32_t reslen = 0; - UErrorCode tmpStatus = U_ZERO_ERROR; - UBool hadPosix = FALSE; - const char* pKeywordStart; - - /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". 
See #6835 */ - canonical[0] = 0; - if (uprv_strlen(localeID) > 0) { - uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); - if (tmpStatus != U_ZERO_ERROR) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - } - - /* For handling special case - private use only tag */ - pKeywordStart = locale_getKeywordsStart(canonical); - if (pKeywordStart == canonical) { - UEnumeration *kwdEnum; - int kwdCnt = 0; - UBool done = FALSE; - - kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); - if (kwdEnum != NULL) { - kwdCnt = uenum_count(kwdEnum, &tmpStatus); - if (kwdCnt == 1) { - const char *key; - int32_t len = 0; - - key = uenum_next(kwdEnum, &len, &tmpStatus); - if (len == 1 && *key == PRIVATEUSE) { - char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - buf[0] = PRIVATEUSE; - buf[1] = SEP; - len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); - if (U_SUCCESS(tmpStatus)) { - if (_isPrivateuseValueSubtags(&buf[2], len)) { - /* return private use only tag */ - reslen = len + 2; - uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); - u_terminateChars(langtag, langtagCapacity, reslen, status); - done = TRUE; - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - done = TRUE; - } - /* if not strict mode, then "und" will be returned */ - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - done = TRUE; - } - } - } - uenum_close(kwdEnum); - if (done) { - return reslen; - } - } - } - - reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); - reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); - reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, 
strict, hadPosix, status); - reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); - - return reslen; -} - - -U_CAPI int32_t U_EXPORT2 -uloc_forLanguageTag(const char* langtag, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* status) { - ULanguageTag *lt; - int32_t reslen = 0; - const char *subtag, *p; - int32_t len; - int32_t i, n; - UBool noRegion = TRUE; - - lt = ultag_parse(langtag, -1, parsedLength, status); - if (U_FAILURE(*status)) { - return 0; - } - - /* language */ - subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); - if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); - } - reslen += len; - } - } - - /* script */ - subtag = ultag_getScript(lt); - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - - /* write out the script in title case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - if (p == subtag) { - *(localeID + reslen) = uprv_toupper(*p); - } else { - *(localeID + reslen) = *p; - } - } - reslen++; - p++; - } - } - - /* region */ - subtag = ultag_getRegion(lt); - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - /* write out the retion in upper case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = uprv_toupper(*p); - } - reslen++; - p++; - } - noRegion = FALSE; - } - - /* variants */ - n = ultag_getVariantsSize(lt); - if (n > 0) { - if (noRegion) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - } - - for (i = 0; i < n; i++) { - subtag = 
ultag_getVariant(lt, i); - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - /* write out the variant in upper case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = uprv_toupper(*p); - } - reslen++; - p++; - } - } - } - - /* keywords */ - n = ultag_getExtensionsSize(lt); - subtag = ultag_getPrivateUse(lt); - if (n > 0 || uprv_strlen(subtag) > 0) { - if (reslen == 0 && n > 0) { - /* need a language */ - if (reslen < localeIDCapacity) { - uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); - } - reslen += LANG_UND_LEN; - } - len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); - reslen += len; - } - - ultag_close(lt); - return u_terminateChars(localeID, localeIDCapacity, reslen, status); -} diff --git a/deps/icu-small/source/common/uloc_tag.cpp b/deps/icu-small/source/common/uloc_tag.cpp new file mode 100644 index 0000000000..856407defe --- /dev/null +++ b/deps/icu-small/source/common/uloc_tag.cpp @@ -0,0 +1,2530 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2009-2015, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/ures.h" +#include "unicode/putil.h" +#include "unicode/uloc.h" +#include "ustr_imp.h" +#include "cmemory.h" +#include "cstring.h" +#include "putilimp.h" +#include "uinvchar.h" +#include "ulocimp.h" +#include "uassert.h" + + +/* struct holding a single variant */ +typedef struct VariantListEntry { + const char *variant; + struct VariantListEntry *next; +} VariantListEntry; + +/* struct holding a single attribute value */ +typedef struct AttributeListEntry { + const char *attribute; + struct AttributeListEntry *next; +} AttributeListEntry; + +/* struct holding a single extension */ +typedef struct ExtensionListEntry { + const char *key; + const char *value; + struct ExtensionListEntry *next; +} ExtensionListEntry; + +#define MAXEXTLANG 3 +typedef struct ULanguageTag { + char *buf; /* holding parsed subtags */ + const char *language; + const char *extlang[MAXEXTLANG]; + const char *script; + const char *region; + VariantListEntry *variants; + ExtensionListEntry *extensions; + const char *privateuse; + const char *grandfathered; +} ULanguageTag; + +#define MINLEN 2 +#define SEP '-' +#define PRIVATEUSE 'x' +#define LDMLEXT 'u' + +#define LOCALE_SEP '_' +#define LOCALE_EXT_SEP '@' +#define LOCALE_KEYWORD_SEP ';' +#define LOCALE_KEY_TYPE_SEP '=' + +#define ISALPHA(c) uprv_isASCIILetter(c) +#define ISNUMERIC(c) ((c)>='0' && (c)<='9') + +static const char EMPTY[] = ""; +static const char LANG_UND[] = "und"; +static const char PRIVATEUSE_KEY[] = "x"; +static const char _POSIX[] = "_POSIX"; +static const char POSIX_KEY[] = "va"; +static const char POSIX_VALUE[] = "posix"; +static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; +static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; +static const char LOCALE_TYPE_YES[] = "yes"; + +#define LANG_UND_LEN 3 + +static const char* const GRANDFATHERED[] = { +/* grandfathered preferred */ + "art-lojban", 
"jbo", + "cel-gaulish", "xtg-x-cel-gaulish", + "en-GB-oed", "en-GB-x-oed", + "i-ami", "ami", + "i-bnn", "bnn", + "i-default", "en-x-i-default", + "i-enochian", "und-x-i-enochian", + "i-hak", "hak", + "i-klingon", "tlh", + "i-lux", "lb", + "i-mingo", "see-x-i-mingo", + "i-navajo", "nv", + "i-pwn", "pwn", + "i-tao", "tao", + "i-tay", "tay", + "i-tsu", "tsu", + "no-bok", "nb", + "no-nyn", "nn", + "sgn-be-fr", "sfb", + "sgn-be-nl", "vgt", + "sgn-ch-de", "sgg", + "zh-guoyu", "cmn", + "zh-hakka", "hak", + "zh-min", "nan-x-zh-min", + "zh-min-nan", "nan", + "zh-xiang", "hsn", + NULL, NULL +}; + +static const char DEPRECATEDLANGS[][4] = { +/* deprecated new */ + "iw", "he", + "ji", "yi", + "in", "id" +}; + +/* +* ------------------------------------------------- +* +* These ultag_ functions may be exposed as APIs later +* +* ------------------------------------------------- +*/ + +static ULanguageTag* +ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); + +static void +ultag_close(ULanguageTag* langtag); + +static const char* +ultag_getLanguage(const ULanguageTag* langtag); + +#if 0 +static const char* +ultag_getJDKLanguage(const ULanguageTag* langtag); +#endif + +static const char* +ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); + +static int32_t +ultag_getExtlangSize(const ULanguageTag* langtag); + +static const char* +ultag_getScript(const ULanguageTag* langtag); + +static const char* +ultag_getRegion(const ULanguageTag* langtag); + +static const char* +ultag_getVariant(const ULanguageTag* langtag, int32_t idx); + +static int32_t +ultag_getVariantsSize(const ULanguageTag* langtag); + +static const char* +ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); + +static const char* +ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); + +static int32_t +ultag_getExtensionsSize(const ULanguageTag* langtag); + +static const char* +ultag_getPrivateUse(const ULanguageTag* langtag); + +#if 0 +static const 
char* +ultag_getGrandfathered(const ULanguageTag* langtag); +#endif + +/* +* ------------------------------------------------- +* +* Language subtag syntax validation functions +* +* ------------------------------------------------- +*/ + +static UBool +_isAlphaString(const char* s, int32_t len) { + int32_t i; + for (i = 0; i < len; i++) { + if (!ISALPHA(*(s + i))) { + return FALSE; + } + } + return TRUE; +} + +static UBool +_isNumericString(const char* s, int32_t len) { + int32_t i; + for (i = 0; i < len; i++) { + if (!ISNUMERIC(*(s + i))) { + return FALSE; + } + } + return TRUE; +} + +static UBool +_isAlphaNumericString(const char* s, int32_t len) { + int32_t i; + for (i = 0; i < len; i++) { + if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { + return FALSE; + } + } + return TRUE; +} + +static UBool +_isLanguageSubtag(const char* s, int32_t len) { + /* + * language = 2*3ALPHA ; shortest ISO 639 code + * ["-" extlang] ; sometimes followed by + * ; extended language subtags + * / 4ALPHA ; or reserved for future use + * / 5*8ALPHA ; or registered language subtag + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isExtlangSubtag(const char* s, int32_t len) { + /* + * extlang = 3ALPHA ; selected ISO 639 codes + * *2("-" 3ALPHA) ; permanently reserved + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 3 && _isAlphaString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isScriptSubtag(const char* s, int32_t len) { + /* + * script = 4ALPHA ; ISO 15924 code + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 4 && _isAlphaString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isRegionSubtag(const char* s, int32_t len) { + /* + * region = 2ALPHA ; ISO 3166-1 code + * / 3DIGIT ; UN M.49 code + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 2 && 
_isAlphaString(s, len)) { + return TRUE; + } + if (len == 3 && _isNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isVariantSubtag(const char* s, int32_t len) { + /* + * variant = 5*8alphanum ; registered variants + * / (DIGIT 3alphanum) + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { + return TRUE; + } + if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isPrivateuseVariantSubtag(const char* s, int32_t len) { + /* + * variant = 1*8alphanum ; registered variants + * / (DIGIT 3alphanum) + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isExtensionSingleton(const char* s, int32_t len) { + /* + * extension = singleton 1*("-" (2*8alphanum)) + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isExtensionSubtag(const char* s, int32_t len) { + /* + * extension = singleton 1*("-" (2*8alphanum)) + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isExtensionSubtags(const char* s, int32_t len) { + const char *p = s; + const char *pSubtag = NULL; + + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + + while ((p - s) < len) { + if (*p == SEP) { + if (pSubtag == NULL) { + return FALSE; + } + if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { + return FALSE; + } + pSubtag = NULL; + } else if (pSubtag == NULL) { + pSubtag = p; + } + p++; + } + if (pSubtag == NULL) { + return FALSE; + } + return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); +} + +static UBool +_isPrivateuseValueSubtag(const char* s, int32_t 
len) { + /* + * privateuse = "x" 1*("-" (1*8alphanum)) + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isPrivateuseValueSubtags(const char* s, int32_t len) { + const char *p = s; + const char *pSubtag = NULL; + + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + + while ((p - s) < len) { + if (*p == SEP) { + if (pSubtag == NULL) { + return FALSE; + } + if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { + return FALSE; + } + pSubtag = NULL; + } else if (pSubtag == NULL) { + pSubtag = p; + } + p++; + } + if (pSubtag == NULL) { + return FALSE; + } + return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); +} + +U_CFUNC UBool +ultag_isUnicodeLocaleKey(const char* s, int32_t len) { + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 2 && _isAlphaNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +U_CFUNC UBool +ultag_isUnicodeLocaleType(const char*s, int32_t len) { + const char* p; + int32_t subtagLen = 0; + + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + + for (p = s; len > 0; p++, len--) { + if (*p == SEP) { + if (subtagLen < 3) { + return FALSE; + } + subtagLen = 0; + } else if (ISALPHA(*p) || ISNUMERIC(*p)) { + subtagLen++; + if (subtagLen > 8) { + return FALSE; + } + } else { + return FALSE; + } + } + + return (subtagLen >= 3); +} +/* +* ------------------------------------------------- +* +* Helper functions +* +* ------------------------------------------------- +*/ + +static UBool +_addVariantToList(VariantListEntry **first, VariantListEntry *var) { + UBool bAdded = TRUE; + + if (*first == NULL) { + var->next = NULL; + *first = var; + } else { + VariantListEntry *prev, *cur; + int32_t cmp; + + /* variants order should be preserved */ + prev = NULL; + cur = *first; + while (TRUE) { + if (cur == NULL) { + prev->next = var; + var->next = NULL; + break; + } + + /* Checking 
for duplicate variant */ + cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); + if (cmp == 0) { + /* duplicated variant */ + bAdded = FALSE; + break; + } + prev = cur; + cur = cur->next; + } + } + + return bAdded; +} + +static UBool +_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { + UBool bAdded = TRUE; + + if (*first == NULL) { + attr->next = NULL; + *first = attr; + } else { + AttributeListEntry *prev, *cur; + int32_t cmp; + + /* reorder variants in alphabetical order */ + prev = NULL; + cur = *first; + while (TRUE) { + if (cur == NULL) { + prev->next = attr; + attr->next = NULL; + break; + } + cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); + if (cmp < 0) { + if (prev == NULL) { + *first = attr; + } else { + prev->next = attr; + } + attr->next = cur; + break; + } + if (cmp == 0) { + /* duplicated variant */ + bAdded = FALSE; + break; + } + prev = cur; + cur = cur->next; + } + } + + return bAdded; +} + + +static UBool +_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { + UBool bAdded = TRUE; + + if (*first == NULL) { + ext->next = NULL; + *first = ext; + } else { + ExtensionListEntry *prev, *cur; + int32_t cmp; + + /* reorder variants in alphabetical order */ + prev = NULL; + cur = *first; + while (TRUE) { + if (cur == NULL) { + prev->next = ext; + ext->next = NULL; + break; + } + if (localeToBCP) { + /* special handling for locale to bcp conversion */ + int32_t len, curlen; + + len = (int32_t)uprv_strlen(ext->key); + curlen = (int32_t)uprv_strlen(cur->key); + + if (len == 1 && curlen == 1) { + if (*(ext->key) == *(cur->key)) { + cmp = 0; + } else if (*(ext->key) == PRIVATEUSE) { + cmp = 1; + } else if (*(cur->key) == PRIVATEUSE) { + cmp = -1; + } else { + cmp = *(ext->key) - *(cur->key); + } + } else if (len == 1) { + cmp = *(ext->key) - LDMLEXT; + } else if (curlen == 1) { + cmp = LDMLEXT - *(cur->key); + } else { + cmp = uprv_compareInvCharsAsAscii(ext->key, 
cur->key); + /* Both are u extension keys - we need special handling for 'attribute' */ + if (cmp != 0) { + if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { + cmp = 1; + } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { + cmp = -1; + } + } + } + } else { + cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); + } + if (cmp < 0) { + if (prev == NULL) { + *first = ext; + } else { + prev->next = ext; + } + ext->next = cur; + break; + } + if (cmp == 0) { + /* duplicated extension key */ + bAdded = FALSE; + break; + } + prev = cur; + cur = cur->next; + } + } + + return bAdded; +} + +static void +_initializeULanguageTag(ULanguageTag* langtag) { + int32_t i; + + langtag->buf = NULL; + + langtag->language = EMPTY; + for (i = 0; i < MAXEXTLANG; i++) { + langtag->extlang[i] = NULL; + } + + langtag->script = EMPTY; + langtag->region = EMPTY; + + langtag->variants = NULL; + langtag->extensions = NULL; + + langtag->grandfathered = EMPTY; + langtag->privateuse = EMPTY; +} + +static int32_t +_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { + char buf[ULOC_LANG_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len, i; + int32_t reslen = 0; + + if (U_FAILURE(*status)) { + return 0; + } + + len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + len = 0; + } + + /* Note: returned language code is in lower case letters */ + + if (len == 0) { + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); + } + reslen += LANG_UND_LEN; + } else if (!_isLanguageSubtag(buf, len)) { + /* invalid language code */ + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - 
reslen)); + } + reslen += LANG_UND_LEN; + } else { + /* resolve deprecated */ + for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { + if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { + uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); + len = (int32_t)uprv_strlen(buf); + break; + } + } + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + u_terminateChars(appendAt, capacity, reslen, status); + return reslen; +} + +static int32_t +_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { + char buf[ULOC_SCRIPT_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len; + int32_t reslen = 0; + + if (U_FAILURE(*status)) { + return 0; + } + + len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return 0; + } + + if (len > 0) { + if (!_isScriptSubtag(buf, len)) { + /* invalid script code */ + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return 0; + } else { + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + } + u_terminateChars(appendAt, capacity, reslen, status); + return reslen; +} + +static int32_t +_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { + char buf[ULOC_COUNTRY_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len; + int32_t reslen = 0; + + if (U_FAILURE(*status)) { + return 0; + } + + len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return 0; + } + + if (len > 0) { + if 
(!_isRegionSubtag(buf, len)) { + /* invalid region code */ + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return 0; + } else { + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + } + u_terminateChars(appendAt, capacity, reslen, status); + return reslen; +} + +static int32_t +_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { + char buf[ULOC_FULLNAME_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len, i; + int32_t reslen = 0; + + if (U_FAILURE(*status)) { + return 0; + } + + len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return 0; + } + + if (len > 0) { + char *p, *pVar; + UBool bNext = TRUE; + VariantListEntry *var; + VariantListEntry *varFirst = NULL; + + pVar = NULL; + p = buf; + while (bNext) { + if (*p == SEP || *p == LOCALE_SEP || *p == 0) { + if (*p == 0) { + bNext = FALSE; + } else { + *p = 0; /* terminate */ + } + if (pVar == NULL) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + /* ignore empty variant */ + } else { + /* ICU uses upper case letters for variants, but + the canonical format is lowercase in BCP47 */ + for (i = 0; *(pVar + i) != 0; i++) { + *(pVar + i) = uprv_tolower(*(pVar + i)); + } + + /* validate */ + if (_isVariantSubtag(pVar, -1)) { + if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { + /* emit the variant to the list */ + var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); + if (var == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + var->variant = pVar; + if (!_addVariantToList(&varFirst, var)) { + /* duplicated variant */ + uprv_free(var); + if 
(strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } else { + /* Special handling for POSIX variant, need to remember that we had it and then */ + /* treat it like an extension later. */ + *hadPosix = TRUE; + } + } else if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } else if (_isPrivateuseValueSubtag(pVar, -1)) { + /* Handle private use subtags separately */ + break; + } + } + /* reset variant starting position */ + pVar = NULL; + } else if (pVar == NULL) { + pVar = p; + } + p++; + } + + if (U_SUCCESS(*status)) { + if (varFirst != NULL) { + int32_t varLen; + + /* write out validated/normalized variants to the target */ + var = varFirst; + while (var != NULL) { + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + varLen = (int32_t)uprv_strlen(var->variant); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); + } + reslen += varLen; + var = var->next; + } + } + } + + /* clean up */ + var = varFirst; + while (var != NULL) { + VariantListEntry *tmpVar = var->next; + uprv_free(var); + var = tmpVar; + } + + if (U_FAILURE(*status)) { + return 0; + } + } + + u_terminateChars(appendAt, capacity, reslen, status); + return reslen; +} + +static int32_t +_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { + char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; + int32_t attrBufLength = 0; + UEnumeration *keywordEnum = NULL; + int32_t reslen = 0; + + keywordEnum = uloc_openKeywords(localeID, status); + if (U_FAILURE(*status) && !hadPosix) { + uenum_close(keywordEnum); + return 0; + } + if (keywordEnum != NULL || hadPosix) { + /* reorder extensions */ + int32_t len; + const char *key; + ExtensionListEntry *firstExt = NULL; + ExtensionListEntry *ext; + AttributeListEntry *firstAttr = NULL; + AttributeListEntry *attr; + char *attrValue; 
+ char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + char *pExtBuf = extBuf; + int32_t extBufCapacity = sizeof(extBuf); + const char *bcpKey=nullptr, *bcpValue=nullptr; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t keylen; + UBool isBcpUExt; + + while (TRUE) { + key = uenum_next(keywordEnum, NULL, status); + if (key == NULL) { + break; + } + len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); + /* buf must be null-terminated */ + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + /* ignore this keyword */ + tmpStatus = U_ZERO_ERROR; + continue; + } + + keylen = (int32_t)uprv_strlen(key); + isBcpUExt = (keylen > 1); + + /* special keyword used for representing Unicode locale attributes */ + if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { + if (len > 0) { + int32_t i = 0; + while (TRUE) { + attrBufLength = 0; + for (; i < len; i++) { + if (buf[i] != '-') { + attrBuf[attrBufLength++] = buf[i]; + } else { + i++; + break; + } + } + if (attrBufLength > 0) { + attrBuf[attrBufLength] = 0; + + } else if (i >= len){ + break; + } + + /* create AttributeListEntry */ + attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); + if (attr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + attrValue = (char*)uprv_malloc(attrBufLength + 1); + if (attrValue == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + uprv_strcpy(attrValue, attrBuf); + attr->attribute = attrValue; + + if (!_addAttributeToList(&firstAttr, attr)) { + uprv_free(attr); + uprv_free(attrValue); + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } + /* for a place holder ExtensionListEntry */ + bcpKey = LOCALE_ATTRIBUTE_KEY; + bcpValue = NULL; + } + } else if (isBcpUExt) { + bcpKey = uloc_toUnicodeLocaleKey(key); + if (bcpKey == NULL) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + + /* we've checked 
buf is null-terminated above */ + bcpValue = uloc_toUnicodeLocaleType(key, buf); + if (bcpValue == NULL) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + if (bcpValue == buf) { + /* + When uloc_toUnicodeLocaleType(key, buf) returns the + input value as is, the value is well-formed, but has + no known mapping. This implementation normalizes the + the value to lower case + */ + int32_t bcpValueLen = uprv_strlen(bcpValue); + if (bcpValueLen < extBufCapacity) { + uprv_strcpy(pExtBuf, bcpValue); + T_CString_toLowerCase(pExtBuf); + + bcpValue = pExtBuf; + + pExtBuf += (bcpValueLen + 1); + extBufCapacity -= (bcpValueLen + 1); + } else { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + } + } else { + if (*key == PRIVATEUSE) { + if (!_isPrivateuseValueSubtags(buf, len)) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + } else { + if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + } + bcpKey = key; + if ((len + 1) < extBufCapacity) { + uprv_memcpy(pExtBuf, buf, len); + bcpValue = pExtBuf; + + pExtBuf += len; + + *pExtBuf = 0; + pExtBuf++; + + extBufCapacity -= (len + 1); + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + + /* create ExtensionListEntry */ + ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (ext == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + ext->key = bcpKey; + ext->value = bcpValue; + + if (!_addExtensionToList(&firstExt, ext, TRUE)) { + uprv_free(ext); + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } + + /* Special handling for POSIX variant - add the keywords for POSIX */ + if (hadPosix) { + /* create ExtensionListEntry for POSIX */ + ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (ext == NULL) { + *status = 
U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + ext->key = POSIX_KEY; + ext->value = POSIX_VALUE; + + if (!_addExtensionToList(&firstExt, ext, TRUE)) { + uprv_free(ext); + } + } + + if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { + UBool startLDMLExtension = FALSE; + for (ext = firstExt; ext; ext = ext->next) { + if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { + /* first LDML u singlton extension */ + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + if (reslen < capacity) { + *(appendAt + reslen) = LDMLEXT; + } + reslen++; + + startLDMLExtension = TRUE; + } + + /* write out the sorted BCP47 attributes, extensions and private use */ + if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { + /* write the value for the attributes */ + for (attr = firstAttr; attr; attr = attr->next) { + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + len = (int32_t)uprv_strlen(attr->attribute); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + } else { + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + len = (int32_t)uprv_strlen(ext->key); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); + } + reslen += len; + if (reslen < capacity) { + *(appendAt + reslen) = SEP; + } + reslen++; + len = (int32_t)uprv_strlen(ext->value); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + } + } +cleanup: + /* clean up */ + ext = firstExt; + while (ext != NULL) { + ExtensionListEntry *tmpExt = ext->next; + uprv_free(ext); + ext = tmpExt; + } + + attr = firstAttr; + while (attr != NULL) { + AttributeListEntry *tmpAttr = attr->next; + char *pValue = (char *)attr->attribute; + uprv_free(pValue); + uprv_free(attr); + attr = tmpAttr; + } + + uenum_close(keywordEnum); + + if 
(U_FAILURE(*status)) { + return 0; + } + } + + return u_terminateChars(appendAt, capacity, reslen, status); +} + +/** + * Append keywords parsed from LDML extension value + * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} + * Note: char* buf is used for storing keywords + */ +static void +_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { + const char *pTag; /* beginning of current subtag */ + const char *pKwds; /* beginning of key-type pairs */ + UBool variantExists = *posixVariant; + + ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ + ExtensionListEntry *kwd, *nextKwd; + + AttributeListEntry *attrFirst = NULL; /* first attribute */ + AttributeListEntry *attr, *nextAttr; + + int32_t len; + int32_t bufIdx = 0; + + char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + int32_t attrBufIdx = 0; + + /* Reset the posixVariant value */ + *posixVariant = FALSE; + + pTag = ldmlext; + pKwds = NULL; + + /* Iterate through u extension attributes */ + while (*pTag) { + /* locate next separator char */ + for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); + + if (ultag_isUnicodeLocaleKey(pTag, len)) { + pKwds = pTag; + break; + } + + /* add this attribute to the list */ + attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); + if (attr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + + if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { + uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); + attrBuf[attrBufIdx + len] = 0; + attr->attribute = &attrBuf[attrBufIdx]; + attrBufIdx += (len + 1); + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + + if (!_addAttributeToList(&attrFirst, attr)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(attr); + goto cleanup; + } + + /* next tag */ + pTag += len; + if (*pTag) { + /* next to the separator */ + pTag++; + } + } + + if (attrFirst) 
{ + /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ + + if (attrBufIdx > bufSize) { + /* attrBufIdx == + 1 */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + + kwd->key = LOCALE_ATTRIBUTE_KEY; + kwd->value = buf; + + /* attribute subtags sorted in alphabetical order as type */ + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + + /* buffer size check is done above */ + if (attr != attrFirst) { + *(buf + bufIdx) = SEP; + bufIdx++; + } + + len = uprv_strlen(attr->attribute); + uprv_memcpy(buf + bufIdx, attr->attribute, len); + bufIdx += len; + + attr = nextAttr; + } + *(buf + bufIdx) = 0; + bufIdx++; + + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(kwd); + goto cleanup; + } + + /* once keyword entry is created, delete the attribute list */ + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + uprv_free(attr); + attr = nextAttr; + } + attrFirst = NULL; + } + + if (pKwds) { + const char *pBcpKey = NULL; /* u extenstion key subtag */ + const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ + int32_t bcpKeyLen = 0; + int32_t bcpTypeLen = 0; + UBool isDone = FALSE; + + pTag = pKwds; + /* BCP47 representation of LDML key/type pairs */ + while (!isDone) { + const char *pNextBcpKey = NULL; + int32_t nextBcpKeyLen = 0; + UBool emitKeyword = FALSE; + + if (*pTag) { + /* locate next separator char */ + for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); + + if (ultag_isUnicodeLocaleKey(pTag, len)) { + if (pBcpKey) { + emitKeyword = TRUE; + pNextBcpKey = pTag; + nextBcpKeyLen = len; + } else { + pBcpKey = pTag; + bcpKeyLen = len; + } + } else { + U_ASSERT(pBcpKey != NULL); + /* within LDML type subtags */ + if (pBcpType) { + bcpTypeLen += (len + 1); + } else { + pBcpType = 
pTag; + bcpTypeLen = len; + } + } + + /* next tag */ + pTag += len; + if (*pTag) { + /* next to the separator */ + pTag++; + } + } else { + /* processing last one */ + emitKeyword = TRUE; + isDone = TRUE; + } + + if (emitKeyword) { + const char *pKey = NULL; /* LDML key */ + const char *pType = NULL; /* LDML type */ + + char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ + + U_ASSERT(pBcpKey != NULL); + + if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { + /* the BCP key is invalid */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + + uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); + bcpKeyBuf[bcpKeyLen] = 0; + + /* u extension key to LDML key */ + pKey = uloc_toLegacyKey(bcpKeyBuf); + if (pKey == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + if (pKey == bcpKeyBuf) { + /* + The key returned by toLegacyKey points to the input buffer. + We normalize the result key to lower case. + */ + T_CString_toLowerCase(bcpKeyBuf); + if (bufSize - bufIdx - 1 >= bcpKeyLen) { + uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); + pKey = buf + bufIdx; + bufIdx += bcpKeyLen; + *(buf + bufIdx) = 0; + bufIdx++; + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + goto cleanup; + } + } + + if (pBcpType) { + char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ + if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { + /* the BCP type is too long */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + + uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); + bcpTypeBuf[bcpTypeLen] = 0; + + /* BCP type to locale type */ + pType = uloc_toLegacyType(pKey, bcpTypeBuf); + if (pType == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + if (pType == bcpTypeBuf) { + /* + The type returned by toLegacyType points to the input buffer. + We normalize the result type to lower case. 
+ */ + /* normalize to lower case */ + T_CString_toLowerCase(bcpTypeBuf); + if (bufSize - bufIdx - 1 >= bcpTypeLen) { + uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); + pType = buf + bufIdx; + bufIdx += bcpTypeLen; + *(buf + bufIdx) = 0; + bufIdx++; + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + goto cleanup; + } + } + } else { + /* typeless - default type value is "yes" */ + pType = LOCALE_TYPE_YES; + } + + /* Special handling for u-va-posix, since we want to treat this as a variant, + not as a keyword */ + if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { + *posixVariant = TRUE; + } else { + /* create an ExtensionListEntry for this keyword */ + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + + kwd->key = pKey; + kwd->value = pType; + + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(kwd); + goto cleanup; + } + } + + pBcpKey = pNextBcpKey; + bcpKeyLen = pNextBcpKey != NULL ? 
nextBcpKeyLen : 0; + pBcpType = NULL; + bcpTypeLen = 0; + } + } + } + + kwd = kwdFirst; + while (kwd != NULL) { + nextKwd = kwd->next; + _addExtensionToList(appendTo, kwd, FALSE); + kwd = nextKwd; + } + + return; + +cleanup: + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + uprv_free(attr); + attr = nextAttr; + } + + kwd = kwdFirst; + while (kwd != NULL) { + nextKwd = kwd->next; + uprv_free(kwd); + kwd = nextKwd; + } +} + + +static int32_t +_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { + int32_t reslen = 0; + int32_t i, n; + int32_t len; + ExtensionListEntry *kwdFirst = NULL; + ExtensionListEntry *kwd; + const char *key, *type; + char *kwdBuf = NULL; + int32_t kwdBufLength = capacity; + UBool posixVariant = FALSE; + + if (U_FAILURE(*status)) { + return 0; + } + + kwdBuf = (char*)uprv_malloc(kwdBufLength); + if (kwdBuf == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + /* Determine if variants already exists */ + if (ultag_getVariantsSize(langtag)) { + posixVariant = TRUE; + } + + n = ultag_getExtensionsSize(langtag); + + /* resolve locale keywords and reordering keys */ + for (i = 0; i < n; i++) { + key = ultag_getExtensionKey(langtag, i); + type = ultag_getExtensionValue(langtag, i); + if (*key == LDMLEXT) { + _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); + if (U_FAILURE(*status)) { + break; + } + } else { + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + kwd->key = key; + kwd->value = type; + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + uprv_free(kwd); + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } + + if (U_SUCCESS(*status)) { + type = ultag_getPrivateUse(langtag); + if ((int32_t)uprv_strlen(type) > 0) { + /* add private use as a keyword */ + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if 
(kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + kwd->key = PRIVATEUSE_KEY; + kwd->value = type; + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + uprv_free(kwd); + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + } + } + } + + /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ + + if (U_SUCCESS(*status) && posixVariant) { + len = (int32_t) uprv_strlen(_POSIX); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + + if (U_SUCCESS(*status) && kwdFirst != NULL) { + /* write out the sorted keywords */ + UBool firstValue = TRUE; + kwd = kwdFirst; + do { + if (reslen < capacity) { + if (firstValue) { + /* '@' */ + *(appendAt + reslen) = LOCALE_EXT_SEP; + firstValue = FALSE; + } else { + /* ';' */ + *(appendAt + reslen) = LOCALE_KEYWORD_SEP; + } + } + reslen++; + + /* key */ + len = (int32_t)uprv_strlen(kwd->key); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); + } + reslen += len; + + /* '=' */ + if (reslen < capacity) { + *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; + } + reslen++; + + /* type */ + len = (int32_t)uprv_strlen(kwd->value); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); + } + reslen += len; + + kwd = kwd->next; + } while (kwd); + } + + /* clean up */ + kwd = kwdFirst; + while (kwd != NULL) { + ExtensionListEntry *tmpKwd = kwd->next; + uprv_free(kwd); + kwd = tmpKwd; + } + + uprv_free(kwdBuf); + + if (U_FAILURE(*status)) { + return 0; + } + + return u_terminateChars(appendAt, capacity, reslen, status); +} + +static int32_t +_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { + (void)hadPosix; + char buf[ULOC_FULLNAME_CAPACITY]; + char tmpAppend[ULOC_FULLNAME_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len, i; + 
int32_t reslen = 0; + + if (U_FAILURE(*status)) { + return 0; + } + + len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return 0; + } + + if (len > 0) { + char *p, *pPriv; + UBool bNext = TRUE; + UBool firstValue = TRUE; + UBool writeValue; + + pPriv = NULL; + p = buf; + while (bNext) { + writeValue = FALSE; + if (*p == SEP || *p == LOCALE_SEP || *p == 0) { + if (*p == 0) { + bNext = FALSE; + } else { + *p = 0; /* terminate */ + } + if (pPriv != NULL) { + /* Private use in the canonical format is lowercase in BCP47 */ + for (i = 0; *(pPriv + i) != 0; i++) { + *(pPriv + i) = uprv_tolower(*(pPriv + i)); + } + + /* validate */ + if (_isPrivateuseValueSubtag(pPriv, -1)) { + if (firstValue) { + if (!_isVariantSubtag(pPriv, -1)) { + writeValue = TRUE; + } + } else { + writeValue = TRUE; + } + } else if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } else { + break; + } + + if (writeValue) { + if (reslen < capacity) { + tmpAppend[reslen++] = SEP; + } + + if (firstValue) { + if (reslen < capacity) { + tmpAppend[reslen++] = *PRIVATEUSE_KEY; + } + + if (reslen < capacity) { + tmpAppend[reslen++] = SEP; + } + + len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); + if (reslen < capacity) { + uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); + } + reslen += len; + + if (reslen < capacity) { + tmpAppend[reslen++] = SEP; + } + + firstValue = FALSE; + } + + len = (int32_t)uprv_strlen(pPriv); + if (reslen < capacity) { + uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + } + /* reset private use starting position */ + pPriv = NULL; + } else if (pPriv == NULL) { + pPriv = p; + } + p++; + } + + if (U_FAILURE(*status)) { + return 0; + } + } + + if (U_SUCCESS(*status)) { + len = reslen; + if (reslen < capacity) { + 
uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); + } + } + + u_terminateChars(appendAt, capacity, reslen, status); + + return reslen; +} + +/* +* ------------------------------------------------- +* +* ultag_ functions +* +* ------------------------------------------------- +*/ + +/* Bit flags used by the parser */ +#define LANG 0x0001 +#define EXTL 0x0002 +#define SCRT 0x0004 +#define REGN 0x0008 +#define VART 0x0010 +#define EXTS 0x0020 +#define EXTV 0x0040 +#define PRIV 0x0080 + +/** + * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing + * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) + * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. + */ +#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) +#pragma optimize( "", off ) +#endif + +static ULanguageTag* +ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { + ULanguageTag *t; + char *tagBuf; + int16_t next; + char *pSubtag, *pNext, *pLastGoodPosition; + int32_t subtagLen; + int32_t extlangIdx; + ExtensionListEntry *pExtension; + char *pExtValueSubtag, *pExtValueSubtagEnd; + int32_t i; + UBool privateuseVar = FALSE; + int32_t grandfatheredLen = 0; + + if (parsedLen != NULL) { + *parsedLen = 0; + } + + if (U_FAILURE(*status)) { + return NULL; + } + + if (tagLen < 0) { + tagLen = (int32_t)uprv_strlen(tag); + } + + /* copy the entire string */ + tagBuf = (char*)uprv_malloc(tagLen + 1); + if (tagBuf == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(tagBuf, tag, tagLen); + *(tagBuf + tagLen) = 0; + + /* create a ULanguageTag */ + t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); + if (t == NULL) { + uprv_free(tagBuf); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + _initializeULanguageTag(t); + t->buf = tagBuf; + + if 
(tagLen < MINLEN) { + /* the input tag is too short - return empty ULanguageTag */ + return t; + } + + /* check if the tag is grandfathered */ + for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { + if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { + int32_t newTagLength; + + grandfatheredLen = tagLen; /* back up for output parsedLen */ + newTagLength = uprv_strlen(GRANDFATHERED[i+1]); + if (tagLen < newTagLength) { + uprv_free(tagBuf); + tagBuf = (char*)uprv_malloc(newTagLength + 1); + if (tagBuf == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + ultag_close(t); + return NULL; + } + t->buf = tagBuf; + tagLen = newTagLength; + } + uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); + break; + } + } + + /* + * langtag = language + * ["-" script] + * ["-" region] + * *("-" variant) + * *("-" extension) + * ["-" privateuse] + */ + + next = LANG | PRIV; + pNext = pLastGoodPosition = tagBuf; + extlangIdx = 0; + pExtension = NULL; + pExtValueSubtag = NULL; + pExtValueSubtagEnd = NULL; + + while (pNext) { + char *pSep; + + pSubtag = pNext; + + /* locate next separator char */ + pSep = pSubtag; + while (*pSep) { + if (*pSep == SEP) { + break; + } + pSep++; + } + if (*pSep == 0) { + /* last subtag */ + pNext = NULL; + } else { + pNext = pSep + 1; + } + subtagLen = (int32_t)(pSep - pSubtag); + + if (next & LANG) { + if (_isLanguageSubtag(pSubtag, subtagLen)) { + *pSep = 0; /* terminate */ + t->language = T_CString_toLowerCase(pSubtag); + + pLastGoodPosition = pSep; + next = EXTL | SCRT | REGN | VART | EXTS | PRIV; + continue; + } + } + if (next & EXTL) { + if (_isExtlangSubtag(pSubtag, subtagLen)) { + *pSep = 0; + t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); + + pLastGoodPosition = pSep; + if (extlangIdx < 3) { + next = EXTL | SCRT | REGN | VART | EXTS | PRIV; + } else { + next = SCRT | REGN | VART | EXTS | PRIV; + } + continue; + } + } + if (next & SCRT) { + if (_isScriptSubtag(pSubtag, subtagLen)) { + char *p = pSubtag; + + *pSep = 0; + + /* to title case */ + *p = 
uprv_toupper(*p); + p++; + for (; *p; p++) { + *p = uprv_tolower(*p); + } + + t->script = pSubtag; + + pLastGoodPosition = pSep; + next = REGN | VART | EXTS | PRIV; + continue; + } + } + if (next & REGN) { + if (_isRegionSubtag(pSubtag, subtagLen)) { + *pSep = 0; + t->region = T_CString_toUpperCase(pSubtag); + + pLastGoodPosition = pSep; + next = VART | EXTS | PRIV; + continue; + } + } + if (next & VART) { + if (_isVariantSubtag(pSubtag, subtagLen) || + (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { + VariantListEntry *var; + UBool isAdded; + + var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); + if (var == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto error; + } + *pSep = 0; + var->variant = T_CString_toUpperCase(pSubtag); + isAdded = _addVariantToList(&(t->variants), var); + if (!isAdded) { + /* duplicated variant entry */ + uprv_free(var); + break; + } + pLastGoodPosition = pSep; + next = VART | EXTS | PRIV; + continue; + } + } + if (next & EXTS) { + if (_isExtensionSingleton(pSubtag, subtagLen)) { + if (pExtension != NULL) { + if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { + /* the previous extension is incomplete */ + uprv_free(pExtension); + pExtension = NULL; + break; + } + + /* terminate the previous extension value */ + *pExtValueSubtagEnd = 0; + pExtension->value = T_CString_toLowerCase(pExtValueSubtag); + + /* insert the extension to the list */ + if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { + pLastGoodPosition = pExtValueSubtagEnd; + } else { + /* stop parsing here */ + uprv_free(pExtension); + pExtension = NULL; + break; + } + } + + /* create a new extension */ + pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (pExtension == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto error; + } + *pSep = 0; + pExtension->key = T_CString_toLowerCase(pSubtag); + pExtension->value = NULL; /* will be set later */ + + /* + * reset the start and the end 
location of extension value + * subtags for this extension + */ + pExtValueSubtag = NULL; + pExtValueSubtagEnd = NULL; + + next = EXTV; + continue; + } + } + if (next & EXTV) { + if (_isExtensionSubtag(pSubtag, subtagLen)) { + if (pExtValueSubtag == NULL) { + /* if the start postion of this extension's value is not yet, + this one is the first value subtag */ + pExtValueSubtag = pSubtag; + } + + /* Mark the end of this subtag */ + pExtValueSubtagEnd = pSep; + next = EXTS | EXTV | PRIV; + + continue; + } + } + if (next & PRIV) { + if (uprv_tolower(*pSubtag) == PRIVATEUSE) { + char *pPrivuseVal; + + if (pExtension != NULL) { + /* Process the last extension */ + if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { + /* the previous extension is incomplete */ + uprv_free(pExtension); + pExtension = NULL; + break; + } else { + /* terminate the previous extension value */ + *pExtValueSubtagEnd = 0; + pExtension->value = T_CString_toLowerCase(pExtValueSubtag); + + /* insert the extension to the list */ + if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { + pLastGoodPosition = pExtValueSubtagEnd; + pExtension = NULL; + } else { + /* stop parsing here */ + uprv_free(pExtension); + pExtension = NULL; + break; + } + } + } + + /* The rest of part will be private use value subtags */ + if (pNext == NULL) { + /* empty private use subtag */ + break; + } + /* back up the private use value start position */ + pPrivuseVal = pNext; + + /* validate private use value subtags */ + while (pNext) { + pSubtag = pNext; + pSep = pSubtag; + while (*pSep) { + if (*pSep == SEP) { + break; + } + pSep++; + } + if (*pSep == 0) { + /* last subtag */ + pNext = NULL; + } else { + pNext = pSep + 1; + } + subtagLen = (int32_t)(pSep - pSubtag); + + if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { + *pSep = 0; + next = VART; + privateuseVar = TRUE; + break; + } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { + pLastGoodPosition = 
pSep; + } else { + break; + } + } + + if (next == VART) { + continue; + } + + if (pLastGoodPosition - pPrivuseVal > 0) { + *pLastGoodPosition = 0; + t->privateuse = T_CString_toLowerCase(pPrivuseVal); + } + /* No more subtags, exiting the parse loop */ + break; + } + break; + } + + /* If we fell through here, it means this subtag is illegal - quit parsing */ + break; + } + + if (pExtension != NULL) { + /* Process the last extension */ + if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { + /* the previous extension is incomplete */ + uprv_free(pExtension); + } else { + /* terminate the previous extension value */ + *pExtValueSubtagEnd = 0; + pExtension->value = T_CString_toLowerCase(pExtValueSubtag); + /* insert the extension to the list */ + if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { + pLastGoodPosition = pExtValueSubtagEnd; + } else { + uprv_free(pExtension); + } + } + } + + if (parsedLen != NULL) { + *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); + } + + return t; + +error: + ultag_close(t); + return NULL; +} + +/** +* Ticket #12705 - Turn optimization back on. 
+*/ +#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) +#pragma optimize( "", on ) +#endif + +static void +ultag_close(ULanguageTag* langtag) { + + if (langtag == NULL) { + return; + } + + uprv_free(langtag->buf); + + if (langtag->variants) { + VariantListEntry *curVar = langtag->variants; + while (curVar) { + VariantListEntry *nextVar = curVar->next; + uprv_free(curVar); + curVar = nextVar; + } + } + + if (langtag->extensions) { + ExtensionListEntry *curExt = langtag->extensions; + while (curExt) { + ExtensionListEntry *nextExt = curExt->next; + uprv_free(curExt); + curExt = nextExt; + } + } + + uprv_free(langtag); +} + +static const char* +ultag_getLanguage(const ULanguageTag* langtag) { + return langtag->language; +} + +#if 0 +static const char* +ultag_getJDKLanguage(const ULanguageTag* langtag) { + int32_t i; + for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { + if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { + return DEPRECATEDLANGS[i + 1]; + } + } + return langtag->language; +} +#endif + +static const char* +ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { + if (idx >= 0 && idx < MAXEXTLANG) { + return langtag->extlang[idx]; + } + return NULL; +} + +static int32_t +ultag_getExtlangSize(const ULanguageTag* langtag) { + int32_t size = 0; + int32_t i; + for (i = 0; i < MAXEXTLANG; i++) { + if (langtag->extlang[i]) { + size++; + } + } + return size; +} + +static const char* +ultag_getScript(const ULanguageTag* langtag) { + return langtag->script; +} + +static const char* +ultag_getRegion(const ULanguageTag* langtag) { + return langtag->region; +} + +static const char* +ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { + const char *var = NULL; + VariantListEntry *cur = langtag->variants; + int32_t i = 0; + while (cur) { + if (i == idx) { + var = cur->variant; + break; + } + cur = cur->next; + i++; + } + return var; +} + +static int32_t 
+ultag_getVariantsSize(const ULanguageTag* langtag) { + int32_t size = 0; + VariantListEntry *cur = langtag->variants; + while (TRUE) { + if (cur == NULL) { + break; + } + size++; + cur = cur->next; + } + return size; +} + +static const char* +ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { + const char *key = NULL; + ExtensionListEntry *cur = langtag->extensions; + int32_t i = 0; + while (cur) { + if (i == idx) { + key = cur->key; + break; + } + cur = cur->next; + i++; + } + return key; +} + +static const char* +ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { + const char *val = NULL; + ExtensionListEntry *cur = langtag->extensions; + int32_t i = 0; + while (cur) { + if (i == idx) { + val = cur->value; + break; + } + cur = cur->next; + i++; + } + return val; +} + +static int32_t +ultag_getExtensionsSize(const ULanguageTag* langtag) { + int32_t size = 0; + ExtensionListEntry *cur = langtag->extensions; + while (TRUE) { + if (cur == NULL) { + break; + } + size++; + cur = cur->next; + } + return size; +} + +static const char* +ultag_getPrivateUse(const ULanguageTag* langtag) { + return langtag->privateuse; +} + +#if 0 +static const char* +ultag_getGrandfathered(const ULanguageTag* langtag) { + return langtag->grandfathered; +} +#endif + + +/* +* ------------------------------------------------- +* +* Locale/BCP47 conversion APIs, exposed as uloc_* +* +* ------------------------------------------------- +*/ +U_CAPI int32_t U_EXPORT2 +uloc_toLanguageTag(const char* localeID, + char* langtag, + int32_t langtagCapacity, + UBool strict, + UErrorCode* status) { + /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ + char canonical[256]; + int32_t reslen = 0; + UErrorCode tmpStatus = U_ZERO_ERROR; + UBool hadPosix = FALSE; + const char* pKeywordStart; + + /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". 
See #6835 */ + canonical[0] = 0; + if (uprv_strlen(localeID) > 0) { + uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); + if (tmpStatus != U_ZERO_ERROR) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + } + + /* For handling special case - private use only tag */ + pKeywordStart = locale_getKeywordsStart(canonical); + if (pKeywordStart == canonical) { + UEnumeration *kwdEnum; + int kwdCnt = 0; + UBool done = FALSE; + + kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); + if (kwdEnum != NULL) { + kwdCnt = uenum_count(kwdEnum, &tmpStatus); + if (kwdCnt == 1) { + const char *key; + int32_t len = 0; + + key = uenum_next(kwdEnum, &len, &tmpStatus); + if (len == 1 && *key == PRIVATEUSE) { + char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + buf[0] = PRIVATEUSE; + buf[1] = SEP; + len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + if (_isPrivateuseValueSubtags(&buf[2], len)) { + /* return private use only tag */ + reslen = len + 2; + uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); + u_terminateChars(langtag, langtagCapacity, reslen, status); + done = TRUE; + } else if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + done = TRUE; + } + /* if not strict mode, then "und" will be returned */ + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + done = TRUE; + } + } + } + uenum_close(kwdEnum); + if (done) { + return reslen; + } + } + } + + reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); + reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); + reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); + reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); + reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, 
strict, hadPosix, status); + reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); + + return reslen; +} + + +U_CAPI int32_t U_EXPORT2 +uloc_forLanguageTag(const char* langtag, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* status) { + ULanguageTag *lt; + int32_t reslen = 0; + const char *subtag, *p; + int32_t len; + int32_t i, n; + UBool noRegion = TRUE; + + lt = ultag_parse(langtag, -1, parsedLength, status); + if (U_FAILURE(*status)) { + return 0; + } + + /* language */ + subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); + if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { + len = (int32_t)uprv_strlen(subtag); + if (len > 0) { + if (reslen < localeIDCapacity) { + uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); + } + reslen += len; + } + } + + /* script */ + subtag = ultag_getScript(lt); + len = (int32_t)uprv_strlen(subtag); + if (len > 0) { + if (reslen < localeIDCapacity) { + *(localeID + reslen) = LOCALE_SEP; + } + reslen++; + + /* write out the script in title case */ + p = subtag; + while (*p) { + if (reslen < localeIDCapacity) { + if (p == subtag) { + *(localeID + reslen) = uprv_toupper(*p); + } else { + *(localeID + reslen) = *p; + } + } + reslen++; + p++; + } + } + + /* region */ + subtag = ultag_getRegion(lt); + len = (int32_t)uprv_strlen(subtag); + if (len > 0) { + if (reslen < localeIDCapacity) { + *(localeID + reslen) = LOCALE_SEP; + } + reslen++; + /* write out the retion in upper case */ + p = subtag; + while (*p) { + if (reslen < localeIDCapacity) { + *(localeID + reslen) = uprv_toupper(*p); + } + reslen++; + p++; + } + noRegion = FALSE; + } + + /* variants */ + n = ultag_getVariantsSize(lt); + if (n > 0) { + if (noRegion) { + if (reslen < localeIDCapacity) { + *(localeID + reslen) = LOCALE_SEP; + } + reslen++; + } + + for (i = 0; i < n; i++) { + subtag = 
ultag_getVariant(lt, i); + if (reslen < localeIDCapacity) { + *(localeID + reslen) = LOCALE_SEP; + } + reslen++; + /* write out the variant in upper case */ + p = subtag; + while (*p) { + if (reslen < localeIDCapacity) { + *(localeID + reslen) = uprv_toupper(*p); + } + reslen++; + p++; + } + } + } + + /* keywords */ + n = ultag_getExtensionsSize(lt); + subtag = ultag_getPrivateUse(lt); + if (n > 0 || uprv_strlen(subtag) > 0) { + if (reslen == 0 && n > 0) { + /* need a language */ + if (reslen < localeIDCapacity) { + uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); + } + reslen += LANG_UND_LEN; + } + len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); + reslen += len; + } + + ultag_close(lt); + return u_terminateChars(localeID, localeIDCapacity, reslen, status); +} diff --git a/deps/icu-small/source/common/ulocimp.h b/deps/icu-small/source/common/ulocimp.h index 26d5c7963e..855f9235dc 100644 --- a/deps/icu-small/source/common/ulocimp.h +++ b/deps/icu-small/source/common/ulocimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/umapfile.c b/deps/icu-small/source/common/umapfile.c deleted file mode 100644 index 377b14d30f..0000000000 --- a/deps/icu-small/source/common/umapfile.c +++ /dev/null @@ -1,466 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2013, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -******************************************************************************/ - - -/*---------------------------------------------------------------------------- - * - * Memory mapped file wrappers for use by the ICU Data Implementation - * All of the platform-specific implementation for mapping data files - * is here. The rest of the ICU Data implementation uses only the - * wrapper functions. - * - *----------------------------------------------------------------------------*/ -/* Defines _XOPEN_SOURCE for access to POSIX functions. - * Must be before any other #includes. */ -#include "uposixdefs.h" - -#include "unicode/putil.h" -#include "udatamem.h" -#include "umapfile.h" - -/* memory-mapping base definitions ------------------------------------------ */ - -#if MAP_IMPLEMENTATION==MAP_WIN32 -# define WIN32_LEAN_AND_MEAN -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# include -# include "cmemory.h" - - typedef HANDLE MemoryMap; - -# define IS_MAP(map) ((map)!=NULL) -#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL - typedef size_t MemoryMap; - -# define IS_MAP(map) ((map)!=0) - -# include -# include -# include -# include - -# ifndef MAP_FAILED -# define MAP_FAILED ((void*)-1) -# endif - -# if MAP_IMPLEMENTATION==MAP_390DLL - /* No memory mapping for 390 batch mode. Fake it using dll loading. */ -# include -# include "cstring.h" -# include "cmemory.h" -# include "unicode/udata.h" -# define LIB_PREFIX "lib" -# define LIB_SUFFIX ".dll" - /* This is inconvienient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ -# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" -# endif -#elif MAP_IMPLEMENTATION==MAP_STDIO -# include -# include "cmemory.h" - - typedef void *MemoryMap; - -# define IS_MAP(map) ((map)!=NULL) -#endif - -/*----------------------------------------------------------------------------* - * * - * Memory Mapped File support. 
Platform dependent implementation of * - * functions used by the rest of the implementation.* - * * - *----------------------------------------------------------------------------*/ -#if MAP_IMPLEMENTATION==MAP_NONE - U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path) { - UDataMemory_init(pData); /* Clear the output struct. */ - return FALSE; /* no file access */ - } - - U_CFUNC void uprv_unmapFile(UDataMemory *pData) { - /* nothing to do */ - } -#elif MAP_IMPLEMENTATION==MAP_WIN32 - U_CFUNC UBool - uprv_mapFile( - UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ - /* Output only; any original contents are cleared. */ - const char *path /* File path to be opened/mapped */ - ) - { - HANDLE map; - HANDLE file; - SECURITY_ATTRIBUTES mappingAttributes; - SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL; - SECURITY_DESCRIPTOR securityDesc; - - UDataMemory_init(pData); /* Clear the output struct. */ - - /* open the input file */ - file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL); - if(file==INVALID_HANDLE_VALUE) { - return FALSE; - } - - /* Declare and initialize a security descriptor. 
- This is required for multiuser systems on Windows 2000 SP4 and beyond */ - if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) { - /* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */ - if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) { - /* Make the security attributes point to the security descriptor */ - uprv_memset(&mappingAttributes, 0, sizeof(mappingAttributes)); - mappingAttributes.nLength = sizeof(mappingAttributes); - mappingAttributes.lpSecurityDescriptor = &securityDesc; - mappingAttributes.bInheritHandle = FALSE; /* object uninheritable */ - mappingAttributesPtr = &mappingAttributes; - } - } - /* else creating security descriptors can fail when we are on Windows 98, - and mappingAttributesPtr == NULL for that case. */ - - /* create an unnamed Windows file-mapping object for the specified file */ - map=CreateFileMapping(file, mappingAttributesPtr, PAGE_READONLY, 0, 0, NULL); - CloseHandle(file); - if(map==NULL) { - return FALSE; - } - - /* map a view of the file into our address space */ - pData->pHeader=(const DataHeader *)MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0); - if(pData->pHeader==NULL) { - CloseHandle(map); - return FALSE; - } - pData->map=map; - return TRUE; - } - - U_CFUNC void - uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - UnmapViewOfFile(pData->pHeader); - CloseHandle(pData->map); - pData->pHeader=NULL; - pData->map=NULL; - } - } - - - -#elif MAP_IMPLEMENTATION==MAP_POSIX - U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path) { - int fd; - int length; - struct stat mystat; - void *data; - - UDataMemory_init(pData); /* Clear the output struct. 
*/ - - /* determine the length of the file */ - if(stat(path, &mystat)!=0 || mystat.st_size<=0) { - return FALSE; - } - length=mystat.st_size; - - /* open the file */ - fd=open(path, O_RDONLY); - if(fd==-1) { - return FALSE; - } - - /* get a view of the mapping */ -#if U_PLATFORM != U_PF_HPUX - data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); -#else - data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); -#endif - close(fd); /* no longer needed */ - if(data==MAP_FAILED) { - return FALSE; - } - - pData->map = (char *)data + length; - pData->pHeader=(const DataHeader *)data; - pData->mapAddr = data; -#if U_PLATFORM == U_PF_IPHONE - posix_madvise(data, length, POSIX_MADV_RANDOM); -#endif - return TRUE; - } - - U_CFUNC void - uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; - if(munmap(pData->mapAddr, dataLen)==-1) { - } - pData->pHeader=NULL; - pData->map=0; - pData->mapAddr=NULL; - } - } - - - -#elif MAP_IMPLEMENTATION==MAP_STDIO - /* copy of the filestrm.c/T_FileStream_size() implementation */ - static int32_t - umap_fsize(FILE *f) { - int32_t savedPos = ftell(f); - int32_t size = 0; - - /*Changes by Bertrand A. D. doesn't affect the current position - goes to the end of the file before ftell*/ - fseek(f, 0, SEEK_END); - size = (int32_t)ftell(f); - fseek(f, savedPos, SEEK_SET); - return size; - } - - U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path) { - FILE *file; - int32_t fileLength; - void *p; - - UDataMemory_init(pData); /* Clear the output struct. 
*/ - /* open the input file */ - file=fopen(path, "rb"); - if(file==NULL) { - return FALSE; - } - - /* get the file length */ - fileLength=umap_fsize(file); - if(ferror(file) || fileLength<=20) { - fclose(file); - return FALSE; - } - - /* allocate the memory to hold the file data */ - p=uprv_malloc(fileLength); - if(p==NULL) { - fclose(file); - return FALSE; - } - - /* read the file */ - if(fileLength!=fread(p, 1, fileLength, file)) { - uprv_free(p); - fclose(file); - return FALSE; - } - - fclose(file); - pData->map=p; - pData->pHeader=(const DataHeader *)p; - pData->mapAddr=p; - return TRUE; - } - - U_CFUNC void - uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - uprv_free(pData->map); - pData->map = NULL; - pData->mapAddr = NULL; - pData->pHeader = NULL; - } - } - - -#elif MAP_IMPLEMENTATION==MAP_390DLL - /* 390 specific Library Loading. - * This is the only platform left that dynamically loads an ICU Data Library. - * All other platforms use .data files when dynamic loading is required, but - * this turn out to be awkward to support in 390 batch mode. - * - * The idea here is to hide the fact that 390 is using dll loading from the - * rest of ICU, and make it look like there is file loading happening. - * - */ - - static char *strcpy_returnEnd(char *dest, const char *src) - { - while((*dest=*src)!=0) { - ++dest; - ++src; - } - return dest; - } - - /*------------------------------------------------------------------------------ - * - * computeDirPath given a user-supplied path of an item to be opened, - * compute and return - * - the full directory path to be used - * when opening the file. - * - Pointer to null at end of above returned path - * - * Parameters: - * path: input path. Buffer is not altered. - * pathBuffer: Output buffer. Any contents are overwritten. - * - * Returns: - * Pointer to null termination in returned pathBuffer. 
- * - * TODO: This works the way ICU historically has, but the - * whole data fallback search path is so complicated that - * proabably almost no one will ever really understand it, - * the potential for confusion is large. (It's not just - * this one function, but the whole scheme.) - * - *------------------------------------------------------------------------------*/ - static char *uprv_computeDirPath(const char *path, char *pathBuffer) - { - char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ - int32_t pathLen; /* Length of the returned directory path */ - - finalSlash = 0; - if (path != 0) { - finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); - } - - *pathBuffer = 0; - if (finalSlash == 0) { - /* No user-supplied path. - * Copy the ICU_DATA path to the path buffer and return that*/ - const char *icuDataDir; - icuDataDir=u_getDataDirectory(); - if(icuDataDir!=NULL && *icuDataDir!=0) { - return strcpy_returnEnd(pathBuffer, icuDataDir); - } else { - /* there is no icuDataDir either. Just return the empty pathBuffer. */ - return pathBuffer; - } - } - - /* User supplied path did contain a directory portion. - * Copy it to the output path buffer */ - pathLen = (int32_t)(finalSlash - path + 1); - uprv_memcpy(pathBuffer, path, pathLen); - *(pathBuffer+pathLen) = 0; - return pathBuffer+pathLen; - } - - -# define DATA_TYPE "dat" - - U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path) { - const char *inBasename; - char *basename; - char pathBuffer[1024]; - const DataHeader *pHeader; - dllhandle *handle; - void *val=0; - - inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); - if(inBasename==NULL) { - inBasename = path; - } else { - inBasename++; - } - basename=uprv_computeDirPath(path, pathBuffer); - if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) { - /* must mmap file... for build */ - int fd; - int length; - struct stat mystat; - void *data; - UDataMemory_init(pData); /* Clear the output struct. 
*/ - - /* determine the length of the file */ - if(stat(path, &mystat)!=0 || mystat.st_size<=0) { - return FALSE; - } - length=mystat.st_size; - - /* open the file */ - fd=open(path, O_RDONLY); - if(fd==-1) { - return FALSE; - } - - /* get a view of the mapping */ - data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); /* no longer needed */ - if(data==MAP_FAILED) { - return FALSE; - } - pData->map = (char *)data + length; - pData->pHeader=(const DataHeader *)data; - pData->mapAddr = data; - return TRUE; - } - -# ifdef OS390BATCH - /* ### hack: we still need to get u_getDataDirectory() fixed - for OS/390 (batch mode - always return "//"? ) - and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) - This is probably due to the strange file system on OS/390. It's more like - a database with short entry names than a typical file system. */ - /* U_ICUDATA_NAME should always have the correct name */ - /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ - /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ - /* PROJECT!!!!! */ - uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA"); -# else - /* set up the library name */ - uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); -# endif - -# ifdef UDATA_DEBUG - fprintf(stderr, "dllload: %s ", pathBuffer); -# endif - - handle=dllload(pathBuffer); - -# ifdef UDATA_DEBUG - fprintf(stderr, " -> %08X\n", handle ); -# endif - - if(handle != NULL) { - /* we have a data DLL - what kind of lookup do we need here? */ - /* try to find the Table of Contents */ - UDataMemory_init(pData); /* Clear the output struct. */ - val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); - if(val == 0) { - /* failed... 
so keep looking */ - return FALSE; - } -# ifdef UDATA_DEBUG - fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val); -# endif - - pData->pHeader=(const DataHeader *)val; - return TRUE; - } else { - return FALSE; /* no handle */ - } - } - - U_CFUNC void uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - uprv_free(pData->map); - pData->map = NULL; - pData->mapAddr = NULL; - pData->pHeader = NULL; - } - } - -#else -# error MAP_IMPLEMENTATION is set incorrectly -#endif diff --git a/deps/icu-small/source/common/umapfile.cpp b/deps/icu-small/source/common/umapfile.cpp new file mode 100644 index 0000000000..749a843218 --- /dev/null +++ b/deps/icu-small/source/common/umapfile.cpp @@ -0,0 +1,496 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*---------------------------------------------------------------------------- + * + * Memory mapped file wrappers for use by the ICU Data Implementation + * All of the platform-specific implementation for mapping data files + * is here. The rest of the ICU Data implementation uses only the + * wrapper functions. + * + *----------------------------------------------------------------------------*/ +/* Defines _XOPEN_SOURCE for access to POSIX functions. + * Must be before any other #includes. 
*/ +#include "uposixdefs.h" + +#include "unicode/putil.h" +#include "udatamem.h" +#include "umapfile.h" + +/* memory-mapping base definitions ------------------------------------------ */ + +#if MAP_IMPLEMENTATION==MAP_WIN32 +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX +# include +# include "cmemory.h" + + typedef HANDLE MemoryMap; + +# define IS_MAP(map) ((map)!=NULL) +#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL + typedef size_t MemoryMap; + +# define IS_MAP(map) ((map)!=0) + +# include +# include +# include +# include + +# ifndef MAP_FAILED +# define MAP_FAILED ((void*)-1) +# endif + +# if MAP_IMPLEMENTATION==MAP_390DLL + /* No memory mapping for 390 batch mode. Fake it using dll loading. */ +# include +# include "cstring.h" +# include "cmemory.h" +# include "unicode/udata.h" +# define LIB_PREFIX "lib" +# define LIB_SUFFIX ".dll" + /* This is inconvienient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ +# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" +# endif +#elif MAP_IMPLEMENTATION==MAP_STDIO +# include +# include "cmemory.h" + + typedef void *MemoryMap; + +# define IS_MAP(map) ((map)!=NULL) +#endif + +/*----------------------------------------------------------------------------* + * * + * Memory Mapped File support. Platform dependent implementation of * + * functions used by the rest of the implementation.* + * * + *----------------------------------------------------------------------------*/ +#if MAP_IMPLEMENTATION==MAP_NONE + U_CFUNC UBool + uprv_mapFile(UDataMemory *pData, const char *path) { + UDataMemory_init(pData); /* Clear the output struct. 
*/ + return FALSE; /* no file access */ + } + + U_CFUNC void uprv_unmapFile(UDataMemory *pData) { + /* nothing to do */ + } +#elif MAP_IMPLEMENTATION==MAP_WIN32 + U_CFUNC UBool + uprv_mapFile( + UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ + /* Output only; any original contents are cleared. */ + const char *path /* File path to be opened/mapped */ + ) + { + HANDLE map; + HANDLE file; + SECURITY_ATTRIBUTES mappingAttributes; + SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL; + SECURITY_DESCRIPTOR securityDesc; + + UDataMemory_init(pData); /* Clear the output struct. */ + + /* open the input file */ +#if U_PLATFORM_HAS_WINUWP_API == 0 + file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL); +#else + // First we need to go from char to UTF-16 + // u_UCharsToChars could work but it requires length. + WCHAR utf16Path[MAX_PATH]; + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(utf16Path); i++) + { + utf16Path[i] = path[i]; + if (path[i] == '\0') + { + break; + } + } + if (i >= UPRV_LENGTHOF(utf16Path)) + { + // Ran out of room, unlikely but be safe + utf16Path[UPRV_LENGTHOF(utf16Path) - 1] = '\0'; + } + + // TODO: Is it worth setting extended parameters to specify random access? + file = CreateFile2(utf16Path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL); +#endif + if(file==INVALID_HANDLE_VALUE) { + return FALSE; + } + + /* Declare and initialize a security descriptor. + This is required for multiuser systems on Windows 2000 SP4 and beyond */ + // TODO: UWP does not have this function and I do not think it is required? 
+#if U_PLATFORM_HAS_WINUWP_API == 0 + if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) { + /* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */ + if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) { + /* Make the security attributes point to the security descriptor */ + uprv_memset(&mappingAttributes, 0, sizeof(mappingAttributes)); + mappingAttributes.nLength = sizeof(mappingAttributes); + mappingAttributes.lpSecurityDescriptor = &securityDesc; + mappingAttributes.bInheritHandle = FALSE; /* object uninheritable */ + mappingAttributesPtr = &mappingAttributes; + } + } + /* else creating security descriptors can fail when we are on Windows 98, + and mappingAttributesPtr == NULL for that case. */ + + /* create an unnamed Windows file-mapping object for the specified file */ + map=CreateFileMapping(file, mappingAttributesPtr, PAGE_READONLY, 0, 0, NULL); +#else + map = CreateFileMappingFromApp(file, NULL, PAGE_READONLY, 0, NULL); +#endif + CloseHandle(file); + if(map==NULL) { + return FALSE; + } + + /* map a view of the file into our address space */ + pData->pHeader=(const DataHeader *)MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0); + if(pData->pHeader==NULL) { + CloseHandle(map); + return FALSE; + } + pData->map=map; + return TRUE; + } + + U_CFUNC void + uprv_unmapFile(UDataMemory *pData) { + if(pData!=NULL && pData->map!=NULL) { + UnmapViewOfFile(pData->pHeader); + CloseHandle(pData->map); + pData->pHeader=NULL; + pData->map=NULL; + } + } + + + +#elif MAP_IMPLEMENTATION==MAP_POSIX + U_CFUNC UBool + uprv_mapFile(UDataMemory *pData, const char *path) { + int fd; + int length; + struct stat mystat; + void *data; + + UDataMemory_init(pData); /* Clear the output struct. 
*/ + + /* determine the length of the file */ + if(stat(path, &mystat)!=0 || mystat.st_size<=0) { + return FALSE; + } + length=mystat.st_size; + + /* open the file */ + fd=open(path, O_RDONLY); + if(fd==-1) { + return FALSE; + } + + /* get a view of the mapping */ +#if U_PLATFORM != U_PF_HPUX + data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); +#else + data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); +#endif + close(fd); /* no longer needed */ + if(data==MAP_FAILED) { + return FALSE; + } + + pData->map = (char *)data + length; + pData->pHeader=(const DataHeader *)data; + pData->mapAddr = data; +#if U_PLATFORM == U_PF_IPHONE + posix_madvise(data, length, POSIX_MADV_RANDOM); +#endif + return TRUE; + } + + U_CFUNC void + uprv_unmapFile(UDataMemory *pData) { + if(pData!=NULL && pData->map!=NULL) { + size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; + if(munmap(pData->mapAddr, dataLen)==-1) { + } + pData->pHeader=NULL; + pData->map=0; + pData->mapAddr=NULL; + } + } + + + +#elif MAP_IMPLEMENTATION==MAP_STDIO + /* copy of the filestrm.c/T_FileStream_size() implementation */ + static int32_t + umap_fsize(FILE *f) { + int32_t savedPos = ftell(f); + int32_t size = 0; + + /*Changes by Bertrand A. D. doesn't affect the current position + goes to the end of the file before ftell*/ + fseek(f, 0, SEEK_END); + size = (int32_t)ftell(f); + fseek(f, savedPos, SEEK_SET); + return size; + } + + U_CFUNC UBool + uprv_mapFile(UDataMemory *pData, const char *path) { + FILE *file; + int32_t fileLength; + void *p; + + UDataMemory_init(pData); /* Clear the output struct. 
*/ + /* open the input file */ + file=fopen(path, "rb"); + if(file==NULL) { + return FALSE; + } + + /* get the file length */ + fileLength=umap_fsize(file); + if(ferror(file) || fileLength<=20) { + fclose(file); + return FALSE; + } + + /* allocate the memory to hold the file data */ + p=uprv_malloc(fileLength); + if(p==NULL) { + fclose(file); + return FALSE; + } + + /* read the file */ + if(fileLength!=fread(p, 1, fileLength, file)) { + uprv_free(p); + fclose(file); + return FALSE; + } + + fclose(file); + pData->map=p; + pData->pHeader=(const DataHeader *)p; + pData->mapAddr=p; + return TRUE; + } + + U_CFUNC void + uprv_unmapFile(UDataMemory *pData) { + if(pData!=NULL && pData->map!=NULL) { + uprv_free(pData->map); + pData->map = NULL; + pData->mapAddr = NULL; + pData->pHeader = NULL; + } + } + + +#elif MAP_IMPLEMENTATION==MAP_390DLL + /* 390 specific Library Loading. + * This is the only platform left that dynamically loads an ICU Data Library. + * All other platforms use .data files when dynamic loading is required, but + * this turn out to be awkward to support in 390 batch mode. + * + * The idea here is to hide the fact that 390 is using dll loading from the + * rest of ICU, and make it look like there is file loading happening. + * + */ + + static char *strcpy_returnEnd(char *dest, const char *src) + { + while((*dest=*src)!=0) { + ++dest; + ++src; + } + return dest; + } + + /*------------------------------------------------------------------------------ + * + * computeDirPath given a user-supplied path of an item to be opened, + * compute and return + * - the full directory path to be used + * when opening the file. + * - Pointer to null at end of above returned path + * + * Parameters: + * path: input path. Buffer is not altered. + * pathBuffer: Output buffer. Any contents are overwritten. + * + * Returns: + * Pointer to null termination in returned pathBuffer. 
+ * + * TODO: This works the way ICU historically has, but the + * whole data fallback search path is so complicated that + * proabably almost no one will ever really understand it, + * the potential for confusion is large. (It's not just + * this one function, but the whole scheme.) + * + *------------------------------------------------------------------------------*/ + static char *uprv_computeDirPath(const char *path, char *pathBuffer) + { + char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ + int32_t pathLen; /* Length of the returned directory path */ + + finalSlash = 0; + if (path != 0) { + finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); + } + + *pathBuffer = 0; + if (finalSlash == 0) { + /* No user-supplied path. + * Copy the ICU_DATA path to the path buffer and return that*/ + const char *icuDataDir; + icuDataDir=u_getDataDirectory(); + if(icuDataDir!=NULL && *icuDataDir!=0) { + return strcpy_returnEnd(pathBuffer, icuDataDir); + } else { + /* there is no icuDataDir either. Just return the empty pathBuffer. */ + return pathBuffer; + } + } + + /* User supplied path did contain a directory portion. + * Copy it to the output path buffer */ + pathLen = (int32_t)(finalSlash - path + 1); + uprv_memcpy(pathBuffer, path, pathLen); + *(pathBuffer+pathLen) = 0; + return pathBuffer+pathLen; + } + + +# define DATA_TYPE "dat" + + U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path) { + const char *inBasename; + char *basename; + char pathBuffer[1024]; + const DataHeader *pHeader; + dllhandle *handle; + void *val=0; + + inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); + if(inBasename==NULL) { + inBasename = path; + } else { + inBasename++; + } + basename=uprv_computeDirPath(path, pathBuffer); + if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) { + /* must mmap file... for build */ + int fd; + int length; + struct stat mystat; + void *data; + UDataMemory_init(pData); /* Clear the output struct. 
*/ + + /* determine the length of the file */ + if(stat(path, &mystat)!=0 || mystat.st_size<=0) { + return FALSE; + } + length=mystat.st_size; /* NOTE(review): st_size is narrowed to the int declared above - confirm files cannot exceed INT_MAX */ + + /* open the file */ + fd=open(path, O_RDONLY); + if(fd==-1) { + return FALSE; + } + + /* get a view of the mapping */ + data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); /* no longer needed */ + if(data==MAP_FAILED) { + return FALSE; + } + pData->map = (char *)data + length; /* map records the END of the mapping; mapAddr (below) records its start */ + pData->pHeader=(const DataHeader *)data; + pData->mapAddr = data; + return TRUE; + } + +# ifdef OS390BATCH + /* ### hack: we still need to get u_getDataDirectory() fixed + for OS/390 (batch mode - always return "//"? ) + and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) + This is probably due to the strange file system on OS/390. It's more like + a database with short entry names than a typical file system. */ + /* U_ICUDATA_NAME should always have the correct name */ + /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ + /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ + /* PROJECT!!!!! */ + uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA"); +# else + /* set up the library name */ + uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); /* basename is the pointer returned by uprv_computeDirPath(), i.e. the NUL ending the directory part of pathBuffer, so this appends the library name */ +# endif + +# ifdef UDATA_DEBUG + fprintf(stderr, "dllload: %s ", pathBuffer); +# endif + + handle=dllload(pathBuffer); + +# ifdef UDATA_DEBUG + fprintf(stderr, " -> %08X\n", handle ); +# endif + + if(handle != NULL) { + /* we have a data DLL - what kind of lookup do we need here? */ + /* try to find the Table of Contents */ + UDataMemory_init(pData); /* Clear the output struct. */ + val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); + if(val == 0) { + /* failed...
so keep looking */ + return FALSE; /* NOTE(review): the handle obtained from dllload() is not released on this path - confirm that is intended */ + } +# ifdef UDATA_DEBUG + fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val); +# endif + + pData->pHeader=(const DataHeader *)val; + return TRUE; + } else { + return FALSE; /* no handle */ + } + } + + U_CFUNC void uprv_unmapFile(UDataMemory *pData) { + if(pData!=NULL && pData->map!=NULL) { + uprv_free(pData->map); /* NOTE(review): in the mmap() branch of uprv_mapFile() above, map is (char *)data + length, which is not a uprv_malloc() pointer; the DLL branch only sets pHeader. Confirm uprv_free() is the right release here (presumably munmap() of mapAddr is needed instead) */ + pData->map = NULL; + pData->mapAddr = NULL; + pData->pHeader = NULL; + } + } + +#else +# error MAP_IMPLEMENTATION is set incorrectly +#endif diff --git a/deps/icu-small/source/common/umapfile.h b/deps/icu-small/source/common/umapfile.h index 70a6cc5f86..24e476b11e 100644 --- a/deps/icu-small/source/common/umapfile.h +++ b/deps/icu-small/source/common/umapfile.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/umath.c b/deps/icu-small/source/common/umath.c deleted file mode 100644 index 86f58819e0..0000000000 --- a/deps/icu-small/source/common/umath.c +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2006, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* This file contains platform independent math. -*/ - -#include "putilimp.h" - -U_CAPI int32_t U_EXPORT2 -uprv_max(int32_t x, int32_t y) -{ - return (x > y ? x : y); -} - -U_CAPI int32_t U_EXPORT2 -uprv_min(int32_t x, int32_t y) -{ - return (x > y ?
y : x); -} diff --git a/deps/icu-small/source/common/umath.cpp b/deps/icu-small/source/common/umath.cpp new file mode 100644 index 0000000000..3ab72ab482 --- /dev/null +++ b/deps/icu-small/source/common/umath.cpp @@ -0,0 +1,25 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* This file contains platform independent math. +*/ + +#include "putilimp.h" + +U_CAPI int32_t U_EXPORT2 +uprv_max(int32_t x, int32_t y) /* returns the greater of x and y */ +{ + return (x > y ? x : y); +} + +U_CAPI int32_t U_EXPORT2 +uprv_min(int32_t x, int32_t y) /* returns the lesser of x and y */ +{ + return (x > y ? y : x); +} diff --git a/deps/icu-small/source/common/umutex.cpp b/deps/icu-small/source/common/umutex.cpp index 13569cb075..12bd7575d6 100644 --- a/deps/icu-small/source/common/umutex.cpp +++ b/deps/icu-small/source/common/umutex.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/umutex.h b/deps/icu-small/source/common/umutex.h index d6618ed766..8f2f612354 100644 --- a/deps/icu-small/source/common/umutex.h +++ b/deps/icu-small/source/common/umutex.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -86,7 +86,9 @@ U_NAMESPACE_END // Original plan was to use gcc atomics for MinGW, but they // aren't supported, so we fold MinGW into this path. +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE @@ -331,7 +333,9 @@ U_NAMESPACE_END * win32 APIs for Critical Sections. */ +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE diff --git a/deps/icu-small/source/common/unames.cpp b/deps/icu-small/source/common/unames.cpp index b8c4151f42..13a4572e1c 100644 --- a/deps/icu-small/source/common/unames.cpp +++ b/deps/icu-small/source/common/unames.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: unames.c -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/appendable.h b/deps/icu-small/source/common/unicode/appendable.h index 2ae334505e..8512c2f303 100644 --- a/deps/icu-small/source/common/unicode/appendable.h +++ b/deps/icu-small/source/common/unicode/appendable.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: appendable.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -19,7 +19,7 @@ /** * \file - * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (UChars). + * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts). */ #include "unicode/utypes.h" @@ -34,10 +34,10 @@ class UnicodeString; * Combines elements of Java Appendable and ICU4C ByteSink. * * This class can be used in APIs where it does not matter whether the actual destination is - * a UnicodeString, a UChar[] array, a UnicodeSet, or any other object + * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object * that receives and processes characters and/or strings. * - * Implementation classes must implement at least appendCodeUnit(UChar). + * Implementation classes must implement at least appendCodeUnit(char16_t). * The base class provides default implementations for the other methods. * * The methods do not take UErrorCode parameters. @@ -62,11 +62,11 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendCodeUnit(UChar c) = 0; + virtual UBool appendCodeUnit(char16_t c) = 0; /** * Appends a code point. - * The default implementation calls appendCodeUnit(UChar) once or twice. + * The default implementation calls appendCodeUnit(char16_t) once or twice. * @param c code point 0..0x10ffff * @return TRUE if the operation succeeded * @stable ICU 4.8 @@ -75,20 +75,20 @@ public: /** * Appends a string. - * The default implementation calls appendCodeUnit(UChar) for each code unit. + * The default implementation calls appendCodeUnit(char16_t) for each code unit. 
* @param s string, must not be NULL if length!=0 * @param length string length, or -1 if NUL-terminated * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendString(const UChar *s, int32_t length); + virtual UBool appendString(const char16_t *s, int32_t length); /** * Tells the object that the caller is going to append roughly - * appendCapacity UChars. A subclass might use this to pre-allocate + * appendCapacity char16_ts. A subclass might use this to pre-allocate * a larger buffer if necessary. * The default implementation does nothing. (It always returns TRUE.) - * @param appendCapacity estimated number of UChars that will be appended + * @param appendCapacity estimated number of char16_ts that will be appended * @return TRUE if the operation succeeded * @stable ICU 4.8 */ @@ -102,19 +102,19 @@ public: * The returned buffer is only valid until the next operation * on this Appendable. * - * After writing at most *resultCapacity UChars, call appendString() with the - * pointer returned from this function and the number of UChars written. - * Many appendString() implementations will avoid copying UChars if this function + * After writing at most *resultCapacity char16_ts, call appendString() with the + * pointer returned from this function and the number of char16_ts written. + * Many appendString() implementations will avoid copying char16_ts if this function * returned an internal buffer. * * Partial usage example: * \code * int32_t capacity; - * UChar* buffer = app.getAppendBuffer(..., &capacity); - * ... Write n UChars into buffer, with n <= capacity. + * char16_t* buffer = app.getAppendBuffer(..., &capacity); + * ... Write n char16_ts into buffer, with n <= capacity. * app.appendString(buffer, n); * \endcode - * In many implementations, that call to append will avoid copying UChars. + * In many implementations, that call to append will avoid copying char16_ts. 
* * If the Appendable allocates or reallocates an internal buffer, it should use * the desiredCapacityHint if appropriate. @@ -138,9 +138,9 @@ public: * @return a buffer with *resultCapacity>=minCapacity * @stable ICU 4.8 */ - virtual UChar *getAppendBuffer(int32_t minCapacity, + virtual char16_t *getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, + char16_t *scratch, int32_t scratchCapacity, int32_t *resultCapacity); }; @@ -171,7 +171,7 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendCodeUnit(UChar c); + virtual UBool appendCodeUnit(char16_t c); /** * Appends a code point to the string. @@ -188,12 +188,12 @@ public: * @return TRUE if the operation succeeded * @stable ICU 4.8 */ - virtual UBool appendString(const UChar *s, int32_t length); + virtual UBool appendString(const char16_t *s, int32_t length); /** * Tells the UnicodeString that the caller is going to append roughly - * appendCapacity UChars. - * @param appendCapacity estimated number of UChars that will be appended + * appendCapacity char16_ts. + * @param appendCapacity estimated number of char16_ts that will be appended * @return TRUE if the operation succeeded * @stable ICU 4.8 */ @@ -220,9 +220,9 @@ public: * @return a buffer with *resultCapacity>=minCapacity * @stable ICU 4.8 */ - virtual UChar *getAppendBuffer(int32_t minCapacity, + virtual char16_t *getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, + char16_t *scratch, int32_t scratchCapacity, int32_t *resultCapacity); private: diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h index 88b39c6699..b1e4cc68c6 100644 --- a/deps/icu-small/source/common/unicode/brkiter.h +++ b/deps/icu-small/source/common/unicode/brkiter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. 
and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/common/unicode/bytestream.h b/deps/icu-small/source/common/unicode/bytestream.h index 4e404a6edc..477892b275 100644 --- a/deps/icu-small/source/common/unicode/bytestream.h +++ b/deps/icu-small/source/common/unicode/bytestream.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // Copyright (C) 2009-2012, International Business Machines // Corporation and others. All Rights Reserved. @@ -222,8 +222,6 @@ private: CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented }; -#if U_HAVE_STD_STRING - /** * Implementation of ByteSink that writes to a "string". * The StringClass is usually instantiated with a std::string. @@ -252,8 +250,6 @@ class StringByteSink : public ByteSink { StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented }; -#endif - U_NAMESPACE_END #endif // __BYTESTREAM_H__ diff --git a/deps/icu-small/source/common/unicode/bytestrie.h b/deps/icu-small/source/common/unicode/bytestrie.h index 4a30ce1c04..c57b8ccfeb 100644 --- a/deps/icu-small/source/common/unicode/bytestrie.h +++ b/deps/icu-small/source/common/unicode/bytestrie.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: bytestrie.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/icu-small/source/common/unicode/bytestriebuilder.h index 3ad800975b..0f9f5e2c06 100644 --- a/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ b/deps/icu-small/source/common/unicode/bytestriebuilder.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: bytestriebuilder.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -127,14 +127,14 @@ private: void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); virtual int32_t getElementStringLength(int32_t i) const; - virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; + virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const; virtual int32_t getElementValue(int32_t i) const; virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const; virtual UBool matchNodesCanHaveValues() const { return FALSE; } diff --git a/deps/icu-small/source/common/unicode/caniter.h b/deps/icu-small/source/common/unicode/caniter.h 
index b988b2003d..543341f42c 100644 --- a/deps/icu-small/source/common/unicode/caniter.h +++ b/deps/icu-small/source/common/unicode/caniter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -187,7 +187,7 @@ private: UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) //Set getEquivalents2(String segment); - Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status); + Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); /** @@ -196,7 +196,7 @@ private: * If so, take the remainder, and return the equivalents */ //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); - Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); void cleanPieces(); diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h new file mode 100644 index 0000000000..98184820d5 --- /dev/null +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -0,0 +1,359 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// casemap.h +// created: 2017jan12 Markus W. 
Scherer + +#ifndef __CASEMAP_H__ +#define __CASEMAP_H__ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Low-level C++ case mapping functions. + */ + +U_NAMESPACE_BEGIN + +#ifndef U_HIDE_DRAFT_API + +class BreakIterator; +class Edits; + +/** + * Low-level C++ case mapping functions. + * + * @draft ICU 59 + */ +class U_COMMON_API CaseMap U_FINAL : public UMemory { +public: + /** + * Lowercases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
+ * + * @see u_strToLower + * @draft ICU 59 + */ + static int32_t toLower( + const char *locale, uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
+ * + * @see u_strToUpper + * @draft ICU 59 + */ + static int32_t toUpper( + const char *locale, uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setText()) + * and used one or more times for iteration (first() and next()). + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. 
edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToTitle + * @see ucasemap_toTitle + * @draft ICU 59 + */ + static int32_t toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-16 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. edits can be NULL. 
+ * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strFoldCase + * @draft ICU 59 + */ + static int32_t fold( + uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Lowercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
+ * + * @see ucasemap_utf8ToLower + * @draft ICU 59 + */ + static int32_t utf8ToLower( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. 
+ * + * @see ucasemap_utf8ToUpper + * @draft ICU 59 + */ + static int32_t utf8ToUpper( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setText()) + * and used one or more times for iteration (first() and next()). + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. 
edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToTitle + * @draft ICU 59 + */ + static int32_t utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-8 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first. edits can be NULL. 
+ * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8FoldCase + * @draft ICU 59 + */ + static int32_t utf8Fold( + uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +private: + CaseMap() = delete; + CaseMap(const CaseMap &other) = delete; + CaseMap &operator=(const CaseMap &other) = delete; +}; + +#endif // U_HIDE_DRAFT_API + +U_NAMESPACE_END + +#endif // __CASEMAP_H__ diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h new file mode 100644 index 0000000000..fa17c62446 --- /dev/null +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -0,0 +1,306 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.h +// created: 2017feb28 Markus W. Scherer + +#ifndef __CHAR16PTR_H__ +#define __CHAR16PTR_H__ + +#include +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: char16_t pointer wrappers with + * implicit conversion from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. + */ + +U_NAMESPACE_BEGIN + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. 
+#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT +# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") +#endif + +// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it +// is now used in place of UChar* in several stable C++ methods +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @draft ICU 59 + */ +class U_COMMON_API Char16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @draft ICU 59 + */ + inline Char16Ptr(char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @draft ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @draft ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @draft ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Destructor. + * @draft ICU 59 + */ + inline ~Char16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @draft ICU 59 + */ + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). 
+ * @return the wrapped pointer + * @draft ICU 59 + */ + inline operator char16_t *() const { return get(); } + +private: + Char16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + char16_t *p; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p); +} + +char16_t *Char16Ptr::get() const { return p; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } +Char16Ptr::~Char16Ptr() {} + +char16_t *Char16Ptr::get() const { return u.cp; } + +#endif + +// Do not use #ifndef U_HIDE_DRAFT_API for the following class, it is +// now used in place of const UChar* in several stable C++ methods +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @draft ICU 59 + */ +class U_COMMON_API ConstChar16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @draft ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @draft ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) 
+ * @param p pointer to be converted + * @draft ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @draft ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + + /** + * Destructor. + * @draft ICU 59 + */ + inline ~ConstChar16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @draft ICU 59 + */ + inline const char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @draft ICU 59 + */ + inline operator const char16_t *() const { return get(); } + +private: + ConstChar16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template static const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast(t); + } + + const char16_t *p; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + U_ALIASING_BARRIER(p); +} + +const char16_t *ConstChar16Ptr::get() const { return p; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} + +const char16_t *ConstChar16Ptr::get() const { return u.cp; } + +#endif + +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. 
+ * @param p pointer + * @return p as const UChar * + * @draft ICU 59 + */ +inline const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as UChar * + * @draft ICU 59 + */ +inline UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @draft ICU 59 + */ +inline const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as OldUChar * + * @draft ICU 59 + */ +inline OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +} + +U_NAMESPACE_END + +#endif // __CHAR16PTR_H__ diff --git a/deps/icu-small/source/common/unicode/chariter.h b/deps/icu-small/source/common/unicode/chariter.h index 7c6f98f640..dbed89dbe6 100644 --- a/deps/icu-small/source/common/unicode/chariter.h +++ b/deps/icu-small/source/common/unicode/chariter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************** @@ -78,7 +78,7 @@ U_NAMESPACE_BEGIN * } * * void function1(ForwardCharacterIterator &it) { - * UChar c; + * char16_t c; * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) { * // use c * } @@ -149,7 +149,7 @@ public: * @return the current code unit. 
* @stable ICU 2.0 */ - virtual UChar nextPostInc(void) = 0; + virtual char16_t nextPostInc(void) = 0; /** * Gets the current code point for returning and advances to the next code point @@ -230,7 +230,7 @@ protected: * showing a way to convert simple for() loops: * \code * void forward2(CharacterIterator &it) { - * UChar c; + * char16_t c; * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { * // use c * } @@ -249,7 +249,7 @@ protected: * Backward iteration with a more traditional for() loop: * \code * void backward2(CharacterIterator &it) { - * UChar c; + * char16_t c; * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { * // use c * } @@ -266,7 +266,7 @@ protected: * // get the position * int32_t pos=it.getIndex(); * // get the previous code unit - * UChar u=it.previous(); + * char16_t u=it.previous(); * // move back one more code unit * it.move(-1, CharacterIterator::kCurrent); * // set the position back to where it was @@ -283,7 +283,7 @@ protected: * Function processing characters, in this example simple output *
  * \code
- *  void processChar( UChar c )
+ *  void processChar( char16_t c )
  *  {
  *      cout << " " << c;
  *  }
@@ -294,7 +294,7 @@ protected:
  * \code
  *  void traverseForward(CharacterIterator& iter)
  *  {
- *      for(UChar c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
+ *      for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
  *          processChar(c);
  *      }
  *  }
@@ -305,7 +305,7 @@ protected:
  * \code
  *  void traverseBackward(CharacterIterator& iter)
  *  {
- *      for(UChar c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
+ *      for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
  *          processChar(c);
  *      }
  *  }
@@ -317,7 +317,7 @@ protected:
  * \code
  * void traverseOut(CharacterIterator& iter, int32_t pos)
  * {
- *      UChar c;
+ *      char16_t c;
  *      for (c = iter.setIndex(pos);
  *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
  *          c = iter.next()) {}
@@ -386,7 +386,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar         first(void) = 0;
+    virtual char16_t         first(void) = 0;
 
     /**
      * Sets the iterator to refer to the first code unit in its
@@ -396,7 +396,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar         firstPostInc(void);
+    virtual char16_t         firstPostInc(void);
 
     /**
      * Sets the iterator to refer to the first code point in its
@@ -435,7 +435,7 @@ public:
      * @return the last code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         last(void) = 0;
+    virtual char16_t         last(void) = 0;
 
     /**
      * Sets the iterator to refer to the last code point in its
@@ -463,7 +463,7 @@ public:
      * @return the "position"-th code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         setIndex(int32_t position) = 0;
+    virtual char16_t         setIndex(int32_t position) = 0;
 
     /**
      * Sets the iterator to refer to the beginning of the code point
@@ -483,7 +483,7 @@ public:
      * @return the current code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         current(void) const = 0;
+    virtual char16_t         current(void) const = 0;
 
     /**
      * Returns the code point the iterator currently refers to.
@@ -499,7 +499,7 @@ public:
      * @return the next code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         next(void) = 0;
+    virtual char16_t         next(void) = 0;
 
     /**
      * Advances to the next code point in the iteration range
@@ -520,7 +520,7 @@ public:
      * @return the previous code unit.
      * @stable ICU 2.0
      */
-    virtual UChar         previous(void) = 0;
+    virtual char16_t         previous(void) = 0;
 
     /**
      * Advances to the previous code point in the iteration range
@@ -607,6 +607,10 @@ public:
      * @return the new position
      * @stable ICU 2.0
      */
+#ifdef move32
+     // Some system headers define a conflicting move32 macro that ICU does not use; undefine it before declaring move32().
+#undef move32
+#endif
     virtual int32_t      move32(int32_t delta, EOrigin origin) = 0;
 
     /**
diff --git a/deps/icu-small/source/common/unicode/dbbi.h b/deps/icu-small/source/common/unicode/dbbi.h
index be9618c551..62509c5227 100644
--- a/deps/icu-small/source/common/unicode/dbbi.h
+++ b/deps/icu-small/source/common/unicode/dbbi.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /*
 **********************************************************************
diff --git a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h
index 7b053b0464..6e59f3e388 100644
--- a/deps/icu-small/source/common/unicode/docmain.h
+++ b/deps/icu-small/source/common/unicode/docmain.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /********************************************************************
  * COPYRIGHT:
@@ -98,6 +98,11 @@
  *     C API
  *   
  *   
+ *     Codepage Detection
+ *     ucsdet.h
+ *     C API
+ *   
+ *   
  *     Unicode Text Compression
  *     ucnv.h
(encoding name "SCSU" or "BOCU-1") * C API diff --git a/deps/icu-small/source/common/unicode/dtintrv.h b/deps/icu-small/source/common/unicode/dtintrv.h index c16ee63d96..2221b36c9b 100644 --- a/deps/icu-small/source/common/unicode/dtintrv.h +++ b/deps/icu-small/source/common/unicode/dtintrv.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h new file mode 100644 index 0000000000..8d3becb7a2 --- /dev/null +++ b/deps/icu-small/source/common/unicode/edits.h @@ -0,0 +1,245 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// edits.h +// created: 2016dec30 Markus W. Scherer + +#ifndef __EDITS_H__ +#define __EDITS_H__ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: C++ class Edits for low-level string transformations on styled text. + */ + +U_NAMESPACE_BEGIN + +#ifndef U_HIDE_DRAFT_API + +/** + * Records lengths of string edits but not replacement text. + * Supports replacements, insertions, deletions in linear progression. + * Does not support moving/reordering of text. + * + * An Edits object tracks a separate UErrorCode, but ICU string transformation functions + * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. + * + * @draft ICU 59 + */ +class U_COMMON_API Edits U_FINAL : public UMemory { +public: + /** + * Constructs an empty object. + * @draft ICU 59 + */ + Edits() : + array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), + errorCode(U_ZERO_ERROR) {} + /** + * Destructor. + * @draft ICU 59 + */ + ~Edits(); + + /** + * Resets the data but may not release memory. 
+ * @draft ICU 59 + */ + void reset(); + + /** + * Adds a record for an unchanged segment of text. + * Normally called from inside ICU string transformation functions, not user code. + * @draft ICU 59 + */ + void addUnchanged(int32_t unchangedLength); + /** + * Adds a record for a text replacement/insertion/deletion. + * Normally called from inside ICU string transformation functions, not user code. + * @draft ICU 59 + */ + void addReplace(int32_t oldLength, int32_t newLength); + /** + * Sets the UErrorCode if an error occurred while recording edits. + * Preserves older error codes in the outErrorCode. + * Normally called from inside ICU string transformation functions, not user code. + * @return TRUE if U_FAILURE(outErrorCode) + * @draft ICU 59 + */ + UBool copyErrorTo(UErrorCode &outErrorCode); + + /** + * How much longer is the new text compared with the old text? + * @return new length minus old length + * @draft ICU 59 + */ + int32_t lengthDelta() const { return delta; } + /** + * @return TRUE if there are any change edits + * @draft ICU 59 + */ + UBool hasChanges() const; + + /** + * Access to the list of edits. + * @see getCoarseIterator + * @see getFineIterator + * @draft ICU 59 + */ + struct U_COMMON_API Iterator U_FINAL : public UMemory { + /** + * Copy constructor. + * @draft ICU 59 + */ + Iterator(const Iterator &other) = default; + /** + * Assignment operator. + * @draft ICU 59 + */ + Iterator &operator=(const Iterator &other) = default; + + /** + * Advances to the next edit. + * @return TRUE if there is another edit + * @draft ICU 59 + */ + UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } + + /** + * Finds the edit that contains the source index. + * The source index may be found in a non-change + * even if normal iteration would skip non-changes. + * Normal iteration can continue from a found edit. + * + * The iterator state before this search logically does not matter. + * (It may affect the performance of the search.) 
+ * + * The iterator state after this search is undefined + * if the source index is out of bounds for the source string. + * + * @param i source index + * @return TRUE if the edit for the source index was found + * @draft ICU 59 + */ + UBool findSourceIndex(int32_t i, UErrorCode &errorCode); + + /** + * @return TRUE if this edit replaces oldLength() units with newLength() different ones. + * FALSE if oldLength units remain unchanged. + * @draft ICU 59 + */ + UBool hasChange() const { return changed; } + /** + * @return the number of units in the original string which are replaced or remain unchanged. + * @draft ICU 59 + */ + int32_t oldLength() const { return oldLength_; } + /** + * @return the number of units in the modified string, if hasChange() is TRUE. + * Same as oldLength if hasChange() is FALSE. + * @draft ICU 59 + */ + int32_t newLength() const { return newLength_; } + + /** + * @return the current index into the source string + * @draft ICU 59 + */ + int32_t sourceIndex() const { return srcIndex; } + /** + * @return the current index into the replacement-characters-only string, + * not counting unchanged spans + * @draft ICU 59 + */ + int32_t replacementIndex() const { return replIndex; } + /** + * @return the current index into the full destination string + * @draft ICU 59 + */ + int32_t destinationIndex() const { return destIndex; } + + private: + friend class Edits; + + Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); + + int32_t readLength(int32_t head); + void updateIndexes(); + UBool noNext(); + UBool next(UBool onlyChanges, UErrorCode &errorCode); + + const uint16_t *array; + int32_t index, length; + int32_t remaining; + UBool onlyChanges_, coarse; + + UBool changed; + int32_t oldLength_, newLength_; + int32_t srcIndex, replIndex, destIndex; + }; + + /** + * Returns an Iterator for coarse-grained changes for simple string updates. + * Skips non-changes. + * @return an Iterator that merges adjacent changes. 
+ * @draft ICU 59 + */ + Iterator getCoarseChangesIterator() const { + return Iterator(array, length, TRUE, TRUE); + } + + /** + * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. + * @return an Iterator that merges adjacent changes. + * @draft ICU 59 + */ + Iterator getCoarseIterator() const { + return Iterator(array, length, FALSE, TRUE); + } + + /** + * Returns an Iterator for fine-grained changes for modifying styled text. + * Skips non-changes. + * @return an Iterator that separates adjacent changes. + * @draft ICU 59 + */ + Iterator getFineChangesIterator() const { + return Iterator(array, length, TRUE, FALSE); + } + + /** + * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. + * @return an Iterator that separates adjacent changes. + * @draft ICU 59 + */ + Iterator getFineIterator() const { + return Iterator(array, length, FALSE, FALSE); + } + +private: + Edits(const Edits &) = delete; + Edits &operator=(const Edits &) = delete; + + void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } + int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } + + void append(int32_t r); + UBool growArray(); + + static const int32_t STACK_CAPACITY = 100; + uint16_t *array; + int32_t capacity; + int32_t length; + int32_t delta; + UErrorCode errorCode; + uint16_t stackArray[STACK_CAPACITY]; +}; + +#endif // U_HIDE_DRAFT_API + +U_NAMESPACE_END + +#endif // __EDITS_H__ diff --git a/deps/icu-small/source/common/unicode/enumset.h b/deps/icu-small/source/common/unicode/enumset.h index e7c39c40dd..82b2074ec3 100644 --- a/deps/icu-small/source/common/unicode/enumset.h +++ b/deps/icu-small/source/common/unicode/enumset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unicode/errorcode.h b/deps/icu-small/source/common/unicode/errorcode.h index 92feb2b1b6..1e5df8f03e 100644 --- a/deps/icu-small/source/common/unicode/errorcode.h +++ b/deps/icu-small/source/common/unicode/errorcode.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: errorcode.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index 261151138c..51bb651fba 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/common/unicode/icudataver.h b/deps/icu-small/source/common/unicode/icudataver.h index c5c44fc075..d5c728da88 100644 --- a/deps/icu-small/source/common/unicode/icudataver.h +++ b/deps/icu-small/source/common/unicode/icudataver.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unicode/icuplug.h b/deps/icu-small/source/common/unicode/icuplug.h index ff5e054d38..2a11b96be6 100644 --- a/deps/icu-small/source/common/unicode/icuplug.h +++ b/deps/icu-small/source/common/unicode/icuplug.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unicode/idna.h b/deps/icu-small/source/common/unicode/idna.h index 23a1d7ca0e..f08658e502 100644 --- a/deps/icu-small/source/common/unicode/idna.h +++ b/deps/icu-small/source/common/unicode/idna.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: idna.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/listformatter.h b/deps/icu-small/source/common/unicode/listformatter.h index f2c898881a..180fbcb5cd 100644 --- a/deps/icu-small/source/common/unicode/listformatter.h +++ b/deps/icu-small/source/common/unicode/listformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: listformatter.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -157,6 +157,8 @@ class U_COMMON_API ListFormatter : public UObject{ private: static void initializeHash(UErrorCode& errorCode); static const ListFormatInternal* getListFormatInternal(const Locale& locale, const char *style, UErrorCode& errorCode); + struct ListPatternsSink; + static ListFormatInternal* loadListFormatInternal(const Locale& locale, const char* style, UErrorCode& errorCode); ListFormatter(); diff --git a/deps/icu-small/source/common/unicode/localpointer.h b/deps/icu-small/source/common/unicode/localpointer.h index 9ac5de5f06..3ab820188f 100644 --- a/deps/icu-small/source/common/unicode/localpointer.h +++ b/deps/icu-small/source/common/unicode/localpointer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: localpointer.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -174,9 +174,9 @@ private: * \code * LocalPointer s(new UnicodeString((UChar32)0x50005)); * int32_t length=s->length(); // 2 - * UChar lead=s->charAt(0); // 0xd900 + * char16_t lead=s->charAt(0); // 0xd900 * if(some condition) { return; } // no need to explicitly delete the pointer - * s.adoptInstead(new UnicodeString((UChar)0xfffc)); + * s.adoptInstead(new UnicodeString((char16_t)0xfffc)); * length=s->length(); // 1 * // no need to explicitly delete the pointer * \endcode @@ -323,10 +323,10 @@ public: * Usage example: * \code * LocalArray a(new UnicodeString[2]); - * a[0].append((UChar)0x61); + * a[0].append((char16_t)0x61); * if(some condition) { return; } // no need to explicitly delete the array * a.adoptInstead(new UnicodeString[4]); - * a[3].append((UChar)0x62).append((UChar)0x63).reverse(); + * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse(); * // no need to explicitly delete the array * \endcode * diff --git a/deps/icu-small/source/common/unicode/locdspnm.h b/deps/icu-small/source/common/unicode/locdspnm.h index b8c7a0ccae..7f227829b4 100644 --- a/deps/icu-small/source/common/unicode/locdspnm.h +++ b/deps/icu-small/source/common/unicode/locdspnm.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -20,6 +20,7 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/locid.h" +#include "unicode/strenum.h" #include "unicode/uscript.h" #include "unicode/uldnames.h" #include "unicode/udisplaycontext.h" diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index f797c25e8b..37a34f7140 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -33,10 +33,8 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" -#include "unicode/unistr.h" #include "unicode/putil.h" #include "unicode/uloc.h" -#include "unicode/strenum.h" /** * \file @@ -48,6 +46,9 @@ U_NAMESPACE_BEGIN // Forward Declarations void U_CALLCONV locale_available_init(); /**< @internal */ +class StringEnumeration; +class UnicodeString; + /** * A Locale object represents a specific geographical, political, * or cultural region. An operation that requires a Locale to perform diff --git a/deps/icu-small/source/common/unicode/messagepattern.h b/deps/icu-small/source/common/unicode/messagepattern.h index 8c1115e04d..f28adafee0 100644 --- a/deps/icu-small/source/common/unicode/messagepattern.h +++ b/deps/icu-small/source/common/unicode/messagepattern.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: messagepattern.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/normalizer2.h b/deps/icu-small/source/common/unicode/normalizer2.h index 6a7668bfd5..d326da948a 100644 --- a/deps/icu-small/source/common/unicode/normalizer2.h +++ b/deps/icu-small/source/common/unicode/normalizer2.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: normalizer2.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -282,7 +282,7 @@ public: * * When used on a standard NFC Normalizer2 instance, * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); - * in this case, the result contains either one or two code points (=1..4 UChars). + * in this case, the result contains either one or two code points (=1..4 char16_ts). * * This function is independent of the mode of the Normalizer2. * The default implementation returns FALSE. diff --git a/deps/icu-small/source/common/unicode/normlzr.h b/deps/icu-small/source/common/unicode/normlzr.h index b9b67bc434..82335ae6d7 100644 --- a/deps/icu-small/source/common/unicode/normlzr.h +++ b/deps/icu-small/source/common/unicode/normlzr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************** @@ -168,7 +168,7 @@ public: * @param mode The normalization mode. 
* @deprecated ICU 56 Use Normalizer2 instead. */ - Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); + Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode); /** * Creates a new Normalizer object for iterating over the @@ -704,7 +704,7 @@ public: * @param status a UErrorCode * @deprecated ICU 56 Use Normalizer2 instead. */ - void setText(const UChar* newText, + void setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status); /** @@ -796,8 +796,8 @@ Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { // all argument checking is done in unorm_compare - return unorm_compare(s1.getBuffer(), s1.length(), - s2.getBuffer(), s2.length(), + return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), + toUCharPtr(s2.getBuffer()), s2.length(), options, &errorCode); } diff --git a/deps/icu-small/source/common/unicode/parseerr.h b/deps/icu-small/source/common/unicode/parseerr.h index f1976581a1..c8283bfcc9 100644 --- a/deps/icu-small/source/common/unicode/parseerr.h +++ b/deps/icu-small/source/common/unicode/parseerr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/parsepos.h b/deps/icu-small/source/common/unicode/parsepos.h index 7bc2627773..50cc56db59 100644 --- a/deps/icu-small/source/common/unicode/parsepos.h +++ b/deps/icu-small/source/common/unicode/parsepos.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved. 
diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index bf31af5d13..23b9464c65 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -150,7 +150,7 @@ # define U_PLATFORM U_PF_ANDROID /* Android wchar_t support depends on the API level. */ # include -#elif defined(__native_client__) +#elif defined(__pnacl__) || defined(__native_client__) # define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT #elif defined(linux) || defined(__linux__) || defined(__linux) # define U_PLATFORM U_PF_LINUX @@ -233,6 +233,18 @@ # define U_PLATFORM_HAS_WIN32_API 0 #endif +/** + * \def U_PLATFORM_HAS_WINUWP_API + * Defines whether target is intended for Universal Windows Platform API + * Set to 1 for Windows10 Release Solution Configuration + * @internal + */ +#ifdef U_PLATFORM_HAS_WINUWP_API + /* Use the predefined value. */ +#else +# define U_PLATFORM_HAS_WINUWP_API 0 +#endif + /** * \def U_PLATFORM_IMPLEMENTS_POSIX * Defines whether the platform implements (most of) the POSIX API. @@ -343,17 +355,6 @@ #define U_IOSTREAM_SOURCE 199711 #endif -/** - * \def U_HAVE_STD_STRING - * Defines whether the standard C++ (STL) <string> header is available. - * @internal - */ -#ifdef U_HAVE_STD_STRING - /* Use the predefined value. 
*/ -#else -# define U_HAVE_STD_STRING 1 -#endif - /*===========================================================================*/ /** @{ Compiler and environment features */ /*===========================================================================*/ @@ -430,7 +431,7 @@ # define U_HAVE_DEBUG_LOCATION_NEW 0 #endif -/* Compatibility with non clang compilers: http://clang.llvm.org/docs/LanguageExtensions.html */ +/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */ #ifndef __has_attribute # define __has_attribute(x) 0 #endif @@ -497,6 +498,13 @@ # define U_CPLUSPLUS_VERSION 1 #endif +#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +// add in std::nullptr_t +namespace std { + typedef decltype(nullptr) nullptr_t; +}; +#endif + /** * \def U_HAVE_RVALUE_REFERENCES * Set to 1 if the compiler supports rvalue references. @@ -537,17 +545,22 @@ * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough * @internal */ -#ifdef __cplusplus +#ifndef __cplusplus + // Not for C. +#elif defined(U_FALLTHROUGH) + // Use the predefined value. +#elif defined(__clang__) + // Test for compiler vs. feature separately. + // Other compilers might choke on the feature test. # if __has_cpp_attribute(clang::fallthrough) || \ (__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")) # define U_FALLTHROUGH [[clang::fallthrough]] -# else -# define U_FALLTHROUGH # endif -#else -# define U_FALLTHROUGH #endif +#ifndef U_FALLTHROUGH +# define U_FALLTHROUGH +#endif /** @} */ @@ -764,6 +777,7 @@ * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but * does not support u"abc" string literals. * C++11 and C11 require support for UTF-16 literals + * TODO: Fix for plain C. Doesn't work on Mac. 
*/ # if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) # define U_HAVE_CHAR16_T 1 diff --git a/deps/icu-small/source/common/unicode/ptypes.h b/deps/icu-small/source/common/unicode/ptypes.h index 69d7286d32..6eaf2dbf03 100644 --- a/deps/icu-small/source/common/unicode/ptypes.h +++ b/deps/icu-small/source/common/unicode/ptypes.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unicode/putil.h b/deps/icu-small/source/common/unicode/putil.h index 6f3250ed68..91d6bb10f7 100644 --- a/deps/icu-small/source/common/unicode/putil.h +++ b/deps/icu-small/source/common/unicode/putil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index 9f2a1a62b3..d654154008 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** diff --git a/deps/icu-small/source/common/unicode/rep.h b/deps/icu-small/source/common/unicode/rep.h index 8fa2332a71..b1023a37a2 100644 --- a/deps/icu-small/source/common/unicode/rep.h +++ b/deps/icu-small/source/common/unicode/rep.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************** @@ -93,7 +93,7 @@ public: * @return 16-bit code unit of text at given offset * @stable ICU 1.8 */ - inline UChar charAt(int32_t offset) const; + inline char16_t charAt(int32_t offset) const; /** * Returns the 32-bit code point at the given 16-bit offset into @@ -230,7 +230,7 @@ protected: * Virtual version of charAt(). * @stable ICU 2.4 */ - virtual UChar getCharAt(int32_t offset) const = 0; + virtual char16_t getCharAt(int32_t offset) const = 0; /** * Virtual version of char32At(). @@ -246,7 +246,7 @@ Replaceable::length() const { return getLength(); } -inline UChar +inline char16_t Replaceable::charAt(int32_t offset) const { return getCharAt(offset); } diff --git a/deps/icu-small/source/common/unicode/resbund.h b/deps/icu-small/source/common/unicode/resbund.h index 0d37d4209d..358ed7eeb9 100644 --- a/deps/icu-small/source/common/unicode/resbund.h +++ b/deps/icu-small/source/common/unicode/resbund.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -216,7 +216,7 @@ public: * could be U_MISSING_RESOURCE_ERROR if the key is not found * could be a warning * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. + * @return a pointer to a zero-terminated char16_t array which lives in a memory mapped/DLL file. 
* @stable ICU 2.0 */ UnicodeString diff --git a/deps/icu-small/source/common/unicode/schriter.h b/deps/icu-small/source/common/unicode/schriter.h index b5fa32d0ae..d83a57f8d0 100644 --- a/deps/icu-small/source/common/unicode/schriter.h +++ b/deps/icu-small/source/common/unicode/schriter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -175,7 +175,7 @@ protected: * @param newTextLength The length of the String * @stable ICU 2.0 */ - void setText(const UChar* newText, int32_t newTextLength); + void setText(const char16_t* newText, int32_t newTextLength); /** * Copy of the iterated string object. diff --git a/deps/icu-small/source/common/unicode/simpleformatter.h b/deps/icu-small/source/common/unicode/simpleformatter.h index 2b74f5ca52..26eae01525 100644 --- a/deps/icu-small/source/common/unicode/simpleformatter.h +++ b/deps/icu-small/source/common/unicode/simpleformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -19,8 +19,6 @@ #include "unicode/utypes.h" #include "unicode/unistr.h" -#ifndef U_HIDE_DRAFT_API - U_NAMESPACE_BEGIN /** @@ -49,15 +47,15 @@ U_NAMESPACE_BEGIN * * @see MessageFormat * @see UMessagePatternApostropheMode - * @draft ICU 57 + * @stable ICU 57 */ class U_COMMON_API SimpleFormatter U_FINAL : public UMemory { public: /** * Default constructor. - * @draft ICU 57 + * @stable ICU 57 */ - SimpleFormatter() : compiledPattern((UChar)0) {} + SimpleFormatter() : compiledPattern((char16_t)0) {} /** * Constructs a formatter from the pattern string. 
@@ -66,7 +64,7 @@ public: * @param errorCode ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. - * @draft ICU 57 + * @stable ICU 57 */ SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) { applyPattern(pattern, errorCode); @@ -84,7 +82,7 @@ public: * Must fulfill U_SUCCESS before the function call. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and * too few or too many arguments. - * @draft ICU 57 + * @stable ICU 57 */ SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max, UErrorCode &errorCode) { @@ -93,20 +91,20 @@ public: /** * Copy constructor. - * @draft ICU 57 + * @stable ICU 57 */ SimpleFormatter(const SimpleFormatter& other) : compiledPattern(other.compiledPattern) {} /** * Assignment operator. - * @draft ICU 57 + * @stable ICU 57 */ SimpleFormatter &operator=(const SimpleFormatter& other); /** * Destructor. - * @draft ICU 57 + * @stable ICU 57 */ ~SimpleFormatter(); @@ -118,7 +116,7 @@ public: * Must fulfill U_SUCCESS before the function call. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. * @return TRUE if U_SUCCESS(errorCode). - * @draft ICU 57 + * @stable ICU 57 */ UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) { return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode); @@ -137,14 +135,14 @@ public: * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and * too few or too many arguments. * @return TRUE if U_SUCCESS(errorCode). - * @draft ICU 57 + * @stable ICU 57 */ UBool applyPatternMinMaxArguments(const UnicodeString &pattern, int32_t min, int32_t max, UErrorCode &errorCode); /** * @return The max argument number + 1. - * @draft ICU 57 + * @stable ICU 57 */ int32_t getArgumentLimit() const { return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length()); @@ -160,7 +158,7 @@ public: * @param errorCode ICU error code in/out parameter. 
* Must fulfill U_SUCCESS before the function call. * @return appendTo - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString &format( const UnicodeString &value0, @@ -177,7 +175,7 @@ public: * @param errorCode ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return appendTo - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString &format( const UnicodeString &value0, @@ -196,7 +194,7 @@ public: * @param errorCode ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return appendTo - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString &format( const UnicodeString &value0, @@ -221,7 +219,7 @@ public: * @param errorCode ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return appendTo - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString &formatAndAppend( const UnicodeString *const *values, int32_t valuesLength, @@ -247,7 +245,7 @@ public: * @param errorCode ICU error code in/out parameter. * Must fulfill U_SUCCESS before the function call. * @return result - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString &formatAndReplace( const UnicodeString *const *values, int32_t valuesLength, @@ -257,7 +255,7 @@ public: /** * Returns the pattern text with none of the arguments. * Like formatting with all-empty string values. - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString getTextWithNoArguments() const { return getTextWithNoArguments(compiledPattern.getBuffer(), compiledPattern.length()); @@ -275,15 +273,15 @@ private: */ UnicodeString compiledPattern; - static inline int32_t getArgumentLimit(const UChar *compiledPattern, + static inline int32_t getArgumentLimit(const char16_t *compiledPattern, int32_t compiledPatternLength) { return compiledPatternLength == 0 ? 
0 : compiledPattern[0]; } - static UnicodeString getTextWithNoArguments(const UChar *compiledPattern, int32_t compiledPatternLength); + static UnicodeString getTextWithNoArguments(const char16_t *compiledPattern, int32_t compiledPatternLength); static UnicodeString &format( - const UChar *compiledPattern, int32_t compiledPatternLength, + const char16_t *compiledPattern, int32_t compiledPatternLength, const UnicodeString *const *values, UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, int32_t *offsets, int32_t offsetsLength, @@ -292,6 +290,4 @@ private: U_NAMESPACE_END -#endif /* U_HIDE_DRAFT_API */ - #endif // __SIMPLEFORMATTER_H__ diff --git a/deps/icu-small/source/common/unicode/std_string.h b/deps/icu-small/source/common/unicode/std_string.h index 104ef0c83e..729c563995 100644 --- a/deps/icu-small/source/common/unicode/std_string.h +++ b/deps/icu-small/source/common/unicode/std_string.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: std_string.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -27,13 +27,11 @@ #include "unicode/utypes.h" -#if U_HAVE_STD_STRING - -#if !defined(_MSC_VER) -namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364 +// Workaround for a libstdc++ bug before libstdc++4.6 (2011). 
+// https://bugs.llvm.org/show_bug.cgi?id=13364 +#if defined(__GLIBCXX__) +namespace std { class type_info; } #endif #include -#endif // U_HAVE_STD_STRING - #endif // __STD_STRING_H__ diff --git a/deps/icu-small/source/common/unicode/strenum.h b/deps/icu-small/source/common/unicode/strenum.h index 74ff28decc..fa525d4f52 100644 --- a/deps/icu-small/source/common/unicode/strenum.h +++ b/deps/icu-small/source/common/unicode/strenum.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN * call, so the returned string still might not be 'valid' on * subsequent use.

* - *

<p>Strings may take the form of const char*, const UChar*, or const + *

<p>Strings may take the form of const char*, const char16_t*, or const * UnicodeString*. The type you get is determine by the variant of * 'next' that you call. In general the StringEnumeration is * optimized for one of these types, but all StringEnumerations can @@ -112,7 +112,7 @@ public: *

<p>If the iterator is out of sync with its service, status is set * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>

* - *

<p>If the native service string is a UChar* string, it is + *

<p>If the native service string is a char16_t* string, it is * converted to char* with the invariant converter. If the * conversion fails (because a character cannot be converted) then * status is set to U_INVARIANT_CONVERSION_ERROR and the return @@ -131,7 +131,7 @@ public: virtual const char* next(int32_t *resultLength, UErrorCode& status); /** - *

<p>Returns the next element as a NUL-terminated UChar*. If there + *

<p>Returns the next element as a NUL-terminated char16_t*. If there * are no more elements, returns NULL. If the resultLength pointer * is not NULL, the length of the string (not counting the * terminating NUL) is returned at that address. If an error @@ -153,7 +153,7 @@ public: * * @stable ICU 2.4 */ - virtual const UChar* unext(int32_t *resultLength, UErrorCode& status); + virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status); /** *

Returns the next element a UnicodeString*. If there are no diff --git a/deps/icu-small/source/common/unicode/stringpiece.h b/deps/icu-small/source/common/unicode/stringpiece.h index 43864ad81a..640fbac5a8 100644 --- a/deps/icu-small/source/common/unicode/stringpiece.h +++ b/deps/icu-small/source/common/unicode/stringpiece.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // Copyright (C) 2009-2013, International Business Machines // Corporation and others. All Rights Reserved. @@ -68,14 +68,12 @@ class U_COMMON_API StringPiece : public UMemory { * @stable ICU 4.2 */ StringPiece(const char* str); -#if U_HAVE_STD_STRING /** * Constructs from a std::string. * @stable ICU 4.2 */ StringPiece(const std::string& str) : ptr_(str.data()), length_(static_cast(str.size())) { } -#endif /** * Constructs from a const char * pointer and a specified length. * @param offset a const char * pointer (need not be terminated) diff --git a/deps/icu-small/source/common/unicode/stringtriebuilder.h b/deps/icu-small/source/common/unicode/stringtriebuilder.h index c2839fe53f..d1ac003c48 100644 --- a/deps/icu-small/source/common/unicode/stringtriebuilder.h +++ b/deps/icu-small/source/common/unicode/stringtriebuilder.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: stringtriebuilder.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -105,7 +105,7 @@ protected: /** @internal */ virtual int32_t getElementStringLength(int32_t i) const = 0; /** @internal */ - virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0; /** @internal */ virtual int32_t getElementValue(int32_t i) const = 0; @@ -120,7 +120,7 @@ protected: /** @internal */ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; /** @internal */ - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0; /** @internal */ virtual UBool matchNodesCanHaveValues() const = 0; @@ -137,7 +137,7 @@ protected: /** @internal */ static const int32_t kMaxBranchLinearSubNodeLength=5; - // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units. + // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units. // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. /** @internal */ static const int32_t kMaxSplitBranchLevels=14; @@ -338,7 +338,7 @@ protected: virtual void write(StringTrieBuilder &builder); // Adds a unit with a final value. void add(int32_t c, int32_t value) { - units[length]=(UChar)c; + units[length]=(char16_t)c; equal[length]=NULL; values[length]=value; ++length; @@ -346,7 +346,7 @@ protected: } // Adds a unit which leads to another match node. void add(int32_t c, Node *node) { - units[length]=(UChar)c; + units[length]=(char16_t)c; equal[length]=node; values[length]=0; ++length; @@ -356,7 +356,7 @@ protected: Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". 
int32_t length; int32_t values[kMaxBranchLinearSubNodeLength]; - UChar units[kMaxBranchLinearSubNodeLength]; + char16_t units[kMaxBranchLinearSubNodeLength]; }; /** @@ -364,7 +364,7 @@ protected: */ class SplitBranchNode : public BranchNode { public: - SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) + SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) : BranchNode(((0x555555*37+middleUnit)*37+ hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} @@ -372,7 +372,7 @@ protected: virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual void write(StringTrieBuilder &builder); protected: - UChar unit; + char16_t unit; Node *lessThan; Node *greaterOrEqual; }; diff --git a/deps/icu-small/source/common/unicode/symtable.h b/deps/icu-small/source/common/unicode/symtable.h index 829c8105d0..c2dc95a61b 100644 --- a/deps/icu-small/source/common/unicode/symtable.h +++ b/deps/icu-small/source/common/unicode/symtable.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/ubidi.h b/deps/icu-small/source/common/unicode/ubidi.h index e59969861f..ef21f24206 100644 --- a/deps/icu-small/source/common/unicode/ubidi.h +++ b/deps/icu-small/source/common/unicode/ubidi.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ubidi.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -1196,11 +1196,14 @@ ubidi_setContext(UBiDi *pBiDi, * A level overrides the directional property of its corresponding * (same index) character if the level has the * #UBIDI_LEVEL_OVERRIDE bit set.

- * Except for that bit, it must be + * Aside from that bit, it must be * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL, - * with one exception: a level of zero may be specified for a paragraph - * separator even if paraLevel>0 when multiple paragraphs - * are submitted in the same call to ubidi_setPara().

+ * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if #UBIDI_LEVEL_OVERRIDE + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.

* Caution: A copy of this pointer, not of the levels, * will be stored in the UBiDi object; * the embeddingLevels array must not be diff --git a/deps/icu-small/source/common/unicode/ubiditransform.h b/deps/icu-small/source/common/unicode/ubiditransform.h index 509f68bcc0..724587dddc 100644 --- a/deps/icu-small/source/common/unicode/ubiditransform.h +++ b/deps/icu-small/source/common/unicode/ubiditransform.h @@ -1,12 +1,12 @@ /* ****************************************************************************** * -* Copyright (C) 2016 and later: Unicode, Inc. and others. +* © 2016 and later: Unicode, Inc. and others. * License & terms of use: http://www.unicode.org/copyright.html * ****************************************************************************** * file name: ubiditransform.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index f43943ed1a..22a4b99cd6 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -267,6 +267,34 @@ ubrk_openRules(const UChar *rules, UParseError *parseErr, UErrorCode *status); +#ifndef U_HIDE_DRAFT_API +/** + * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. + * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. + * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not + * compatible across different major versions of ICU, nor across platforms of different + * endianness or different base character set family (ASCII vs EBCDIC). 
+ * @param binaryRules A set of compiled binary rules specifying the text breaking + * conventions. Ownership of the storage containing the compiled + * rules remains with the caller of this function. The compiled + * rules must not be modified or deleted during the life of the + * break iterator. + * @param rulesLength The length of binaryRules in bytes; must be >= 0. + * @param text The text to be iterated over. May be null, in which case + * ubrk_setText() is used to specify the text to be iterated. + * @param textLength The number of characters in text, or -1 if null-terminated. + * @param status Pointer to UErrorCode to receive any errors. + * @return UBreakIterator for the specified rules. + * @see ubrk_getBinaryRules + * @draft ICU 59 + */ +U_DRAFT UBreakIterator* U_EXPORT2 +ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, + const UChar * text, int32_t textLength, + UErrorCode * status); + +#endif /* U_HIDE_DRAFT_API */ + /** * Thread safe cloning operation * @param bi iterator to be cloned @@ -566,6 +594,40 @@ ubrk_refreshUText(UBreakIterator *bi, UText *text, UErrorCode *status); + +#ifndef U_HIDE_DRAFT_API +/** + * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. + * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator + * more quickly than using ubrk_openRules. The compiled rules are not compatible across + * different major versions of ICU, nor across platforms of different endianness or + * different base character set family (ASCII vs EBCDIC). Supports preflighting (with + * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to + * the binaryRules buffer. However, whether preflighting or not, if the actual length + * is greater than INT32_MAX, then the function returns 0 and sets *status to + * U_INDEX_OUTOFBOUNDS_ERROR. + + * @param bi The break iterator to use. 
+ * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for + * preflighting. + * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for + * preflighting. Must be >= 0. + * @param status Pointer to UErrorCode to receive any errors, such as + * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or + * U_ILLEGAL_ARGUMENT_ERROR. + * @return The actual byte length of the binary rules, if <= INT32_MAX; + * otherwise 0. If not preflighting and this is larger than + * rulesCapacity, *status will be set to an error. + * @see ubrk_openBinaryRules + * @draft ICU 59 + */ +U_DRAFT int32_t U_EXPORT2 +ubrk_getBinaryRules(UBreakIterator *bi, + uint8_t * binaryRules, int32_t rulesCapacity, + UErrorCode * status); + +#endif /* U_HIDE_DRAFT_API */ + #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif diff --git a/deps/icu-small/source/common/unicode/ucasemap.h b/deps/icu-small/source/common/unicode/ucasemap.h index d7345e8a40..18e6c2ba0b 100644 --- a/deps/icu-small/source/common/unicode/ucasemap.h +++ b/deps/icu-small/source/common/unicode/ucasemap.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ucasemap.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -22,8 +22,8 @@ #define __UCASEMAP_H__ #include "unicode/utypes.h" -#include "unicode/ustring.h" #include "unicode/localpointer.h" +#include "unicode/ustring.h" /** * \file @@ -185,6 +185,15 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); */ #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 +/** + * Omit unchanged text when case-mapping with Edits. 
+ * + * @see CaseMap + * @see Edits + * @draft ICU 59 + */ +#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000 + #if !UCONFIG_NO_BREAK_ITERATION /** @@ -253,7 +262,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * @param dest A buffer for the result string. The result will be NUL-terminated if * the buffer is large enough. * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then * dest may be NULL and the function will only return the length of the result * without writing any of the result string. * @param src The original string. @@ -272,7 +281,7 @@ ucasemap_toTitle(UCaseMap *csm, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode); -#endif +#endif // UCONFIG_NO_BREAK_ITERATION /** * Lowercase the characters in a UTF-8 string. diff --git a/deps/icu-small/source/common/unicode/ucat.h b/deps/icu-small/source/common/unicode/ucat.h index 418b64fa19..f9c18b47d6 100644 --- a/deps/icu-small/source/common/unicode/ucat.h +++ b/deps/icu-small/source/common/unicode/ucat.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index 7f6ea6d934..8174ca23e6 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -399,36 +399,34 @@ typedef enum UProperty { UCHAR_CHANGES_WHEN_CASEMAPPED=55, /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, -#ifndef U_HIDE_DRAFT_API /** * Binary property Emoji. * See http://www.unicode.org/reports/tr51/#Emoji_Properties * - * @draft ICU 57 + * @stable ICU 57 */ UCHAR_EMOJI=57, /** * Binary property Emoji_Presentation. * See http://www.unicode.org/reports/tr51/#Emoji_Properties * - * @draft ICU 57 + * @stable ICU 57 */ UCHAR_EMOJI_PRESENTATION=58, /** * Binary property Emoji_Modifier. * See http://www.unicode.org/reports/tr51/#Emoji_Properties * - * @draft ICU 57 + * @stable ICU 57 */ UCHAR_EMOJI_MODIFIER=59, /** * Binary property Emoji_Modifier_Base. * See http://www.unicode.org/reports/tr51/#Emoji_Properties * - * @draft ICU 57 + * @stable ICU 57 */ UCHAR_EMOJI_MODIFIER_BASE=60, -#endif /* U_HIDE_DRAFT_API */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for binary Unicode properties. diff --git a/deps/icu-small/source/common/unicode/ucharstrie.h b/deps/icu-small/source/common/unicode/ucharstrie.h index 8daed447ce..dfc93f6d0b 100644 --- a/deps/icu-small/source/common/unicode/ucharstrie.h +++ b/deps/icu-small/source/common/unicode/ucharstrie.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: ucharstrie.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -36,7 +36,7 @@ class UVector32; /** * Light-weight, non-const reader class for a UCharsTrie. - * Traverses a UChar-serialized data structure with minimal state, + * Traverses a char16_t-serialized data structure with minimal state, * for mapping strings (16-bit-unit sequences) to non-negative integer values. * * This class owns the serialized trie data only if it was constructed by @@ -52,18 +52,18 @@ public: /** * Constructs a UCharsTrie reader instance. * - * The trieUChars must contain a copy of a UChar sequence from the UCharsTrieBuilder, - * starting with the first UChar of that sequence. - * The UCharsTrie object will not read more UChars than + * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder, + * starting with the first char16_t of that sequence. + * The UCharsTrie object will not read more char16_ts than * the UCharsTrieBuilder generated in the corresponding build() call. * * The array is not copied/cloned and must not be modified while * the UCharsTrie object is in use. * - * @param trieUChars The UChar array that contains the serialized trie. + * @param trieUChars The char16_t array that contains the serialized trie. * @stable ICU 4.8 */ - UCharsTrie(const UChar *trieUChars) + UCharsTrie(ConstChar16Ptr trieUChars) : ownedArray_(NULL), uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} @@ -75,7 +75,7 @@ public: /** * Copy constructor, copies the other trie reader object and its state, - * but not the UChar array which will be shared. (Shallow copy.) + * but not the char16_t array which will be shared. (Shallow copy.) * @param other Another UCharsTrie object. 
* @stable ICU 4.8 */ @@ -109,8 +109,8 @@ public: private: friend class UCharsTrie; - const UChar *uchars; - const UChar *pos; + const char16_t *uchars; + const char16_t *pos; int32_t remainingMatchLength; }; @@ -148,14 +148,14 @@ public: /** * Determines whether the string so far matches, whether it has a value, - * and whether another input UChar can continue a matching string. + * and whether another input char16_t can continue a matching string. * @return The match/value Result. * @stable ICU 4.8 */ UStringTrieResult current() const; /** - * Traverses the trie from the initial state for this input UChar. + * Traverses the trie from the initial state for this input char16_t. * Equivalent to reset().next(uchar). * @param uchar Input char value. Values below 0 and above 0xffff will never match. * @return The match/value Result. @@ -177,7 +177,7 @@ public: UStringTrieResult firstForCodePoint(UChar32 cp); /** - * Traverses the trie from the current state for this input UChar. + * Traverses the trie from the current state for this input char16_t. * @param uchar Input char value. Values below 0 and above 0xffff will never match. * @return The match/value Result. * @stable ICU 4.8 @@ -208,7 +208,7 @@ public: * @return The match/value Result. * @stable ICU 4.8 */ - UStringTrieResult next(const UChar *s, int32_t length); + UStringTrieResult next(ConstChar16Ptr s, int32_t length); /** * Returns a matching string's value if called immediately after @@ -220,7 +220,7 @@ public: * @stable ICU 4.8 */ inline int32_t getValue() const { - const UChar *pos=pos_; + const char16_t *pos=pos_; int32_t leadUnit=*pos++; // U_ASSERT(leadUnit>=kMinValueLead); return leadUnit&kValueIsFinal ? @@ -237,16 +237,16 @@ public: * @stable ICU 4.8 */ inline UBool hasUniqueValue(int32_t &uniqueValue) const { - const UChar *pos=pos_; + const char16_t *pos=pos_; // Skip the rest of a pending linear-match node. 
return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue); } /** - * Finds each UChar which continues the string from the current state. - * That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. - * @param out Each next UChar is appended to this object. - * @return the number of UChars which continue the string from here + * Finds each char16_t which continues the string from the current state. + * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. + * @param out Each next char16_t is appended to this object. + * @return the number of char16_ts which continue the string from here * @stable ICU 4.8 */ int32_t getNextUChars(Appendable &out) const; @@ -258,8 +258,8 @@ public: class U_COMMON_API Iterator : public UMemory { public: /** - * Iterates from the root of a UChar-serialized UCharsTrie. - * @param trieUChars The trie UChars. + * Iterates from the root of a char16_t-serialized UCharsTrie. + * @param trieUChars The trie char16_ts. * @param maxStringLength If 0, the iterator returns full strings. * Otherwise, the iterator returns strings with this maximum length. * @param errorCode Standard ICU error code. Its input value must @@ -268,7 +268,7 @@ public: * function chaining. (See User Guide for details.) * @stable ICU 4.8 */ - Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode); + Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode); /** * Iterates from the current state of the specified UCharsTrie. 
@@ -336,11 +336,11 @@ public: return TRUE; } - const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode); + const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode); - const UChar *uchars_; - const UChar *pos_; - const UChar *initialPos_; + const char16_t *uchars_; + const char16_t *pos_; + const char16_t *initialPos_; int32_t remainingMatchLength_; int32_t initialRemainingMatchLength_; UBool skipValue_; // Skip intermediate value which was already delivered. @@ -368,7 +368,7 @@ private: * this constructor adopts the builder's array. * This constructor is only called by the builder. */ - UCharsTrie(UChar *adoptUChars, const UChar *trieUChars) + UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars) : ownedArray_(adoptUChars), uchars_(trieUChars), pos_(uchars_), remainingMatchLength_(-1) {} @@ -381,7 +381,7 @@ private: // Reads a compact 32-bit integer. // pos is already after the leadUnit, and the lead unit has bit 15 reset. - static inline int32_t readValue(const UChar *pos, int32_t leadUnit) { + static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) { int32_t value; if(leadUnit=kMinTwoUnitValueLead) { if(leadUnit=kMinTwoUnitNodeValueLead) { if(leadUnit=kMinTwoUnitDeltaLead) { if(delta==kThreeUnitDeltaLead) { @@ -444,7 +444,7 @@ private: return pos+delta; } - static const UChar *skipDelta(const UChar *pos) { + static const char16_t *skipDelta(const char16_t *pos) { int32_t delta=*pos++; if(delta>=kMinTwoUnitDeltaLead) { if(delta==kThreeUnitDeltaLead) { @@ -461,28 +461,28 @@ private: } // Handles a branch node for both next(uchar) and next(string). - UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar); + UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar); // Requires remainingLength_<0. 
- UStringTrieResult nextImpl(const UChar *pos, int32_t uchar); + UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar); // Helper functions for hasUniqueValue(). // Recursively finds a unique value (or whether there is not a unique one) // from a branch. - static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length, + static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length, UBool haveUniqueValue, int32_t &uniqueValue); // Recursively finds a unique value (or whether there is not a unique one) // starting from a position on a node lead unit. - static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue); + static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue); // Helper functions for getNextUChars(). // getNextUChars() when pos is on a branch node. - static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out); + static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out); // UCharsTrie data structure // - // The trie consists of a series of UChar-serialized nodes for incremental - // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer) + // The trie consists of a series of char16_t-serialized nodes for incremental + // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer) // The root node is at the beginning of the trie data. // // Types of nodes are distinguished by their node lead unit ranges. @@ -491,9 +491,9 @@ private: // // Node types: // - Final-value node: Stores a 32-bit integer in a compact, variable-length format. - // The value is for the string/UChar sequence so far. + // The value is for the string/char16_t sequence so far. // - Match node, optionally with an intermediate value in a different compact format. - // The value, if present, is for the string/UChar sequence so far. 
+ // The value, if present, is for the string/char16_t sequence so far. // // Aside from the value, which uses the node lead unit's high bits: // @@ -560,15 +560,15 @@ private: static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff - UChar *ownedArray_; + char16_t *ownedArray_; // Fixed value referencing the UCharsTrie words. - const UChar *uchars_; + const char16_t *uchars_; // Iterator variables. // Pointer to next trie unit to read. NULL if no more matches. - const UChar *pos_; + const char16_t *pos_; // Remaining length of a linear-match node, minus 1. Negative if not in such a node. int32_t remainingMatchLength_; }; diff --git a/deps/icu-small/source/common/unicode/ucharstriebuilder.h b/deps/icu-small/source/common/unicode/ucharstriebuilder.h index cc9fb77a91..2aa4757e52 100644 --- a/deps/icu-small/source/common/unicode/ucharstriebuilder.h +++ b/deps/icu-small/source/common/unicode/ucharstriebuilder.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucharstriebuilder.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -89,21 +89,21 @@ public: UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); /** - * Builds a UCharsTrie for the add()ed data and UChar-serializes it. + * Builds a UCharsTrie for the add()ed data and char16_t-serializes it. * Once built, no further data can be add()ed until clear() is called. * * A UCharsTrie cannot be empty. At least one (string, value) pair * must have been add()ed. 
* * Multiple calls to buildUnicodeString() set the UnicodeStrings to the - * builder's same UChar array, without rebuilding. + * builder's same char16_t array, without rebuilding. * If buildUnicodeString() is called after build(), the trie will be * re-serialized into a new array. * If build() is called after buildUnicodeString(), the trie object will become * the owner of the previously returned array. * After clear() has been called, a new array will be used as well. * @param buildOption Build option, see UStringTrieBuildOption. - * @param result A UnicodeString which will be set to the UChar-serialized + * @param result A UnicodeString which will be set to the char16_t-serialized * UCharsTrie for the add()ed data. * @param errorCode Standard ICU error code. Its input value must * pass the U_SUCCESS() test, or else the function returns @@ -135,14 +135,14 @@ private: void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode); virtual int32_t getElementStringLength(int32_t i) const; - virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const; virtual int32_t getElementValue(int32_t i) const; virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const; virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const; virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const; virtual UBool matchNodesCanHaveValues() const { return TRUE; } @@ -152,11 +152,11 @@ private: class UCTLinearMatchNode : public LinearMatchNode { public: - UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode); + UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode); virtual UBool operator==(const 
Node &other) const; virtual void write(StringTrieBuilder &builder); private: - const UChar *s; + const char16_t *s; }; virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, @@ -164,7 +164,7 @@ private: UBool ensureCapacity(int32_t length); virtual int32_t write(int32_t unit); - int32_t write(const UChar *s, int32_t length); + int32_t write(const char16_t *s, int32_t length); virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length); virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); @@ -175,9 +175,9 @@ private: int32_t elementsCapacity; int32_t elementsLength; - // UChar serialization of the trie. + // char16_t serialization of the trie. // Grows from the back: ucharsLength measures from the end of the buffer! - UChar *uchars; + char16_t *uchars; int32_t ucharsCapacity; int32_t ucharsLength; }; diff --git a/deps/icu-small/source/common/unicode/uchriter.h b/deps/icu-small/source/common/unicode/uchriter.h index 1365c9b7d1..38f67c5b45 100644 --- a/deps/icu-small/source/common/unicode/uchriter.h +++ b/deps/icu-small/source/common/unicode/uchriter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -15,18 +15,18 @@ /** * \file - * \brief C++ API: UChar Character Iterator + * \brief C++ API: char16_t Character Iterator */ U_NAMESPACE_BEGIN /** * A concrete subclass of CharacterIterator that iterates over the - * characters (code units or code points) in a UChar array. + * characters (code units or code points) in a char16_t array. 
* It's possible not only to create an - * iterator that iterates over an entire UChar array, but also to - * create one that iterates over only a subrange of a UChar array - * (iterators over different subranges of the same UChar array don't + * iterator that iterates over an entire char16_t array, but also to + * create one that iterates over only a subrange of a char16_t array + * (iterators over different subranges of the same char16_t array don't * compare equal). * @see CharacterIterator * @see ForwardCharacterIterator @@ -35,34 +35,34 @@ U_NAMESPACE_BEGIN class U_COMMON_API UCharCharacterIterator : public CharacterIterator { public: /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to length-1. * text is only aliased, not adopted (the * destructor will not delete it). - * @param textPtr The UChar array to be iterated over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length); + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length); /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to length-1. * text is only aliased, not adopted (the * destructor will not delete it). * The starting * position is specified by "position". If "position" is outside the valid * iteration range, the behavior of this object is undefined. 
- * @param textPtr The UChar array to be iteratd over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iteratd over + * @param length The length of the char16_t array * @param position The starting position of the iteration * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length, + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t position); /** - * Create an iterator over the UChar array referred to by "textPtr". + * Create an iterator over the char16_t array referred to by "textPtr". * The iteration range is 0 to end-1. * text is only aliased, not adopted (the * destructor will not delete it). @@ -70,14 +70,14 @@ public: * position is specified by "position". If begin and end do not * form a valid iteration range or "position" is outside the valid * iteration range, the behavior of this object is undefined. - * @param textPtr The UChar array to be iterated over - * @param length The length of the UChar array + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array * @param textBegin The begin position of the iteration range * @param textEnd The end position of the iteration range * @param position The starting position of the iteration * @stable ICU 2.0 */ - UCharCharacterIterator(const UChar* textPtr, int32_t length, + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); @@ -141,7 +141,7 @@ public: * @return the first code unit in its iteration range. 
* @stable ICU 2.0 */ - virtual UChar first(void); + virtual char16_t first(void); /** * Sets the iterator to refer to the first code unit in its @@ -151,7 +151,7 @@ public: * @return the first code unit in its iteration range * @stable ICU 2.0 */ - virtual UChar firstPostInc(void); + virtual char16_t firstPostInc(void); /** * Sets the iterator to refer to the first code point in its @@ -181,7 +181,7 @@ public: * @return the last code unit in its iteration range. * @stable ICU 2.0 */ - virtual UChar last(void); + virtual char16_t last(void); /** * Sets the iterator to refer to the last code point in its @@ -200,7 +200,7 @@ public: * @return the code unit * @stable ICU 2.0 */ - virtual UChar setIndex(int32_t position); + virtual char16_t setIndex(int32_t position); /** * Sets the iterator to refer to the beginning of the code point @@ -220,7 +220,7 @@ public: * @return the code unit the iterator currently refers to. * @stable ICU 2.0 */ - virtual UChar current(void) const; + virtual char16_t current(void) const; /** * Returns the code point the iterator currently refers to. @@ -236,7 +236,7 @@ public: * @return the next code unit in the iteration range. * @stable ICU 2.0 */ - virtual UChar next(void); + virtual char16_t next(void); /** * Gets the current code unit for returning and advances to the next code unit @@ -246,7 +246,7 @@ public: * @return the current code unit. * @stable ICU 2.0 */ - virtual UChar nextPostInc(void); + virtual char16_t nextPostInc(void); /** * Advances to the next code point in the iteration range (toward @@ -288,7 +288,7 @@ public: * @return the previous code unit in the iteration range. 
* @stable ICU 2.0 */ - virtual UChar previous(void); + virtual char16_t previous(void); /** * Advances to the previous code point in the iteration range (toward @@ -334,16 +334,20 @@ public: * @return the new position * @stable ICU 2.0 */ +#ifdef move32 + // One of the system headers right now is sometimes defining a conflicting macro we don't use +#undef move32 +#endif virtual int32_t move32(int32_t delta, EOrigin origin); /** * Sets the iterator to iterate over a new range of text * @stable ICU 2.0 */ - void setText(const UChar* newText, int32_t newTextLength); + void setText(ConstChar16Ptr newText, int32_t newTextLength); /** - * Copies the UChar array under iteration into the UnicodeString + * Copies the char16_t array under iteration into the UnicodeString * referred to by "result". Even if this iterator iterates across * only a part of this string, the whole string is copied. * @param result Receives a copy of the text under iteration. @@ -375,7 +379,7 @@ protected: * Protected member text * @stable ICU 2.0 */ - const UChar* text; + const char16_t* text; }; diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h index 6a5a4f42b6..d0bfcb13a6 100644 --- a/deps/icu-small/source/common/unicode/uclean.h +++ b/deps/icu-small/source/common/unicode/uclean.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: uclean.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -101,7 +101,7 @@ u_init(UErrorCode *status); U_STABLE void U_EXPORT2 u_cleanup(void); - +U_CDECL_BEGIN /** * Pointer type for a user supplied memory allocation function. * @param context user supplied value, obtained from from u_setMemoryFunctions(). @@ -149,9 +149,10 @@ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem); * @system */ U_STABLE void U_EXPORT2 -u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, +u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f, UErrorCode *status); +U_CDECL_END #ifndef U_HIDE_DEPRECATED_API /********************************************************************************* @@ -172,6 +173,7 @@ u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMem */ typedef void *UMTX; +U_CDECL_BEGIN /** * Function Pointer type for a user supplied mutex initialization function. * The user-supplied function will be called by ICU whenever ICU needs to create a @@ -201,7 +203,7 @@ typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCod * @system */ typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex); - +U_CDECL_END /** * Set the functions that ICU will use for mutex operations diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 767c1a2a7f..86e3b84474 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/ucnv_cb.h b/deps/icu-small/source/common/unicode/ucnv_cb.h index a553481c45..632cc0b35f 100644 --- a/deps/icu-small/source/common/unicode/ucnv_cb.h +++ b/deps/icu-small/source/common/unicode/ucnv_cb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/ucnv_err.h b/deps/icu-small/source/common/unicode/ucnv_err.h index ea7f757d0c..e8a79bcd81 100644 --- a/deps/icu-small/source/common/unicode/ucnv_err.h +++ b/deps/icu-small/source/common/unicode/ucnv_err.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/ucnvsel.h b/deps/icu-small/source/common/unicode/ucnvsel.h index b5820031a5..5fee53f179 100644 --- a/deps/icu-small/source/common/unicode/ucnvsel.h +++ b/deps/icu-small/source/common/unicode/ucnvsel.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/unicode/uconfig.h b/deps/icu-small/source/common/unicode/uconfig.h index d681febf4f..25f19a1a61 100644 --- a/deps/icu-small/source/common/unicode/uconfig.h +++ b/deps/icu-small/source/common/unicode/uconfig.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: uconfig.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h index 8b5d41ae3b..ecb54d146f 100644 --- a/deps/icu-small/source/common/unicode/ucurr.h +++ b/deps/icu-small/source/common/unicode/ucurr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/udata.h b/deps/icu-small/source/common/unicode/udata.h index abc043f6d0..6419c359f6 100644 --- a/deps/icu-small/source/common/unicode/udata.h +++ b/deps/icu-small/source/common/unicode/udata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: udata.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/udisplaycontext.h b/deps/icu-small/source/common/unicode/udisplaycontext.h index eaef02d795..c4f6c957e9 100644 --- a/deps/icu-small/source/common/unicode/udisplaycontext.h +++ b/deps/icu-small/source/common/unicode/udisplaycontext.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/common/unicode/uenum.h b/deps/icu-small/source/common/unicode/uenum.h index 50dde60da1..56faae8952 100644 --- a/deps/icu-small/source/common/unicode/uenum.h +++ b/deps/icu-small/source/common/unicode/uenum.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uenum.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * @@ -23,7 +23,9 @@ #include "unicode/localpointer.h" #if U_SHOW_CPLUSPLUS_API -#include "unicode/strenum.h" +U_NAMESPACE_BEGIN +class StringEnumeration; +U_NAMESPACE_END #endif /** diff --git a/deps/icu-small/source/common/unicode/uidna.h b/deps/icu-small/source/common/unicode/uidna.h index d49729a29c..cb79ba8545 100644 --- a/deps/icu-small/source/common/unicode/uidna.h +++ b/deps/icu-small/source/common/unicode/uidna.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uidna.h - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/uiter.h b/deps/icu-small/source/common/unicode/uiter.h index 74075e5a6e..3b8537204c 100644 --- a/deps/icu-small/source/common/unicode/uiter.h +++ b/deps/icu-small/source/common/unicode/uiter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uiter.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/uldnames.h b/deps/icu-small/source/common/unicode/uldnames.h index 8a3dfd0a6a..3a3c0a0657 100644 --- a/deps/icu-small/source/common/unicode/uldnames.h +++ b/deps/icu-small/source/common/unicode/uldnames.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/unicode/ulistformatter.h b/deps/icu-small/source/common/unicode/ulistformatter.h index bed18984e9..e98a9f0452 100644 --- a/deps/icu-small/source/common/unicode/ulistformatter.h +++ b/deps/icu-small/source/common/unicode/ulistformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** @@ -49,7 +49,7 @@ typedef struct UListFormatter UListFormatter; /**< C typedef for struct UListFo * or NULL if an error occurred. * @stable ICU 55 */ -U_STABLE UListFormatter* U_EXPORT2 +U_CAPI UListFormatter* U_EXPORT2 ulistfmt_open(const char* locale, UErrorCode* status); @@ -59,7 +59,7 @@ ulistfmt_open(const char* locale, * The UListFormatter object to close. * @stable ICU 55 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 ulistfmt_close(UListFormatter *listfmt); @@ -116,7 +116,7 @@ U_NAMESPACE_END * total buffer size needed (e.g. 
for illegal arguments). * @stable ICU 55 */ -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 ulistfmt_format(const UListFormatter* listfmt, const UChar* const strings[], const int32_t * stringLengths, diff --git a/deps/icu-small/source/common/unicode/uloc.h b/deps/icu-small/source/common/unicode/uloc.h index 5146000f28..5531070841 100644 --- a/deps/icu-small/source/common/unicode/uloc.h +++ b/deps/icu-small/source/common/unicode/uloc.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -61,7 +61,7 @@ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt * *

- * The second option includes an additonal ISO Country + * The second option includes an additional ISO Country * Code. These codes are the upper-case two-letter codes * as defined by ISO-3166. * You can find a full list of these codes at a number of sites, such as: @@ -69,7 +69,7 @@ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html * *

- * The third option requires another additonal information--the + * The third option requires another additional information--the * Variant. * The Variant codes are vendor and browser-specific. * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX. @@ -157,7 +157,7 @@ * just a mechanism for identifying these services. * *

- * Each international serivce that performs locale-sensitive operations + * Each international service that performs locale-sensitive operations * allows you * to get all the available objects of that type. You can sift * through these objects by language, country, or variant, @@ -539,6 +539,9 @@ uloc_getISO3Country(const char* localeID); * Gets the Win32 LCID value for the specified locale. * If the ICU locale is not recognized by Windows, 0 will be returned. * + * LCIDs were deprecated with Windows Vista and Microsoft recommends + * that developers use BCP47 style tags instead (uloc_toLanguageTag). + * * @param localeID the locale to get the Win32 LCID value with * @return country the Win32 LCID for localeID * @stable ICU 2.0 @@ -577,7 +580,7 @@ uloc_getDisplayLanguage(const char* locale, * if the locale's language code is "en", passing Locale::getFrench() for * inLocale would result in "", while passing Locale::getGerman() * for inLocale would result in "". NULL may be used to specify the default. - * @param script the displayable country code for localeID + * @param script the displayable script for the localeID * @param scriptCapacity the size of the script buffer to store the * displayable script code with * @param status error information if retrieving the displayable script code failed @@ -852,10 +855,12 @@ uloc_openKeywords(const char* localeID, * Get the value for a keyword. Locale name does not need to be normalized. * * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK") - * @param keywordName name of the keyword for which we want the value. Case insensitive. + * @param keywordName name of the keyword for which we want the value; must not be + * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. * @param buffer receiving buffer * @param bufferCapacity capacity of receiving buffer - * @param status containing error code - buffer not big enough. + * @param status containing error code: e.g. 
buffer not big enough or ill-formed localeID + * or keywordName parameters. * @return the length of keyword value * @stable ICU 2.8 */ @@ -872,18 +877,26 @@ uloc_getKeywordValue(const char* localeID, * For removing all keywords, use uloc_getBaseName(). * * NOTE: Unlike almost every other ICU function which takes a - * buffer, this function will NOT truncate the output text. If a - * BUFFER_OVERFLOW_ERROR is received, it means that the original - * buffer is untouched. This is done to prevent incorrect or possibly - * even malformed locales from being generated and used. - * - * @param keywordName name of the keyword to be set. Case insensitive. + * buffer, this function will NOT truncate the output text, and will + * not update the buffer with unterminated text setting a status of + * U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received, + * it means a terminated version of the updated locale ID would not fit + * in the buffer, and the original buffer is untouched. This is done to + * prevent incorrect or possibly even malformed locales from being generated + * and used. + * + * @param keywordName name of the keyword to be set; must not be + * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed. No error is given if - * that keyword does not exist. - * @param buffer input buffer containing locale to be modified. + * NULL, will result in the keyword being removed; no error is given if + * that keyword does not exist. Otherwise, must consist only of + * [A-Za-z0-9] and [/_+-]. + * @param buffer input buffer containing well-formed locale ID to be + * modified. * @param bufferCapacity capacity of receiving buffer - * @param status containing error code - buffer not big enough. + * @param status containing error code: e.g. 
buffer not big enough + * or ill-formed keywordName or keywordValue parameters, or ill-formed + * locale ID in buffer on input. * @return the length needed for the buffer * @see uloc_getKeywordValue * @stable ICU 3.2 diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h index 22820d4b00..30de4dba0d 100644 --- a/deps/icu-small/source/common/unicode/umachine.h +++ b/deps/icu-small/source/common/unicode/umachine.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: umachine.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -51,29 +51,6 @@ */ #include <stddef.h> -#ifndef U_HIDE_INTERNAL_API -/* - * U_USE_CHAR16_T - * When defined, force use of char16_t for UChar. - * Note: char16_t is expected to become the default and required in the future, - * and this option will be removed. - * @internal - */ -#ifdef U_USE_CHAR16_T -#ifdef UCHAR_TYPE -#undef UCHAR_TYPE -#endif -#define UCHAR_TYPE char16_t - -/* - * In plain C, <uchar.h> is needed for the definition of char16_t - */ -#ifndef __cplusplus -#include <uchar.h> -#endif -#endif -#endif /* U_HIDE_INTERNAL_API */ - /*==========================================================================*/ /* For C wrappers, we use the symbol U_STABLE. */ /* This works properly if the includer is C or C++. */ @@ -313,30 +290,92 @@ typedef int8_t UBool; /** Number of bytes in a UChar.
@stable ICU 2.0 */ #define U_SIZEOF_UCHAR 2 +/** + * \def U_CHAR16_IS_TYPEDEF + * If 1, then char16_t is a typedef and not a real type (yet) + * @internal + */ +#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +// for AIX, uchar.h needs to be included +# include <uchar.h> +# define U_CHAR16_IS_TYPEDEF 1 +#else +# define U_CHAR16_IS_TYPEDEF 0 +#endif + + /** * \var UChar - * Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), - * or wchar_t if that is 16 bits wide; always assumed to be unsigned. - * If neither is available, then define UChar to be uint16_t. * - * This makes the definition of UChar platform-dependent - * but allows direct string type compatibility with platforms with - * 16-bit wchar_t types. + * The base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. + * + * UChar is configurable by defining the macro UCHAR_TYPE + * on the preprocessor or compiler command line: + * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. + * (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) + * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. + * + * The default is UChar=char16_t. + * + * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. + * + * In C, char16_t is a simple typedef of uint_least16_t. + * ICU requires uint_least16_t=uint16_t for data memory mapping. + * On macOS, char16_t is not available because the uchar.h standard header is missing. * * @stable ICU 4.4 */ -#if defined(UCHAR_TYPE) + +#if 1 + // #if 1 is normal. UChar defaults to char16_t in C++. + // For configuration testing of UChar=uint16_t temporarily change this to #if 0. + // The intltest Makefile #defines UCHAR_TYPE=char16_t, + // so we only #define it to uint16_t if it is undefined so far.
+#elif !defined(UCHAR_TYPE) +# define UCHAR_TYPE uint16_t +#endif + +#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Inside the ICU library code, never configurable. + typedef char16_t UChar; +#elif defined(UCHAR_TYPE) typedef UCHAR_TYPE UChar; -/* Not #elif U_HAVE_CHAR16_T -- because that is type-incompatible with pre-C++11 callers - typedef char16_t UChar; */ -#elif U_SIZEOF_WCHAR_T==2 - typedef wchar_t UChar; -#elif defined(__CHAR16_TYPE__) - typedef __CHAR16_TYPE__ UChar; +#elif defined(__cplusplus) + typedef char16_t UChar; #else typedef uint16_t UChar; #endif +/** + * \var OldUChar + * Default ICU 58 definition of UChar. + * A base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * + * Define OldUChar to be wchar_t if that is 16 bits wide. + * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. + * + * This makes the definition of OldUChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * This is how UChar was defined in ICU 58, for transition convenience. + * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. + * The current UChar responds to UCHAR_TYPE but OldUChar does not. + * + * @draft ICU 59 + */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t OldUChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ OldUChar; +#else + typedef uint16_t OldUChar; +#endif + /** * Define UChar32 as a type for single Unicode code points. * UChar32 is a signed 32-bit integer (same as int32_t). diff --git a/deps/icu-small/source/common/unicode/umisc.h b/deps/icu-small/source/common/unicode/umisc.h index 4cc665721a..a46fa323c8 100644 --- a/deps/icu-small/source/common/unicode/umisc.h +++ b/deps/icu-small/source/common/unicode/umisc.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: umisc.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/unifilt.h b/deps/icu-small/source/common/unicode/unifilt.h index cedde81de4..99cce785b6 100644 --- a/deps/icu-small/source/common/unicode/unifilt.h +++ b/deps/icu-small/source/common/unicode/unifilt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -30,7 +30,7 @@ U_NAMESPACE_BEGIN * defined range. * @stable ICU 3.0 */ -#define U_ETHER ((UChar)0xFFFF) +#define U_ETHER ((char16_t)0xFFFF) /** * diff --git a/deps/icu-small/source/common/unicode/unifunct.h b/deps/icu-small/source/common/unicode/unifunct.h index 724893ad96..66a02ce7cd 100644 --- a/deps/icu-small/source/common/unicode/unifunct.h +++ b/deps/icu-small/source/common/unicode/unifunct.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/unimatch.h b/deps/icu-small/source/common/unicode/unimatch.h index a83199ef61..8bf3995018 100644 --- a/deps/icu-small/source/common/unicode/unimatch.h +++ b/deps/icu-small/source/common/unicode/unimatch.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index 32e973dd79..914818a00e 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** @@ -294,7 +294,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * indicating that toPattern() must generate a pattern * representation from the inversion list. */ - UChar *pat; + char16_t *pat; UVector* strings; // maintained in sorted order UnicodeSetStringSpan *stringSpan; @@ -891,7 +891,7 @@ public: * @stable ICU 3.8 * @see USetSpanCondition */ - int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; /** * Returns the end of the substring of the input string according to the USetSpanCondition. @@ -924,7 +924,7 @@ public: * @stable ICU 3.8 * @see USetSpanCondition */ - int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; /** * Returns the start of the substring of the input string according to the USetSpanCondition. diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index 6f62244a1e..e0ab0b9eb7 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. 
and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -28,12 +28,13 @@ * \brief C++ API: Unicode String */ +#include <cstddef> #include "unicode/utypes.h" +#include "unicode/char16ptr.h" #include "unicode/rep.h" #include "unicode/std_string.h" #include "unicode/stringpiece.h" #include "unicode/bytestream.h" -#include "unicode/ucasemap.h" struct UConverter; // unicode/ucnv.h @@ -55,30 +56,34 @@ U_STABLE int32_t U_EXPORT2 u_strlen(const UChar *s); #endif -/** - * \def U_STRING_CASE_MAPPER_DEFINED - * @internal - */ -#ifndef U_STRING_CASE_MAPPER_DEFINED -#define U_STRING_CASE_MAPPER_DEFINED +U_NAMESPACE_BEGIN +#if !UCONFIG_NO_BREAK_ITERATION +class BreakIterator; // unicode/brkiter.h +#endif +class Edits; + +U_NAMESPACE_END + +// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. /** * Internal string case mapping function type. + * All error checking must be done. + * src and dest must not overlap. * @internal */ typedef int32_t U_CALLCONV -UStringCaseMapper(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - +UStringCaseMapper(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, #endif + char16_t *dest, int32_t destCapacity, + const char16_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); U_NAMESPACE_BEGIN -#if !UCONFIG_NO_BREAK_ITERATION -class BreakIterator; // unicode/brkiter.h -#endif class Locale; // unicode/locid.h class StringCharacterIterator; class UnicodeStringAppendable; // unicode/appendable.h @@ -99,10 +104,12 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * Unicode String literals in C++.
- * Dependent on the platform properties, different UnicodeString - * constructors should be used to create a UnicodeString object from - * a string literal. - * The macros are defined for maximum performance. + * + * Note: these macros are not recommended for new code. + * Prior to the availability of C++11 and u"unicode string literals", + * these macros were provided for portability and efficiency when + * initializing UnicodeStrings from literals. + * * They work only for strings that contain "invariant characters", i.e., * only latin letters, digits, and some punctuation. * See utypes.h for details. @@ -110,18 +117,12 @@ class UnicodeStringAppendable; // unicode/appendable.h * The string parameter must be a C string literal. * The length of the string, not including the terminating * NUL, must be specified as a constant. - * The U_STRING_DECL macro should be invoked exactly once for one - * such string variable before it is used. * @stable ICU 2.0 */ -#if defined(U_DECLARE_UTF16) -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) -#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) -#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) +#if !U_CHAR16_IS_TYPEDEF +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) #else -# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length) #endif /** @@ -142,7 +143,7 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_CHAR_EXPLICIT * This can be defined to be empty or "explicit". 
- * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) + * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) * constructors are marked as explicit, preventing their inadvertent use. * @stable ICU 49 */ @@ -159,7 +160,7 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_STRING_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) + * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) * constructors are marked as explicit, preventing their inadvertent use. * * In particular, this helps prevent accidentally depending on ICU conversion code @@ -193,18 +194,18 @@ class UnicodeStringAppendable; // unicode/appendable.h * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), * to hold the fields for heap-allocated strings. * Such a minimum size also ensures that the object is easily large enough - * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH). + * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). * * sizeof(UnicodeString) >= 48 should work for all known platforms. * * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, * sizeof(UnicodeString) = 64 would leave space for * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 - * UChars stored inside the object. + * char16_ts stored inside the object. * * The minimum object size on a 64-bit machine would be * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, - * and the internal buffer would hold up to 11 UChars in that case. + * and the internal buffer would hold up to 11 char16_ts in that case. * * @see U16_MAX_LENGTH * @stable ICU 56 @@ -236,7 +237,7 @@ class UnicodeStringAppendable; // unicode/appendable.h *

In ICU, a Unicode string consists of 16-bit Unicode code units. * A Unicode character may be stored with either one code unit * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is UChar. + * ("surrogates"). The data type for code units is char16_t. * For single-character handling, a Unicode character code point is a value * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.

* @@ -252,7 +253,7 @@ class UnicodeStringAppendable; // unicode/appendable.h * than other ICU APIs. In particular: * - If indexes are out of bounds for a UnicodeString object * (<0 or >length()) then they are "pinned" to the nearest boundary. - * - If primitive string pointer values (e.g., const UChar * or char *) + * - If primitive string pointer values (e.g., const char16_t * or char *) * for input strings are NULL, then those input string parameters are treated * as if they pointed to an empty string. * However, this is not the case for char * parameters for charset names @@ -439,7 +440,7 @@ public: * in srcChars. * @stable ICU 2.0 */ - inline int8_t compare(const UChar *srcChars, + inline int8_t compare(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -458,7 +459,7 @@ public: */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare the characters bitwise in the range @@ -479,7 +480,7 @@ public: */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -593,7 +594,7 @@ public: * in code point order * @stable ICU 2.0 */ - inline int8_t compareCodePointOrder(const UChar *srcChars, + inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -617,7 +618,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare two Unicode strings in code point order. @@ -642,7 +643,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -766,7 +767,7 @@ public: * @return A negative, zero, or positive integer indicating the comparison result. 
* @stable ICU 2.0 */ - inline int8_t caseCompare(const UChar *srcChars, + inline int8_t caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const; @@ -792,7 +793,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const; /** @@ -819,7 +820,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; @@ -884,7 +885,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -896,7 +897,7 @@ public: * @return TRUE if this ends with the characters in srcChars, FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -931,7 +932,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -944,7 +945,7 @@ public: * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -1021,7 +1022,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; @@ -1037,7 +1038,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; @@ -1058,7 +1059,7 @@ public: * or -1 if not found. 
* @stable ICU 2.0 */ - int32_t indexOf(const UChar *srcChars, + int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, @@ -1071,7 +1072,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c) const; + inline int32_t indexOf(char16_t c) const; /** * Locate in this the first occurrence of the code point c, @@ -1091,7 +1092,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start) const; /** @@ -1116,7 +1117,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start, int32_t length) const; @@ -1204,7 +1205,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; @@ -1220,7 +1221,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; @@ -1241,7 +1242,7 @@ public: * or -1 if not found. * @stable ICU 2.0 */ - int32_t lastIndexOf(const UChar *srcChars, + int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, @@ -1254,7 +1255,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c) const; + inline int32_t lastIndexOf(char16_t c) const; /** * Locate in this the last occurrence of the code point c, @@ -1274,7 +1275,7 @@ public: * @return The offset into this of c, or -1 if not found. 
* @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start) const; /** @@ -1299,7 +1300,7 @@ public: * @return The offset into this of c, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -1329,7 +1330,7 @@ public: * or 0xffff if the offset is not valid for this string * @stable ICU 2.0 */ - inline UChar charAt(int32_t offset) const; + inline char16_t charAt(int32_t offset) const; /** * Return the code unit at offset offset. @@ -1338,7 +1339,7 @@ public: * @return the code unit at offset offset * @stable ICU 2.0 */ - inline UChar operator[] (int32_t offset) const; + inline char16_t operator[] (int32_t offset) const; /** * Return the code point that contains the code unit @@ -1459,7 +1460,7 @@ public: */ inline void extract(int32_t start, int32_t length, - UChar *dst, + Char16Ptr dst, int32_t dstStart = 0) const; /** @@ -1478,13 +1479,13 @@ public: * then extract() will not copy the contents. * * @param dest Destination string buffer. - * @param destCapacity Number of UChars available at dest. + * @param destCapacity Number of char16_ts available at dest. * @param errorCode ICU error code. * @return length() * @stable ICU 2.0 */ int32_t - extract(UChar *dest, int32_t destCapacity, + extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const; /** @@ -1514,7 +1515,7 @@ public: */ inline void extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart = 0) const; /** @@ -1715,8 +1716,6 @@ public: */ void toUTF8(ByteSink &sink) const; -#if U_HAVE_STD_STRING - /** * Convert the UnicodeString to UTF-8 and append the result * to a standard string. @@ -1736,8 +1735,6 @@ public: return result; } -#endif - /** * Convert the UnicodeString to UTF-32. * Unpaired surrogates are replaced with U+FFFD. 
@@ -1759,7 +1756,7 @@ public: /** * Return the length of the UnicodeString object. - * The length is the number of UChar code units are in the UnicodeString. + * The length is the number of char16_t code units are in the UnicodeString. * If you want the number of code points, please use countChar32(). * @return the length of the UnicodeString object * @see countChar32 @@ -1768,14 +1765,14 @@ public: inline int32_t length(void) const; /** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. + * Count Unicode code points in the length char16_t code units of the string. + * A code point may occupy either one or two char16_t code units. * Counting code points involves reading all code units. * * This functions is basically the inverse of moveIndex32(). * * @param start the index of the first code unit to check - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * @return the number of code points in the specified code units * @see length * @stable ICU 2.0 @@ -1784,7 +1781,7 @@ public: countChar32(int32_t start=0, int32_t length=INT32_MAX) const; /** - * Check if the length UChar code units of the string + * Check if the length char16_t code units of the string * contain more Unicode code points than a certain number. * This is more efficient than counting all code points in this part of the string * and comparing that number with a threshold. @@ -1792,10 +1789,10 @@ public: * falls within a certain range, and * never needs to count more than 'number+1' code points. * Logically equivalent to (countChar32(start, length)>number). - * A Unicode code point may occupy either one or two UChar code units. + * A Unicode code point may occupy either one or two char16_t code units. 
* * @param start the index of the first code unit to check (0 for the entire string) - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * (use INT32_MAX for the entire string; remember that start/length * values are pinned) * @param number The number of code points in the (sub)string is compared against @@ -1821,7 +1818,7 @@ public: * This is useful together with the getBuffer functions. * See there for details. * - * @return the number of UChars available in the internal buffer + * @return the number of char16_ts available in the internal buffer * @see getBuffer * @stable ICU 2.0 */ @@ -1955,7 +1952,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator= (UChar ch); + inline UnicodeString& operator= (char16_t ch); /** * Assignment operator. Replace the characters in this UnicodeString @@ -2015,7 +2012,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& setTo(const UChar *srcChars, + inline UnicodeString& setTo(const char16_t *srcChars, int32_t srcLength); /** @@ -2026,7 +2023,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& setTo(UChar srcChar); + UnicodeString& setTo(char16_t srcChar); /** * Set the characters in the UnicodeString object to the code point @@ -2039,7 +2036,7 @@ public: UnicodeString& setTo(UChar32 srcChar); /** - * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. 
* This has copy-on-write semantics: @@ -2062,11 +2059,11 @@ public: * @stable ICU 2.0 */ UnicodeString &setTo(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** - * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -2075,16 +2072,16 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffCapacity The size of buffer in char16_ts. 
* @return a reference to this * @stable ICU 2.0 */ - UnicodeString &setTo(UChar *buffer, + UnicodeString &setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); @@ -2120,7 +2117,7 @@ public: * s.truncate(0); // set to an empty string (complete truncation), or * s=UnicodeString(); // assign an empty string, or * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const UChar nul=0; + * static const char16_t nul=0; * s.setTo(&nul, 0); // set to an empty C Unicode string * } * \endcode @@ -2138,7 +2135,7 @@ public: * @stable ICU 2.0 */ UnicodeString& setCharAt(int32_t offset, - UChar ch); + char16_t ch); /* Append operations */ @@ -2150,7 +2147,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator+= (UChar ch); + inline UnicodeString& operator+= (char16_t ch); /** * Append operator. Append the code point ch to the UnicodeString @@ -2210,7 +2207,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2223,7 +2220,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2232,7 +2229,7 @@ public: * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(UChar srcChar); + inline UnicodeString& append(char16_t srcChar); /** * Append the code point srcChar to the UnicodeString object. 
@@ -2288,7 +2285,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2302,7 +2299,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2314,7 +2311,7 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - UChar srcChar); + char16_t srcChar); /** * Insert the code point srcChar into the UnicodeString object at @@ -2388,7 +2385,7 @@ public: */ UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); @@ -2406,7 +2403,7 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** @@ -2422,7 +2419,7 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - UChar srcChar); + char16_t srcChar); /** * Replace the characters in the range @@ -2620,7 +2617,7 @@ public: * @stable ICU 2.0 */ UBool padLeading(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** * Pad the end of this UnicodeString with the character padChar. @@ -2634,7 +2631,7 @@ public: * @stable ICU 2.0 */ UBool padTrailing(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** * Truncate this UnicodeString to the targetLength. @@ -2821,7 +2818,7 @@ public: /** * Get a read/write pointer to the internal buffer. - * The buffer is guaranteed to be large enough for at least minCapacity UChars, + * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, * writable, and is still owned by the UnicodeString object. * Calls to getBuffer(minCapacity) must not be nested, and * must be matched with calls to releaseBuffer(newLength). 
@@ -2852,17 +2849,17 @@ public: * - You must call releaseBuffer(newLength) before and in order to * return to normal UnicodeString operation. * - * @param minCapacity the minimum number of UChars that are to be available + * @param minCapacity the minimum number of char16_ts that are to be available * in the buffer, starting at the returned pointer; * default to the current string capacity if minCapacity==-1 * @return a writable pointer to the internal string buffer, - * or 0 if an error occurs (nested calls, out of memory) + * or nullptr if an error occurs (nested calls, out of memory) * * @see releaseBuffer * @see getTerminatedBuffer() * @stable ICU 2.0 */ - UChar *getBuffer(int32_t minCapacity); + char16_t *getBuffer(int32_t minCapacity); /** * Release a read/write buffer on a UnicodeString object with an @@ -2910,13 +2907,13 @@ public: * be modified. * * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus + * or nullptr if the string is empty or bogus * * @see getBuffer(int32_t minCapacity) * @see getTerminatedBuffer() * @stable ICU 2.0 */ - inline const UChar *getBuffer() const; + inline const char16_t *getBuffer() const; /** * Get a read-only pointer to the internal buffer, @@ -2951,7 +2948,7 @@ public: * @see getBuffer() * @stable ICU 2.2 */ - const UChar *getTerminatedBuffer(); + const char16_t *getTerminatedBuffer(); //======================================== // Constructors @@ -2963,8 +2960,8 @@ public: inline UnicodeString(); /** - * Construct a UnicodeString with capacity to hold capacity UChars - * @param capacity the number of UChars this UnicodeString should hold + * Construct a UnicodeString with capacity to hold capacity char16_ts + * @param capacity the number of char16_ts this UnicodeString should hold * before a resize is necessary; if count is greater than 0 and count * code points c take up more space than capacity, then capacity is adjusted * accordingly. 
@@ -2976,7 +2973,7 @@ public: UnicodeString(int32_t capacity, UChar32 c, int32_t count); /** - * Single UChar (code unit) constructor. + * Single char16_t (code unit) constructor. * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_CHAR_EXPLICIT=explicit @@ -2984,7 +2981,7 @@ public: * @param ch the character to place in the UnicodeString * @stable ICU 2.0 */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); /** * Single UChar32 (code point) constructor. @@ -2998,7 +2995,7 @@ public: UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); /** - * UChar* constructor. + * char16_t* constructor. * * It is recommended to mark this constructor "explicit" by * -DUNISTR_FROM_STRING_EXPLICIT=explicit @@ -3007,20 +3004,121 @@ public: * must be NULL (U+0000) terminated. * @stable ICU 2.0 */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ +#if !U_CHAR16_IS_TYPEDEF + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *). 
+ * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * on the compiler command line or similar. + * @param text nullptr + * @draft ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); /** - * UChar* constructor. + * char16_t* constructor. * @param text The characters to place in the UnicodeString. * @param textLength The number of Unicode characters in text * to copy. * @stable ICU 2.0 */ - UnicodeString(const UChar *text, + UnicodeString(const char16_t *text, int32_t textLength); + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ +#if !U_CHAR16_IS_TYPEDEF /** - * Readonly-aliasing UChar* constructor. + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) 
+ * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param length string length + * @draft ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param length ignored + * @draft ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t length); + + /** + * Readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: @@ -3042,11 +3140,11 @@ public: * @stable ICU 2.0 */ UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** - * Writable-aliasing UChar* constructor. + * Writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -3055,15 +3153,64 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffCapacity The size of buffer in char16_ts. 
* @stable ICU 2.0 */ - UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ +#if !U_CHAR16_IS_TYPEDEF + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Writable-aliasing wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @draft ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + + /* + * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, + * it should always be available regardless of U_HIDE_DRAFT_API status + */ + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. 
+ * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @draft ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION @@ -3380,7 +3527,7 @@ protected: * UnicodeString::charAt() to be inline again (see jitterbug 709). * @stable ICU 2.4 */ - virtual UChar getCharAt(int32_t offset) const; + virtual char16_t getCharAt(int32_t offset) const; /** * The change in Replaceable to use virtual getChar32At() allows @@ -3416,7 +3563,7 @@ private: int8_t doCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3429,7 +3576,7 @@ private: int8_t doCompareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3444,12 +3591,12 @@ private: int8_t doCaseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; - int32_t doIndexOf(UChar c, + int32_t doIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3457,7 +3604,7 @@ private: int32_t start, int32_t length) const; - int32_t doLastIndexOf(UChar c, + int32_t doLastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3467,14 +3614,14 @@ private: void doExtract(int32_t start, int32_t length, - UChar *dst, + char16_t *dst, int32_t dstStart) const; inline void doExtract(int32_t start, int32_t length, UnicodeString& target) const; - inline UChar doCharAt(int32_t offset) const; + inline char16_t doCharAt(int32_t offset) const; UnicodeString& doReplace(int32_t start, int32_t length, @@ -3484,12 +3631,12 @@ private: UnicodeString& doReplace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, 
int32_t srcLength); - UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); UnicodeString& doReverse(int32_t start, int32_t length); @@ -3499,8 +3646,8 @@ private: // get pointer to start of array // these do not check for kOpenGetBuffer, unlike the public getBuffer() function - inline UChar* getArrayStart(void); - inline const UChar* getArrayStart(void) const; + inline char16_t* getArrayStart(void); + inline const char16_t* getArrayStart(void) const; inline UBool hasShortLength() const; inline int32_t getShortLength() const; @@ -3517,7 +3664,7 @@ private: inline void setShortLength(int32_t len); inline void setLength(int32_t len); inline void setToEmpty(); - inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags + inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags // allocate the array; result may be the stack buffer // sets refCount to 1 if appropriate @@ -3600,7 +3747,11 @@ private: * as in ustr_imp.h for ustrcase_map(). */ UnicodeString & - caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); + caseMap(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator *iter, +#endif + UStringCaseMapper *stringCaseMapper); // ref counting void addRef(void); @@ -3691,15 +3842,15 @@ private: // Each struct of the union must begin with fLengthAndFlags. 
struct { int16_t fLengthAndFlags; // bit fields: see constants above - UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings + char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings } fStackFields; struct { int16_t fLengthAndFlags; // bit fields: see constants above int32_t fLength; // number of characters in fArray if >127; else undefined - int32_t fCapacity; // capacity of fArray (in UChars) + int32_t fCapacity; // capacity of fArray (in char16_ts) // array pointer last to minimize padding for machines with P128 data model // or pointer sizes that are not a power of 2 - UChar *fArray; // the Unicode data + char16_t *fArray; // the Unicode data } fFields; } fUnion; }; @@ -3752,13 +3903,13 @@ UnicodeString::pinIndices(int32_t& start, } } -inline UChar* +inline char16_t* UnicodeString::getArrayStart() { return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; } -inline const UChar* +inline const char16_t* UnicodeString::getArrayStart() const { return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; @@ -3773,6 +3924,18 @@ UnicodeString::UnicodeString() { fUnion.fStackFields.fLengthAndFlags=kShortString; } +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + //======================================== // Read-only implementation methods //======================================== @@ -3819,10 +3982,10 @@ UnicodeString::isBufferWritable() const (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); } -inline const UChar * +inline const char16_t * UnicodeString::getBuffer() const { if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { - return 0; + return nullptr; } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; } else { @@ -3890,7 +4053,7 @@ UnicodeString::compare(int32_t start, { return doCompare(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compare(const UChar *srcChars, +UnicodeString::compare(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompare(0, length(), srcChars, 0, srcLength); } @@ -3905,13 +4068,13 @@ UnicodeString::compare(int32_t start, inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompare(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompare(start, _length, srcChars, srcStart, srcLength); } @@ -3951,7 +4114,7 @@ 
UnicodeString::compareCodePointOrder(int32_t start, { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compareCodePointOrder(const UChar *srcChars, +UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } @@ -3966,13 +4129,13 @@ UnicodeString::compareCodePointOrder(int32_t start, inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } @@ -4016,7 +4179,7 @@ UnicodeString::caseCompare(int32_t start, } inline int8_t -UnicodeString::caseCompare(const UChar *srcChars, +UnicodeString::caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const { return doCaseCompare(0, length(), srcChars, 0, srcLength, options); @@ -4035,7 +4198,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const { return doCaseCompare(start, _length, srcChars, 0, _length, options); } @@ -4043,7 +4206,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const { @@ -4094,7 +4257,7 @@ UnicodeString::indexOf(const UnicodeString& text, { return indexOf(text, 0, text.length(), start, _length); } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(const char16_t 
*srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -4102,14 +4265,14 @@ UnicodeString::indexOf(const UChar *srcChars, } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const { return indexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start, int32_t _length) const { return doIndexOf(c, start, _length); } @@ -4121,7 +4284,7 @@ UnicodeString::indexOf(UChar32 c, { return doIndexOf(c, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c) const +UnicodeString::indexOf(char16_t c) const { return doIndexOf(c, 0, length()); } inline int32_t @@ -4129,7 +4292,7 @@ UnicodeString::indexOf(UChar32 c) const { return indexOf(c, 0, length()); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start) const { pinIndex(start); return doIndexOf(c, start, length() - start); @@ -4143,14 +4306,14 @@ UnicodeString::indexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const { return lastIndexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -4191,7 +4354,7 @@ UnicodeString::lastIndexOf(const UnicodeString& text) const { return lastIndexOf(text, 0, text.length(), 0, length()); } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start, int32_t _length) const { return doLastIndexOf(c, start, _length); } @@ -4204,7 +4367,7 @@ UnicodeString::lastIndexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(UChar c) const 
+UnicodeString::lastIndexOf(char16_t c) const { return doLastIndexOf(c, 0, length()); } inline int32_t @@ -4213,7 +4376,7 @@ UnicodeString::lastIndexOf(UChar32 c) const { } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start) const { pinIndex(start); return doLastIndexOf(c, start, length() - start); @@ -4237,17 +4400,17 @@ UnicodeString::startsWith(const UnicodeString& srcText, { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { +UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { +UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -4267,21 +4430,21 @@ UnicodeString::endsWith(const UnicodeString& srcText, } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); } return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; @@ -4307,14 
+4470,14 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, _length, srcChars, 0, srcLength); } inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, _length, srcChars, srcStart, srcLength); } @@ -4322,7 +4485,7 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - UChar srcChar) + char16_t srcChar) { return doReplace(start, _length, &srcChar, 0, 1); } inline UnicodeString& @@ -4365,7 +4528,7 @@ UnicodeString::doExtract(int32_t start, inline void UnicodeString::extract(int32_t start, int32_t _length, - UChar *target, + Char16Ptr target, int32_t targetStart) const { doExtract(start, _length, target, targetStart); } @@ -4393,7 +4556,7 @@ UnicodeString::extract(int32_t start, inline void UnicodeString::extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart) const { pinIndex(start); pinIndex(limit); @@ -4405,7 +4568,7 @@ UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { return tempSubString(start, limit - start); } -inline UChar +inline char16_t UnicodeString::doCharAt(int32_t offset) const { if((uint32_t)offset < (uint32_t)length()) { @@ -4415,11 +4578,11 @@ UnicodeString::doCharAt(int32_t offset) const } } -inline UChar +inline char16_t UnicodeString::charAt(int32_t offset) const { return doCharAt(offset); } -inline UChar +inline char16_t UnicodeString::operator[] (int32_t offset) const { return doCharAt(offset); } @@ -4460,14 +4623,14 @@ UnicodeString::setToEmpty() { } inline void -UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { +UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { setLength(len); 
fUnion.fFields.fArray = array; fUnion.fFields.fCapacity = capacity; } inline UnicodeString& -UnicodeString::operator= (UChar ch) +UnicodeString::operator= (char16_t ch) { return doReplace(0, length(), &ch, 0, 1); } inline UnicodeString& @@ -4499,7 +4662,7 @@ UnicodeString::setTo(const UnicodeString& srcText) } inline UnicodeString& -UnicodeString::setTo(const UChar *srcChars, +UnicodeString::setTo(const char16_t *srcChars, int32_t srcLength) { unBogus(); @@ -4507,7 +4670,7 @@ UnicodeString::setTo(const UChar *srcChars, } inline UnicodeString& -UnicodeString::setTo(UChar srcChar) +UnicodeString::setTo(char16_t srcChar) { unBogus(); return doReplace(0, length(), &srcChar, 0, 1); @@ -4531,22 +4694,22 @@ UnicodeString::append(const UnicodeString& srcText) { return doAppend(srcText, 0, srcText.length()); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doAppend(srcChars, srcStart, srcLength); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(ConstChar16Ptr srcChars, int32_t srcLength) { return doAppend(srcChars, 0, srcLength); } inline UnicodeString& -UnicodeString::append(UChar srcChar) +UnicodeString::append(char16_t srcChar) { return doAppend(&srcChar, 0, 1); } inline UnicodeString& -UnicodeString::operator+= (UChar ch) +UnicodeString::operator+= (char16_t ch) { return doAppend(&ch, 0, 1); } inline UnicodeString& @@ -4572,20 +4735,20 @@ UnicodeString::insert(int32_t start, inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, 0, srcChars, srcStart, srcLength); } inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, 0, srcChars, 0, srcLength); } inline UnicodeString& 
UnicodeString::insert(int32_t start, - UChar srcChar) + char16_t srcChar) { return doReplace(start, 0, &srcChar, 0, 1); } inline UnicodeString& diff --git a/deps/icu-small/source/common/unicode/unorm.h b/deps/icu-small/source/common/unicode/unorm.h index f527c263f0..1b5af16700 100644 --- a/deps/icu-small/source/common/unicode/unorm.h +++ b/deps/icu-small/source/common/unicode/unorm.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/common/unicode/unorm2.h b/deps/icu-small/source/common/unicode/unorm2.h index 56e99b2a22..c6d3494d70 100644 --- a/deps/icu-small/source/common/unicode/unorm2.h +++ b/deps/icu-small/source/common/unicode/unorm2.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unorm2.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/uobject.h b/deps/icu-small/source/common/unicode/uobject.h index 96b1b8fbea..080600e526 100644 --- a/deps/icu-small/source/common/unicode/uobject.h +++ b/deps/icu-small/source/common/unicode/uobject.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: uobject.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index b220a54697..21c839abbf 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -7,7 +7,7 @@ ******************************************************************************* * * file name: urename.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -100,12 +100,16 @@ #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) +#define _UTF16v2Data U_ICU_ENTRY_POINT_RENAME(_UTF16v2Data) #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) +#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) +#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) +#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) #define gTimeZoneFilesInitOnce 
U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce) #define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone) #define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close) @@ -121,16 +125,6 @@ #define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID) #define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo) #define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open) -#define le_close U_ICU_ENTRY_POINT_RENAME(le_close) -#define le_create U_ICU_ENTRY_POINT_RENAME(le_create) -#define le_getCharIndices U_ICU_ENTRY_POINT_RENAME(le_getCharIndices) -#define le_getCharIndicesWithBase U_ICU_ENTRY_POINT_RENAME(le_getCharIndicesWithBase) -#define le_getGlyphCount U_ICU_ENTRY_POINT_RENAME(le_getGlyphCount) -#define le_getGlyphPosition U_ICU_ENTRY_POINT_RENAME(le_getGlyphPosition) -#define le_getGlyphPositions U_ICU_ENTRY_POINT_RENAME(le_getGlyphPositions) -#define le_getGlyphs U_ICU_ENTRY_POINT_RENAME(le_getGlyphs) -#define le_layoutChars U_ICU_ENTRY_POINT_RENAME(le_layoutChars) -#define le_reset U_ICU_ENTRY_POINT_RENAME(le_reset) #define locale_getKeywords U_ICU_ENTRY_POINT_RENAME(locale_getKeywords) #define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) #define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) @@ -486,6 +480,7 @@ #define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first) #define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following) #define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable) +#define ubrk_getBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_getBinaryRules) #define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType) #define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus) #define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec) @@ -493,6 +488,7 @@ #define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last) #define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next) #define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open) 
+#define ubrk_openBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_openBinaryRules) #define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules) #define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding) #define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous) @@ -554,7 +550,6 @@ #define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) #define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) #define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) -#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton) #define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) #define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) #define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) @@ -1150,6 +1145,7 @@ #define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal) #define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble) #define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency) +#define unum_formatDoubleForFields U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleForFields) #define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64) #define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable) #define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute) @@ -1181,9 +1177,11 @@ #define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames) #define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName) #define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close) +#define uplrules_getKeywords U_ICU_ENTRY_POINT_RENAME(uplrules_getKeywords) #define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open) #define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType) #define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select) +#define uplrules_selectWithFormat U_ICU_ENTRY_POINT_RENAME(uplrules_selectWithFormat) #define 
uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary) #define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary) #define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration) @@ -1220,6 +1218,7 @@ #define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic) #define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii) #define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID) +#define uprv_convertToLCIDPlatform U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCIDPlatform) #define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) #define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) #define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) @@ -1654,12 +1653,13 @@ #define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN) #define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) #define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) +#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale) #define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) #define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) #define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) #define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper) #define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map) -#define ustrcase_setTempCaseMapLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_setTempCaseMapLocale) +#define ustrcase_mapWithOverlap U_ICU_ENTRY_POINT_RENAME(ustrcase_mapWithOverlap) #define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At) #define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone) #define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close) @@ -1704,7 +1704,6 @@ #define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName) #define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions) 
#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel) -#define utrace_level U_ICU_ENTRY_POINT_RENAME(utrace_level) #define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions) #define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel) #define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat) diff --git a/deps/icu-small/source/common/unicode/urep.h b/deps/icu-small/source/common/unicode/urep.h index 128f465319..c54ba7c466 100644 --- a/deps/icu-small/source/common/unicode/urep.h +++ b/deps/icu-small/source/common/unicode/urep.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unicode/ures.h b/deps/icu-small/source/common/unicode/ures.h index 620d9268aa..918b9f208e 100644 --- a/deps/icu-small/source/common/unicode/ures.h +++ b/deps/icu-small/source/common/unicode/ures.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h index a20cd157a4..1420578f02 100644 --- a/deps/icu-small/source/common/unicode/uscript.h +++ b/deps/icu-small/source/common/unicode/uscript.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/uset.h b/deps/icu-small/source/common/unicode/uset.h index b4ed176eb9..5b7c5db9ec 100644 --- a/deps/icu-small/source/common/unicode/uset.h +++ b/deps/icu-small/source/common/unicode/uset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uset.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/usetiter.h b/deps/icu-small/source/common/unicode/usetiter.h index d70e897b22..057adbc04f 100644 --- a/deps/icu-small/source/common/unicode/usetiter.h +++ b/deps/icu-small/source/common/unicode/usetiter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/ushape.h b/deps/icu-small/source/common/unicode/ushape.h index 97fe9e3880..5af8ffe1c5 100644 --- a/deps/icu-small/source/common/unicode/ushape.h +++ b/deps/icu-small/source/common/unicode/ushape.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ushape.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/usprep.h b/deps/icu-small/source/common/unicode/usprep.h index fce161759d..33ca1461ce 100644 --- a/deps/icu-small/source/common/unicode/usprep.h +++ b/deps/icu-small/source/common/unicode/usprep.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: usprep.h - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h index 8f4809c815..2099ab5913 100644 --- a/deps/icu-small/source/common/unicode/ustring.h +++ b/deps/icu-small/source/common/unicode/ustring.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unicode/ustringtrie.h b/deps/icu-small/source/common/unicode/ustringtrie.h index 50d31ba226..fd85648225 100644 --- a/deps/icu-small/source/common/unicode/ustringtrie.h +++ b/deps/icu-small/source/common/unicode/ustringtrie.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: udicttrie.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h index 84ff22ad63..edcb267597 100644 --- a/deps/icu-small/source/common/unicode/utext.h +++ b/deps/icu-small/source/common/unicode/utext.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utext.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/utf.h b/deps/icu-small/source/common/unicode/utf.h index 7f40190eb8..ab7e9ac96a 100644 --- a/deps/icu-small/source/common/unicode/utf.h +++ b/deps/icu-small/source/common/unicode/utf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utf.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/utf16.h b/deps/icu-small/source/common/unicode/utf16.h index 3455a40e56..0665381612 100644 --- a/deps/icu-small/source/common/unicode/utf16.h +++ b/deps/icu-small/source/common/unicode/utf16.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utf16.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/utf32.h b/deps/icu-small/source/common/unicode/utf32.h index f93727c109..8822c4dd09 100644 --- a/deps/icu-small/source/common/unicode/utf32.h +++ b/deps/icu-small/source/common/unicode/utf32.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utf32.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h index 095e955ecd..9e56b50474 100644 --- a/deps/icu-small/source/common/unicode/utf8.h +++ b/deps/icu-small/source/common/unicode/utf8.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utf8.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -41,25 +41,7 @@ /* internal definitions ----------------------------------------------------- */ -/** - * \var utf8_countTrailBytes - * Internal array with numbers of trail bytes for any given byte used in - * lead byte position. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is called by public macros in this file and thus must remain stable, - * and should not be hidden when other internal functions are hidden (otherwise - * public macros would fail to compile). - * @internal - */ -#ifdef U_UTF8_IMPL -U_EXPORT const uint8_t -#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) -U_CFUNC const uint8_t -#else -U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ -#endif -utf8_countTrailBytes[256]; + /** * Counts the trail bytes for a UTF-8 lead byte. 
diff --git a/deps/icu-small/source/common/unicode/utf_old.h b/deps/icu-small/source/common/unicode/utf_old.h index b550b28ae3..cb229cb301 100644 --- a/deps/icu-small/source/common/unicode/utf_old.h +++ b/deps/icu-small/source/common/unicode/utf_old.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utf_old.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -267,6 +267,25 @@ typedef int32_t UTextOffset; /* Formerly utf8.h ---------------------------------------------------------- */ +/** +* \var utf8_countTrailBytes +* Internal array with numbers of trail bytes for any given byte used in +* lead byte position. +* +* This is internal since it is not meant to be called directly by external clients; +* however it is called by public macros in this file and thus must remain stable, +* and should not be hidden when other internal functions are hidden (otherwise +* public macros would fail to compile). +* @internal +*/ +#ifdef U_UTF8_IMPL +// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes. +#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) +U_CFUNC const uint8_t utf8_countTrailBytes[]; +#else +U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_IMPORT*/ +#endif + /** * Count the trail bytes for a UTF-8 lead byte. * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h. 
diff --git a/deps/icu-small/source/common/unicode/utrace.h b/deps/icu-small/source/common/unicode/utrace.h index 9add16f1cc..5d561109c7 100644 --- a/deps/icu-small/source/common/unicode/utrace.h +++ b/deps/icu-small/source/common/unicode/utrace.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utrace.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unicode/utypes.h b/deps/icu-small/source/common/unicode/utypes.h index 8325d534ed..4c40e6a87c 100644 --- a/deps/icu-small/source/common/unicode/utypes.h +++ b/deps/icu-small/source/common/unicode/utypes.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -178,12 +178,12 @@ /** * \def NULL - * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C. + * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. * @stable ICU 2.0 */ #ifndef NULL #ifdef __cplusplus -#define NULL 0 +#define NULL nullptr #else #define NULL ((void *)0) #endif diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index 5590f9a5cc..cae59ad880 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -7,7 +7,7 @@ ******************************************************************************* * * file name: uvernum.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -58,13 +58,13 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 58 +#define U_ICU_VERSION_MAJOR_NUM 59 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 2 +#define U_ICU_VERSION_MINOR_NUM 1 /** The current ICU patchlevel version as an integer. * This value will change in the subsequent releases of ICU @@ -84,7 +84,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _58 +#define U_ICU_VERSION_SUFFIX _59 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -119,19 +119,24 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "58.2" +#define U_ICU_VERSION "59.1" /** The current ICU library major/minor version as a string without dots, for library name suffixes. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "58" +#if U_PLATFORM_HAS_WINUWP_API == 0 +#define U_ICU_VERSION_SHORT "59" +#else +// U_DISABLE_RENAMING does not impact dat file name +#define U_ICU_VERSION_SHORT +#endif /* U_PLATFORM_HAS_WINUWP_API == 0 */ #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. 
* @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "58.2" +#define U_ICU_DATA_VERSION "59.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/unicode/uversion.h b/deps/icu-small/source/common/unicode/uversion.h index 63e2d17a36..cda24b6e0f 100644 --- a/deps/icu-small/source/common/unicode/uversion.h +++ b/deps/icu-small/source/common/unicode/uversion.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -7,7 +7,7 @@ ******************************************************************************* * * file name: uversion.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unifiedcache.cpp b/deps/icu-small/source/common/unifiedcache.cpp index 3e8e33a1ff..da1c88e84c 100644 --- a/deps/icu-small/source/common/unifiedcache.cpp +++ b/deps/icu-small/source/common/unifiedcache.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unifiedcache.h b/deps/icu-small/source/common/unifiedcache.h index 67c676d453..5606e03bc9 100644 --- a/deps/icu-small/source/common/unifiedcache.h +++ b/deps/icu-small/source/common/unifiedcache.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unifilt.cpp b/deps/icu-small/source/common/unifilt.cpp index 2d8ce355c7..4ab0d9b5f9 100644 --- a/deps/icu-small/source/common/unifilt.cpp +++ b/deps/icu-small/source/common/unifilt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/unifunct.cpp b/deps/icu-small/source/common/unifunct.cpp index 8fdc638407..f3995b298d 100644 --- a/deps/icu-small/source/common/unifunct.cpp +++ b/deps/icu-small/source/common/unifunct.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/uniset.cpp b/deps/icu-small/source/common/uniset.cpp index ef1b6113d9..d828660796 100644 --- a/deps/icu-small/source/common/uniset.cpp +++ b/deps/icu-small/source/common/uniset.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/uniset_closure.cpp b/deps/icu-small/source/common/uniset_closure.cpp index cacadf4775..b5cc213941 100644 --- a/deps/icu-small/source/common/uniset_closure.cpp +++ b/deps/icu-small/source/common/uniset_closure.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uniset_closure.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -184,7 +184,6 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { return *this; } if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { - const UCaseProps *csp = ucase_getSingleton(); { UnicodeSet foldSet(*this); UnicodeString str; @@ -207,7 +206,6 @@ UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { int32_t n = getRangeCount(); UChar32 result; const UChar *full; - int32_t locCache = 0; for (int32_t i=0; isize(); ++j) { str = *(const UnicodeString *) strings->elementAt(j); str.foldCase(); - if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { + if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) { foldSet.add(str); // does not map to code points: add the folded string itself } } diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index 8348f8a33a..ea69d4161a 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uniset_props.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -195,7 +195,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { if(U_SUCCESS(status)) { impl->addPropertyStarts(&sa, status); } - ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status); + ucase_addPropertyStarts(&sa, &status); break; } case UPROPS_SRC_NFC: { @@ -228,7 +228,7 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { } #endif case UPROPS_SRC_CASE: - ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status); + ucase_addPropertyStarts(&sa, &status); break; case UPROPS_SRC_BIDI: ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status); diff --git a/deps/icu-small/source/common/unisetspan.cpp b/deps/icu-small/source/common/unisetspan.cpp index 1179495d4f..09fb5b474c 100644 --- a/deps/icu-small/source/common/unisetspan.cpp +++ b/deps/icu-small/source/common/unisetspan.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: unisetspan.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unisetspan.h b/deps/icu-small/source/common/unisetspan.h index 36cdd98d04..f1e78ff3ee 100644 --- a/deps/icu-small/source/common/unisetspan.h +++ b/deps/icu-small/source/common/unisetspan.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. 
and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: unisetspan.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/unistr.cpp b/deps/icu-small/source/common/unistr.cpp index f825de91bb..2db2856f0b 100644 --- a/deps/icu-small/source/common/unistr.cpp +++ b/deps/icu-small/source/common/unistr.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -218,9 +218,10 @@ UnicodeString::UnicodeString(const UChar *text, } UnicodeString::UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr textPtr, int32_t textLength) { fUnion.fFields.fLengthAndFlags = kReadonlyAlias; + const UChar *text = textPtr; if(text == NULL) { // treat as an empty string, do not alias setToEmpty(); @@ -234,7 +235,8 @@ UnicodeString::UnicodeString(UBool isTerminated, // text is terminated, or else it would have failed the above test textLength = u_strlen(text); } - setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); + setArray(const_cast(text), textLength, + isTerminated ? 
textLength + 1 : textLength); } } @@ -873,7 +875,7 @@ UnicodeString::doExtract(int32_t start, } int32_t -UnicodeString::extract(UChar *dest, int32_t destCapacity, +UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const { int32_t len = length(); if(U_SUCCESS(errorCode)) { @@ -1215,10 +1217,10 @@ UnicodeString::unBogus() { } } -const UChar * +const char16_t * UnicodeString::getTerminatedBuffer() { if(!isWritable()) { - return 0; + return nullptr; } UChar *array = getArrayStart(); int32_t len = length(); @@ -1249,14 +1251,14 @@ UnicodeString::getTerminatedBuffer() { array[len] = 0; return array; } else { - return NULL; + return nullptr; } } // setTo() analogous to the readonly-aliasing constructor with the same signature UnicodeString & UnicodeString::setTo(UBool isTerminated, - const UChar *text, + ConstChar16Ptr textPtr, int32_t textLength) { if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { @@ -1264,6 +1266,7 @@ UnicodeString::setTo(UBool isTerminated, return *this; } + const UChar *text = textPtr; if(text == NULL) { // treat as an empty string, do not alias releaseArray(); @@ -1713,14 +1716,14 @@ UnicodeString::doHashCode() const // External Buffer //======================================== -UChar * +char16_t * UnicodeString::getBuffer(int32_t minCapacity) { if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; setZeroLength(); return getArrayStart(); } else { - return 0; + return nullptr; } } diff --git a/deps/icu-small/source/common/unistr_case.cpp b/deps/icu-small/source/common/unistr_case.cpp index 1715b6ec66..1c62ce5e97 100644 --- a/deps/icu-small/source/common/unistr_case.cpp +++ b/deps/icu-small/source/common/unistr_case.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unistr_case.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * @@ -19,14 +19,17 @@ */ #include "unicode/utypes.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" #include "unicode/putil.h" #include "cstring.h" #include "cmemory.h" #include "unicode/ustring.h" #include "unicode/unistr.h" #include "unicode/uchar.h" +#include "uassert.h" +#include "ucasemap_imp.h" #include "uelement.h" -#include "ustr_imp.h" U_NAMESPACE_BEGIN @@ -87,56 +90,104 @@ UnicodeString::doCaseCompare(int32_t start, //======================================== UnicodeString & -UnicodeString::caseMap(const UCaseMap *csm, +UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UStringCaseMapper *stringCaseMapper) { if(isEmpty() || !isWritable()) { // nothing to do return *this; } - // We need to allocate a new buffer for the internal string case mapping function. - // This is very similar to how doReplace() keeps the old array pointer - // and deletes the old array itself after it is done. - // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. - UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar oldBuffer[2 * US_STACKBUF_SIZE]; UChar *oldArray; - int32_t oldLength; - - if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { - // copy the stack buffer contents because it will be overwritten - oldArray = oldStackBuffer; - oldLength = getShortLength(); - u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); + int32_t oldLength = length(); + int32_t newLength; + UBool writable = isBufferWritable(); + UErrorCode errorCode = U_ZERO_ERROR; + + // Try to avoid heap-allocating a new character array for this string. + if (writable ? 
oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) { + // Short string: Copy the contents into a temporary buffer and + // case-map back into the current array, or into the stack buffer. + UChar *buffer = getArrayStart(); + int32_t capacity; + oldArray = oldBuffer; + u_memcpy(oldBuffer, buffer, oldLength); + if (writable) { + capacity = getCapacity(); + } else { + // Switch from the read-only alias or shared heap buffer to the stack buffer. + if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) { + return *this; + } + U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer); + buffer = fUnion.fStackFields.fBuffer; + capacity = US_STACKBUF_SIZE; + } + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + buffer, capacity, + oldArray, oldLength, NULL, errorCode); + if (U_SUCCESS(errorCode)) { + setLength(newLength); + return *this; + } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // common overflow handling below + } else { + setToBogus(); + return *this; + } } else { + // Longer string or read-only buffer: + // Collect only changes and then apply them to this string. + // Case mapping often changes only small parts of a string, + // and often does not change its length. oldArray = getArrayStart(); - oldLength = length(); + Edits edits; + UChar replacementChars[200]; + stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR + replacementChars, UPRV_LENGTHOF(replacementChars), + oldArray, oldLength, &edits, errorCode); + if (U_SUCCESS(errorCode)) { + // Grow the buffer at most once, not for multiple doReplace() calls. 
+ newLength = oldLength + edits.lengthDelta(); + if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) { + return *this; + } + for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { + doReplace(ei.destinationIndex(), ei.oldLength(), + replacementChars, ei.replacementIndex(), ei.newLength()); + } + if (U_FAILURE(errorCode)) { + setToBogus(); + } + return *this; + } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // common overflow handling below + newLength = oldLength + edits.lengthDelta(); + } else { + setToBogus(); + return *this; + } } - int32_t capacity; - if(oldLength <= US_STACKBUF_SIZE) { - capacity = US_STACKBUF_SIZE; - } else { - capacity = oldLength + 20; - } + // Handle buffer overflow, newLength is known. + // We need to allocate a new buffer for the internal string case mapping function. + // This is very similar to how doReplace() keeps the old array pointer + // and deletes the old array itself after it is done. + // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. int32_t *bufferToDelete = 0; - if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { + if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) { return *this; } - - // Case-map, and if the result is too long, then reallocate and repeat. 
- UErrorCode errorCode; - int32_t newLength; - do { - errorCode = U_ZERO_ERROR; - newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), - oldArray, oldLength, &errorCode); - setLength(newLength); - } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); - + errorCode = U_ZERO_ERROR; + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + getArrayStart(), getCapacity(), + oldArray, oldLength, NULL, errorCode); if (bufferToDelete) { uprv_free(bufferToDelete); } - if(U_FAILURE(errorCode)) { + if (U_SUCCESS(errorCode)) { + setLength(newLength); + } else { setToBogus(); } return *this; @@ -144,10 +195,7 @@ UnicodeString::caseMap(const UCaseMap *csm, UnicodeString & UnicodeString::foldCase(uint32_t options) { - UCaseMap csm=UCASEMAP_INITIALIZER; - csm.csp=ucase_getSingleton(); - csm.options=options; - return caseMap(&csm, ustrcase_internalFold); + return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unistr_case_locale.cpp b/deps/icu-small/source/common/unistr_case_locale.cpp index a01be5c30b..f0f3048d06 100644 --- a/deps/icu-small/source/common/unistr_case_locale.cpp +++ b/deps/icu-small/source/common/unistr_case_locale.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: unistr_case_locale.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -19,9 +19,9 @@ #include "unicode/utypes.h" #include "unicode/locid.h" +#include "unicode/ucasemap.h" #include "unicode/unistr.h" -#include "cmemory.h" -#include "ustr_imp.h" +#include "ucasemap_imp.h" U_NAMESPACE_BEGIN @@ -29,44 +29,28 @@ U_NAMESPACE_BEGIN // Write implementation //======================================== -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. - */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - ustrcase_setTempCaseMapLocale(csm, locale); - } -} - UnicodeString & UnicodeString::toLower() { - return toLower(Locale::getDefault()); + return caseMap(ustrcase_getCaseLocale(NULL), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); } UnicodeString & UnicodeString::toLower(const Locale &locale) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale.getName()); - return caseMap(&csm, ustrcase_internalToLower); + return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); } UnicodeString & UnicodeString::toUpper() { - return toUpper(Locale::getDefault()); + return caseMap(ustrcase_getCaseLocale(NULL), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); } UnicodeString & UnicodeString::toUpper(const Locale &locale) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale.getName()); - return caseMap(&csm, ustrcase_internalToUpper); + return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); } U_NAMESPACE_END diff --git 
a/deps/icu-small/source/common/unistr_cnv.cpp b/deps/icu-small/source/common/unistr_cnv.cpp index a9b44ee424..64d3c16801 100644 --- a/deps/icu-small/source/common/unistr_cnv.cpp +++ b/deps/icu-small/source/common/unistr_cnv.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unistr_cnv.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * diff --git a/deps/icu-small/source/common/unistr_props.cpp b/deps/icu-small/source/common/unistr_props.cpp index 533a683928..691bd085d6 100644 --- a/deps/icu-small/source/common/unistr_props.cpp +++ b/deps/icu-small/source/common/unistr_props.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unistr_props.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * diff --git a/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp b/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp index 3d6737cfc5..3156fdfc57 100644 --- a/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp +++ b/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: unistr_titlecase_brkiter.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:2 * @@ -22,36 +22,10 @@ #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/brkiter.h" -#include "unicode/ubrk.h" +#include "unicode/locid.h" +#include "unicode/ucasemap.h" #include "unicode/unistr.h" -#include "unicode/ustring.h" -#include "cmemory.h" -#include "ustr_imp.h" - -static int32_t U_CALLCONV -unistr_case_internalToTitle(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - ubrk_setText(csm->iter, src, srcLength, pErrorCode); - return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, pErrorCode); -} - -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. 
- */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - ustrcase_setTempCaseMapLocale(csm, locale); - } -} +#include "ucasemap_imp.h" U_NAMESPACE_BEGIN @@ -67,9 +41,6 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { UnicodeString & UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { - UCaseMap csm=UCASEMAP_INITIALIZER; - csm.options=options; - setTempCaseMap(&csm, locale.getName()); BreakIterator *bi=titleIter; if(bi==NULL) { UErrorCode errorCode=U_ZERO_ERROR; @@ -79,8 +50,12 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t return *this; } } - csm.iter=reinterpret_cast(bi); - caseMap(&csm, unistr_case_internalToTitle); + // Because the "this" string is both the source and the destination, + // make a copy of the original source for use by the break iterator. + // See tickets #13127 and #13128 + UnicodeString copyOfInput(*this); + bi->setText(copyOfInput); + caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, bi, ustrcase_internalToTitle); if(titleIter==NULL) { delete bi; } diff --git a/deps/icu-small/source/common/unistrappender.h b/deps/icu-small/source/common/unistrappender.h index 600fd90335..134f31497f 100644 --- a/deps/icu-small/source/common/unistrappender.h +++ b/deps/icu-small/source/common/unistrappender.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unorm.cpp b/deps/icu-small/source/common/unorm.cpp index 75aaea9cdf..93f77e66af 100644 --- a/deps/icu-small/source/common/unorm.cpp +++ b/deps/icu-small/source/common/unorm.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/unormcmp.cpp b/deps/icu-small/source/common/unormcmp.cpp index b40a10a138..689b0b53b2 100644 --- a/deps/icu-small/source/common/unormcmp.cpp +++ b/deps/icu-small/source/common/unormcmp.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unormcmp.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -145,7 +145,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, uint32_t options, UErrorCode *pErrorCode) { const Normalizer2Impl *nfcImpl; - const UCaseProps *csp; /* current-level start/limit - s1/s2 as current */ const UChar *start1, *start2, *limit1, *limit2; @@ -183,11 +182,6 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } else { nfcImpl=NULL; } - if((options&U_COMPARE_IGNORE_CASE)!=0) { - csp=ucase_getSingleton(); - } else { - csp=NULL; - } if(U_FAILURE(*pErrorCode)) { return 0; } @@ -319,7 +313,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, */ if( level1==0 && (options&U_COMPARE_IGNORE_CASE) && - 
(length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 + (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 ) { /* cp1 case-folds to the code point "length" or to p[length] */ if(U_IS_SURROGATE(c1)) { @@ -364,7 +358,7 @@ unorm_cmpEquivFold(const UChar *s1, int32_t length1, } if( level2==0 && (options&U_COMPARE_IGNORE_CASE) && - (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 + (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 ) { /* cp2 case-folds to the code point "length" or to p[length] */ if(U_IS_SURROGATE(c2)) { diff --git a/deps/icu-small/source/common/unormimp.h b/deps/icu-small/source/common/unormimp.h index c382f38b8b..7f280551f7 100644 --- a/deps/icu-small/source/common/unormimp.h +++ b/deps/icu-small/source/common/unormimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unormimp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uobject.cpp b/deps/icu-small/source/common/uobject.cpp index 37406e4a0d..1133dd9b67 100644 --- a/deps/icu-small/source/common/uobject.cpp +++ b/deps/icu-small/source/common/uobject.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: uobject.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uposixdefs.h b/deps/icu-small/source/common/uposixdefs.h index 495deea49e..45ca1233ac 100644 --- a/deps/icu-small/source/common/uposixdefs.h +++ b/deps/icu-small/source/common/uposixdefs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: uposixdefs.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -54,22 +54,18 @@ * * z/OS needs this definition for timeval and to get usleep. */ -#if !defined(_XOPEN_SOURCE_EXTENDED) +#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__) # define _XOPEN_SOURCE_EXTENDED 1 #endif -/* - * There is an issue with turning on _XOPEN_SOURCE_EXTENDED on certain platforms. - * A compatibility issue exists between turning on _XOPEN_SOURCE_EXTENDED and using - * standard C++ string class. As a result, standard C++ string class needs to be - * turned off for the follwing platforms: - * -AIX/VACPP - * -Solaris/GCC +/** + * Solaris says: + * "...it is invalid to compile an XPG6 or a POSIX.1-2001 application with anything other + * than a c99 or later compiler." + * Apparently C++11 is not "or later". Work around this. 
*/ -#if (U_PLATFORM == U_PF_AIX && !defined(__GNUC__)) || (U_PLATFORM == U_PF_SOLARIS && defined(__GNUC__)) -# if _XOPEN_SOURCE_EXTENDED && !defined(U_HAVE_STD_STRING) -# define U_HAVE_STD_STRING 0 -# endif +#if defined(__cplusplus) && (defined(sun) || defined(__sun)) && !defined (_STDC_C99) +# define _STDC_C99 #endif #endif /* __UPOSIXDEFS_H__ */ diff --git a/deps/icu-small/source/common/uprops.cpp b/deps/icu-small/source/common/uprops.cpp index 46ceb66d8c..fc91c8903d 100644 --- a/deps/icu-small/source/common/uprops.cpp +++ b/deps/icu-small/source/common/uprops.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uprops.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -128,9 +128,8 @@ static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UP } if(c>=0) { /* single code point */ - const UCaseProps *csp=ucase_getSingleton(); const UChar *resultString; - return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0); + return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0); } else { /* guess some large but stack-friendly capacity */ UChar dest[2*UCASE_MAX_STRING_LENGTH]; @@ -576,14 +575,13 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p // case folding and NFKC.) // For the derivation, see Unicode's DerivedNormalizationProps.txt. 
const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); - const UCaseProps *csp=ucase_getSingleton(); if(U_FAILURE(*pErrorCode)) { return 0; } // first: b = NFKC(Fold(a)) UnicodeString folded1String; const UChar *folded1; - int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT); + int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT); if(folded1Length<0) { const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { diff --git a/deps/icu-small/source/common/uprops.h b/deps/icu-small/source/common/uprops.h index 63c588088a..f5d69fe79c 100644 --- a/deps/icu-small/source/common/uprops.h +++ b/deps/icu-small/source/common/uprops.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uprops.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ures_cnv.c b/deps/icu-small/source/common/ures_cnv.c deleted file mode 100644 index a810fc0856..0000000000 --- a/deps/icu-small/source/common/ures_cnv.c +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1997-2006, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -******************************************************************************* -* file name: ures_cnv.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug25 -* created by: Markus W. Scherer -* -* Character conversion functions moved here from uresbund.c -*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "unicode/ucnv.h" -#include "unicode/ures.h" -#include "uinvchar.h" -#include "ustr_cnv.h" - -U_CAPI UResourceBundle * U_EXPORT2 -ures_openU(const UChar *myPath, - const char *localeID, - UErrorCode *status) -{ - char pathBuffer[1024]; - int32_t length; - char *path = pathBuffer; - - if(status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(myPath==NULL) { - path = NULL; - } - else { - length=u_strlen(myPath); - if(length>=sizeof(pathBuffer)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } else if(uprv_isInvariantUString(myPath, length)) { - /* - * the invariant converter is sufficient for package and tree names - * and is more efficient - */ - u_UCharsToChars(myPath, path, length+1); /* length+1 to include the NUL */ - } else { -#if !UCONFIG_NO_CONVERSION - /* use the default converter to support variant-character paths */ - UConverter *cnv=u_getDefaultConverter(status); - length=ucnv_fromUChars(cnv, path, (int32_t)sizeof(pathBuffer), myPath, length, status); - u_releaseDefaultConverter(cnv); - if(U_FAILURE(*status)) { - return NULL; - } - if(length>=sizeof(pathBuffer)) { - /* not NUL-terminated - path too long */ - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } -#else - /* the default converter is not available */ - *status=U_UNSUPPORTED_ERROR; - return NULL; -#endif - } - } - - return ures_open(path, localeID, status); -} diff --git a/deps/icu-small/source/common/ures_cnv.cpp b/deps/icu-small/source/common/ures_cnv.cpp new file mode 100644 index 0000000000..43515fda28 --- /dev/null +++ b/deps/icu-small/source/common/ures_cnv.cpp @@ -0,0 +1,78 @@ 
+// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ures_cnv.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug25 +* created by: Markus W. Scherer +* +* Character conversion functions moved here from uresbund.c +*/ + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "unicode/ures.h" +#include "uinvchar.h" +#include "ustr_cnv.h" + +U_CAPI UResourceBundle * U_EXPORT2 +ures_openU(const UChar *myPath, + const char *localeID, + UErrorCode *status) +{ + char pathBuffer[1024]; + int32_t length; + char *path = pathBuffer; + + if(status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(myPath==NULL) { + path = NULL; + } + else { + length=u_strlen(myPath); + if(length>=(int32_t)sizeof(pathBuffer)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } else if(uprv_isInvariantUString(myPath, length)) { + /* + * the invariant converter is sufficient for package and tree names + * and is more efficient + */ + u_UCharsToChars(myPath, path, length+1); /* length+1 to include the NUL */ + } else { +#if !UCONFIG_NO_CONVERSION + /* use the default converter to support variant-character paths */ + UConverter *cnv=u_getDefaultConverter(status); + length=ucnv_fromUChars(cnv, path, (int32_t)sizeof(pathBuffer), myPath, length, status); + u_releaseDefaultConverter(cnv); + if(U_FAILURE(*status)) { + return NULL; + } + if(length>=(int32_t)sizeof(pathBuffer)) { + /* not NUL-terminated - path too long */ + *status=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } +#else + /* the default converter is not available */ + 
*status=U_UNSUPPORTED_ERROR; + return NULL; +#endif + } + } + + return ures_open(path, localeID, status); +} diff --git a/deps/icu-small/source/common/uresbund.cpp b/deps/icu-small/source/common/uresbund.cpp index 6813645c98..0dcbcaaf90 100644 --- a/deps/icu-small/source/common/uresbund.cpp +++ b/deps/icu-small/source/common/uresbund.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/uresdata.cpp b/deps/icu-small/source/common/uresdata.cpp index f775bcdffb..a0b8d3ba90 100644 --- a/deps/icu-small/source/common/uresdata.cpp +++ b/deps/icu-small/source/common/uresdata.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * and others. All Rights Reserved. 
******************************************************************************* * file name: uresdata.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -758,7 +758,9 @@ res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t indexR, const char **key) { uint32_t offset=RES_GET_OFFSET(table); int32_t length; - U_ASSERT(indexR>=0); /* to ensure the index is not negative */ + if (indexR < 0) { + return RES_BOGUS; + } switch(RES_GET_TYPE(table)) { case URES_TABLE: { if (offset != 0) { /* empty if offset==0 */ @@ -836,7 +838,9 @@ UBool icu::ResourceTable::getKeyAndValue(int32_t i, U_CAPI Resource U_EXPORT2 res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) { uint32_t offset=RES_GET_OFFSET(array); - U_ASSERT(indexR>=0); /* to ensure the index is not negative */ + if (indexR < 0) { + return RES_BOGUS; + } switch(RES_GET_TYPE(array)) { case URES_ARRAY: { if (offset!=0) { /* empty if offset==0 */ @@ -923,14 +927,14 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch if(t2 == RES_BOGUS) { /* if we fail to get the resource by key, maybe we got an index */ indexR = uprv_strtol(pathP, &closeIndex, 10); - if(*closeIndex == 0) { + if(indexR >= 0 && *closeIndex == 0) { /* if we indeed have an index, try to get the item by index */ t2 = res_getTableItemByIndex(pResData, t1, indexR, key); - } + } // else t2 is already RES_BOGUS } } else if(URES_IS_ARRAY(type)) { indexR = uprv_strtol(pathP, &closeIndex, 10); - if(*closeIndex == 0) { + if(indexR >= 0 && *closeIndex == 0) { t2 = res_getArrayItem(pResData, t1, indexR); } else { t2 = RES_BOGUS; /* have an array, but don't have a valid index */ diff --git a/deps/icu-small/source/common/uresdata.h b/deps/icu-small/source/common/uresdata.h index ae7d9a817d..8d845e3dfc 100644 --- a/deps/icu-small/source/common/uresdata.h +++ b/deps/icu-small/source/common/uresdata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: 
Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: uresdata.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uresimp.h b/deps/icu-small/source/common/uresimp.h index 8339240a27..e4f75c9f11 100644 --- a/deps/icu-small/source/common/uresimp.h +++ b/deps/icu-small/source/common/uresimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ureslocs.h b/deps/icu-small/source/common/ureslocs.h index 85dd8fb4b7..f7c3344ef2 100644 --- a/deps/icu-small/source/common/ureslocs.h +++ b/deps/icu-small/source/common/ureslocs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/usc_impl.c b/deps/icu-small/source/common/usc_impl.c deleted file mode 100644 index c2b2a3656c..0000000000 --- a/deps/icu-small/source/common/usc_impl.c +++ /dev/null @@ -1,361 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* -* File USC_IMPL.C -* -* Modification History: -* -* Date Name Description -* 07/08/2002 Eric Mader Creation. -****************************************************************************** -*/ - -#include "unicode/uscript.h" -#include "usc_impl.h" -#include "cmemory.h" - -#define PAREN_STACK_DEPTH 32 - -#define MOD(sp) ((sp) % PAREN_STACK_DEPTH) -#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) -#define INC(sp,count) (MOD((sp) + (count))) -#define INC1(sp) (INC(sp, 1)) -#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) -#define DEC1(sp) (DEC(sp, 1)) -#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0) -#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun)) -#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP]) -#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0) - -struct ParenStackEntry -{ - int32_t pairIndex; - UScriptCode scriptCode; -}; - -struct UScriptRun -{ - int32_t textLength; - const UChar *textArray; - - int32_t scriptStart; - int32_t scriptLimit; - UScriptCode scriptCode; - - struct ParenStackEntry parenStack[PAREN_STACK_DEPTH]; - int32_t parenSP; - int32_t pushCount; - int32_t fixupCount; -}; - -static int8_t highBit(int32_t value); - -static const UChar32 pairedChars[] = { - 0x0028, 0x0029, /* ascii paired punctuation */ - 0x003c, 0x003e, - 0x005b, 0x005d, - 0x007b, 0x007d, - 0x00ab, 0x00bb, /* guillemets */ - 0x2018, 0x2019, /* general punctuation */ - 0x201c, 0x201d, - 0x2039, 0x203a, - 0x3008, 0x3009, /* chinese paired punctuation */ - 0x300a, 0x300b, - 0x300c, 0x300d, - 0x300e, 0x300f, - 0x3010, 0x3011, - 0x3014, 0x3015, - 0x3016, 0x3017, - 0x3018, 0x3019, - 0x301a, 0x301b -}; - -static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode) -{ - scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount); - scriptRun->fixupCount = 
LIMIT_INC(scriptRun->fixupCount); - - scriptRun->parenSP = INC1(scriptRun->parenSP); - scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex; - scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode; -} - -static void pop(UScriptRun *scriptRun) -{ - if (STACK_IS_EMPTY(scriptRun)) { - return; - } - - if (scriptRun->fixupCount > 0) { - scriptRun->fixupCount -= 1; - } - - scriptRun->pushCount -= 1; - scriptRun->parenSP = DEC1(scriptRun->parenSP); - - /* If the stack is now empty, reset the stack - pointers to their initial values. - */ - if (STACK_IS_EMPTY(scriptRun)) { - scriptRun->parenSP = -1; - } -} - -static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode) -{ - int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount); - - while (scriptRun->fixupCount-- > 0) { - fixupSP = INC1(fixupSP); - scriptRun->parenStack[fixupSP].scriptCode = scriptCode; - } -} - -static int8_t -highBit(int32_t value) -{ - int8_t bit = 0; - - if (value <= 0) { - return -32; - } - - if (value >= 1 << 16) { - value >>= 16; - bit += 16; - } - - if (value >= 1 << 8) { - value >>= 8; - bit += 8; - } - - if (value >= 1 << 4) { - value >>= 4; - bit += 4; - } - - if (value >= 1 << 2) { - value >>= 2; - bit += 2; - } - - if (value >= 1 << 1) { - //value >>= 1; - bit += 1; - } - - return bit; -} - -static int32_t -getPairIndex(UChar32 ch) -{ - int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars); - int32_t pairedCharPower = 1 << highBit(pairedCharCount); - int32_t pairedCharExtra = pairedCharCount - pairedCharPower; - - int32_t probe = pairedCharPower; - int32_t pairIndex = 0; - - if (ch >= pairedChars[pairedCharExtra]) { - pairIndex = pairedCharExtra; - } - - while (probe > (1 << 0)) { - probe >>= 1; - - if (ch >= pairedChars[pairIndex + probe]) { - pairIndex += probe; - } - } - - if (pairedChars[pairIndex] != ch) { - pairIndex = -1; - } - - return pairIndex; -} - -static UBool -sameScript(UScriptCode scriptOne, UScriptCode scriptTwo) -{ - return scriptOne <= 
USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo; -} - -U_CAPI UScriptRun * U_EXPORT2 -uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode) -{ - UScriptRun *result = NULL; - - if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - - result = uprv_malloc(sizeof (UScriptRun)); - - if (result == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - uscript_setRunText(result, src, length, pErrorCode); - - /* Release the UScriptRun if uscript_setRunText() returns an error */ - if (U_FAILURE(*pErrorCode)) { - uprv_free(result); - result = NULL; - } - - return result; -} - -U_CAPI void U_EXPORT2 -uscript_closeRun(UScriptRun *scriptRun) -{ - if (scriptRun != NULL) { - uprv_free(scriptRun); - } -} - -U_CAPI void U_EXPORT2 -uscript_resetRun(UScriptRun *scriptRun) -{ - if (scriptRun != NULL) { - scriptRun->scriptStart = 0; - scriptRun->scriptLimit = 0; - scriptRun->scriptCode = USCRIPT_INVALID_CODE; - scriptRun->parenSP = -1; - scriptRun->pushCount = 0; - scriptRun->fixupCount = 0; - } -} - -U_CAPI void U_EXPORT2 -uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode) -{ - if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - scriptRun->textArray = src; - scriptRun->textLength = length; - - uscript_resetRun(scriptRun); -} - -U_CAPI UBool U_EXPORT2 -uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript) -{ - UErrorCode error = U_ZERO_ERROR; - - /* if we've fallen off the end of the text, we're done */ - if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) { - return FALSE; - } - - SYNC_FIXUP(scriptRun); - scriptRun->scriptCode = USCRIPT_COMMON; - - for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < 
scriptRun->textLength; scriptRun->scriptLimit += 1) { - UChar high = scriptRun->textArray[scriptRun->scriptLimit]; - UChar32 ch = high; - UScriptCode sc; - int32_t pairIndex; - - /* - * if the character is a high surrogate and it's not the last one - * in the text, see if it's followed by a low surrogate - */ - if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) { - UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1]; - - /* - * if it is followed by a low surrogate, - * consume it and form the full character - */ - if (low >= 0xDC00 && low <= 0xDFFF) { - ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000; - scriptRun->scriptLimit += 1; - } - } - - sc = uscript_getScript(ch, &error); - pairIndex = getPairIndex(ch); - - /* - * Paired character handling: - * - * if it's an open character, push it onto the stack. - * if it's a close character, find the matching open on the - * stack, and use that script code. Any non-matching open - * characters above it on the stack will be poped. 
- */ - if (pairIndex >= 0) { - if ((pairIndex & 1) == 0) { - push(scriptRun, pairIndex, scriptRun->scriptCode); - } else { - int32_t pi = pairIndex & ~1; - - while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) { - pop(scriptRun); - } - - if (STACK_IS_NOT_EMPTY(scriptRun)) { - sc = TOP(scriptRun).scriptCode; - } - } - } - - if (sameScript(scriptRun->scriptCode, sc)) { - if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { - scriptRun->scriptCode = sc; - - fixup(scriptRun, scriptRun->scriptCode); - } - - /* - * if this character is a close paired character, - * pop the matching open character from the stack - */ - if (pairIndex >= 0 && (pairIndex & 1) != 0) { - pop(scriptRun); - } - } else { - /* - * if the run broke on a surrogate pair, - * end it before the high surrogate - */ - if (ch >= 0x10000) { - scriptRun->scriptLimit -= 1; - } - - break; - } - } - - - if (pRunStart != NULL) { - *pRunStart = scriptRun->scriptStart; - } - - if (pRunLimit != NULL) { - *pRunLimit = scriptRun->scriptLimit; - } - - if (pRunScript != NULL) { - *pRunScript = scriptRun->scriptCode; - } - - return TRUE; -} diff --git a/deps/icu-small/source/common/usc_impl.cpp b/deps/icu-small/source/common/usc_impl.cpp new file mode 100644 index 0000000000..d69880326a --- /dev/null +++ b/deps/icu-small/source/common/usc_impl.cpp @@ -0,0 +1,361 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File USC_IMPL.C +* +* Modification History: +* +* Date Name Description +* 07/08/2002 Eric Mader Creation. 
+****************************************************************************** +*/ + +#include "unicode/uscript.h" +#include "usc_impl.h" +#include "cmemory.h" + +#define PAREN_STACK_DEPTH 32 + +#define MOD(sp) ((sp) % PAREN_STACK_DEPTH) +#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) +#define INC(sp,count) (MOD((sp) + (count))) +#define INC1(sp) (INC(sp, 1)) +#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) +#define DEC1(sp) (DEC(sp, 1)) +#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0) +#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun)) +#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP]) +#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0) + +struct ParenStackEntry +{ + int32_t pairIndex; + UScriptCode scriptCode; +}; + +struct UScriptRun +{ + int32_t textLength; + const UChar *textArray; + + int32_t scriptStart; + int32_t scriptLimit; + UScriptCode scriptCode; + + struct ParenStackEntry parenStack[PAREN_STACK_DEPTH]; + int32_t parenSP; + int32_t pushCount; + int32_t fixupCount; +}; + +static int8_t highBit(int32_t value); + +static const UChar32 pairedChars[] = { + 0x0028, 0x0029, /* ascii paired punctuation */ + 0x003c, 0x003e, + 0x005b, 0x005d, + 0x007b, 0x007d, + 0x00ab, 0x00bb, /* guillemets */ + 0x2018, 0x2019, /* general punctuation */ + 0x201c, 0x201d, + 0x2039, 0x203a, + 0x3008, 0x3009, /* chinese paired punctuation */ + 0x300a, 0x300b, + 0x300c, 0x300d, + 0x300e, 0x300f, + 0x3010, 0x3011, + 0x3014, 0x3015, + 0x3016, 0x3017, + 0x3018, 0x3019, + 0x301a, 0x301b +}; + +static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode) +{ + scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount); + scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount); + + scriptRun->parenSP = INC1(scriptRun->parenSP); + scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex; + scriptRun->parenStack[scriptRun->parenSP].scriptCode = 
scriptCode; +} + +static void pop(UScriptRun *scriptRun) +{ + if (STACK_IS_EMPTY(scriptRun)) { + return; + } + + if (scriptRun->fixupCount > 0) { + scriptRun->fixupCount -= 1; + } + + scriptRun->pushCount -= 1; + scriptRun->parenSP = DEC1(scriptRun->parenSP); + + /* If the stack is now empty, reset the stack + pointers to their initial values. + */ + if (STACK_IS_EMPTY(scriptRun)) { + scriptRun->parenSP = -1; + } +} + +static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode) +{ + int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount); + + while (scriptRun->fixupCount-- > 0) { + fixupSP = INC1(fixupSP); + scriptRun->parenStack[fixupSP].scriptCode = scriptCode; + } +} + +static int8_t +highBit(int32_t value) +{ + int8_t bit = 0; + + if (value <= 0) { + return -32; + } + + if (value >= 1 << 16) { + value >>= 16; + bit += 16; + } + + if (value >= 1 << 8) { + value >>= 8; + bit += 8; + } + + if (value >= 1 << 4) { + value >>= 4; + bit += 4; + } + + if (value >= 1 << 2) { + value >>= 2; + bit += 2; + } + + if (value >= 1 << 1) { + //value >>= 1; + bit += 1; + } + + return bit; +} + +static int32_t +getPairIndex(UChar32 ch) +{ + int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars); + int32_t pairedCharPower = 1 << highBit(pairedCharCount); + int32_t pairedCharExtra = pairedCharCount - pairedCharPower; + + int32_t probe = pairedCharPower; + int32_t pairIndex = 0; + + if (ch >= pairedChars[pairedCharExtra]) { + pairIndex = pairedCharExtra; + } + + while (probe > (1 << 0)) { + probe >>= 1; + + if (ch >= pairedChars[pairIndex + probe]) { + pairIndex += probe; + } + } + + if (pairedChars[pairIndex] != ch) { + pairIndex = -1; + } + + return pairIndex; +} + +static UBool +sameScript(UScriptCode scriptOne, UScriptCode scriptTwo) +{ + return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo; +} + +U_CAPI UScriptRun * U_EXPORT2 +uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode) +{ + UScriptRun 
*result = NULL; + + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + + result = (UScriptRun *)uprv_malloc(sizeof (UScriptRun)); + + if (result == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + uscript_setRunText(result, src, length, pErrorCode); + + /* Release the UScriptRun if uscript_setRunText() returns an error */ + if (U_FAILURE(*pErrorCode)) { + uprv_free(result); + result = NULL; + } + + return result; +} + +U_CAPI void U_EXPORT2 +uscript_closeRun(UScriptRun *scriptRun) +{ + if (scriptRun != NULL) { + uprv_free(scriptRun); + } +} + +U_CAPI void U_EXPORT2 +uscript_resetRun(UScriptRun *scriptRun) +{ + if (scriptRun != NULL) { + scriptRun->scriptStart = 0; + scriptRun->scriptLimit = 0; + scriptRun->scriptCode = USCRIPT_INVALID_CODE; + scriptRun->parenSP = -1; + scriptRun->pushCount = 0; + scriptRun->fixupCount = 0; + } +} + +U_CAPI void U_EXPORT2 +uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode) +{ + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + scriptRun->textArray = src; + scriptRun->textLength = length; + + uscript_resetRun(scriptRun); +} + +U_CAPI UBool U_EXPORT2 +uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript) +{ + UErrorCode error = U_ZERO_ERROR; + + /* if we've fallen off the end of the text, we're done */ + if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) { + return FALSE; + } + + SYNC_FIXUP(scriptRun); + scriptRun->scriptCode = USCRIPT_COMMON; + + for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) { + UChar high = scriptRun->textArray[scriptRun->scriptLimit]; + UChar32 ch = high; + UScriptCode sc; + int32_t pairIndex; + + /* + * if the 
character is a high surrogate and it's not the last one + * in the text, see if it's followed by a low surrogate + */ + if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) { + UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1]; + + /* + * if it is followed by a low surrogate, + * consume it and form the full character + */ + if (low >= 0xDC00 && low <= 0xDFFF) { + ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000; + scriptRun->scriptLimit += 1; + } + } + + sc = uscript_getScript(ch, &error); + pairIndex = getPairIndex(ch); + + /* + * Paired character handling: + * + * if it's an open character, push it onto the stack. + * if it's a close character, find the matching open on the + * stack, and use that script code. Any non-matching open + * characters above it on the stack will be poped. + */ + if (pairIndex >= 0) { + if ((pairIndex & 1) == 0) { + push(scriptRun, pairIndex, scriptRun->scriptCode); + } else { + int32_t pi = pairIndex & ~1; + + while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) { + pop(scriptRun); + } + + if (STACK_IS_NOT_EMPTY(scriptRun)) { + sc = TOP(scriptRun).scriptCode; + } + } + } + + if (sameScript(scriptRun->scriptCode, sc)) { + if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { + scriptRun->scriptCode = sc; + + fixup(scriptRun, scriptRun->scriptCode); + } + + /* + * if this character is a close paired character, + * pop the matching open character from the stack + */ + if (pairIndex >= 0 && (pairIndex & 1) != 0) { + pop(scriptRun); + } + } else { + /* + * if the run broke on a surrogate pair, + * end it before the high surrogate + */ + if (ch >= 0x10000) { + scriptRun->scriptLimit -= 1; + } + + break; + } + } + + + if (pRunStart != NULL) { + *pRunStart = scriptRun->scriptStart; + } + + if (pRunLimit != NULL) { + *pRunLimit = scriptRun->scriptLimit; + } + + if (pRunScript != NULL) { + *pRunScript = scriptRun->scriptCode; + } + + return TRUE; +} 
diff --git a/deps/icu-small/source/common/usc_impl.h b/deps/icu-small/source/common/usc_impl.h index 7c9c5e0950..44899649d4 100644 --- a/deps/icu-small/source/common/usc_impl.h +++ b/deps/icu-small/source/common/usc_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/uscript.c b/deps/icu-small/source/common/uscript.c deleted file mode 100644 index 336e185799..0000000000 --- a/deps/icu-small/source/common/uscript.c +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File USCRIPT.C -* -* Modification History: -* -* Date Name Description -* 07/06/2001 Ram Creation. 
-****************************************************************************** -*/ - -#include "unicode/uchar.h" -#include "unicode/uscript.h" -#include "unicode/uloc.h" -#include "cmemory.h" -#include "cstring.h" - -static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; -static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; -static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; - -static int32_t -setCodes(const UScriptCode *src, int32_t length, - UScriptCode *dest, int32_t capacity, UErrorCode *err) { - int32_t i; - if(U_FAILURE(*err)) { return 0; } - if(length > capacity) { - *err = U_BUFFER_OVERFLOW_ERROR; - return length; - } - for(i = 0; i < length; ++i) { - dest[i] = src[i]; - } - return length; -} - -static int32_t -setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { - if(U_FAILURE(*err)) { return 0; } - if(1 > capacity) { - *err = U_BUFFER_OVERFLOW_ERROR; - return 1; - } - scripts[0] = script; - return 1; -} - -static int32_t -getCodesFromLocale(const char *locale, - UScriptCode *scripts, int32_t capacity, UErrorCode *err) { - UErrorCode internalErrorCode = U_ZERO_ERROR; - char lang[8]; - char script[8]; - int32_t scriptLength; - if(U_FAILURE(*err)) { return 0; } - // Multi-script languages, equivalent to the LocaleScript data - // that we used to load from locale resource bundles. 
- /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode); - if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { - return 0; - } - if(0 == uprv_strcmp(lang, "ja")) { - return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); - } - if(0 == uprv_strcmp(lang, "ko")) { - return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); - } - scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode); - if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { - return 0; - } - if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) { - return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); - } - // Explicit script code. - if(scriptLength != 0) { - UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); - if(scriptCode != USCRIPT_INVALID_CODE) { - if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { - scriptCode = USCRIPT_HAN; - } - return setOneCode(scriptCode, scripts, capacity, err); - } - } - return 0; -} - -/* TODO: this is a bad API and should be deprecated, ticket #11141 */ -U_CAPI int32_t U_EXPORT2 -uscript_getCode(const char* nameOrAbbrOrLocale, - UScriptCode* fillIn, - int32_t capacity, - UErrorCode* err){ - UBool triedCode; - char likely[ULOC_FULLNAME_CAPACITY]; - UErrorCode internalErrorCode; - int32_t length; - - if(U_FAILURE(*err)) { - return 0; - } - if(nameOrAbbrOrLocale==NULL || - (fillIn == NULL ? 
capacity != 0 : capacity < 0)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - triedCode = FALSE; - if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){ - /* try long and abbreviated script names first */ - UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); - if(code!=USCRIPT_INVALID_CODE) { - return setOneCode(code, fillIn, capacity, err); - } - triedCode = TRUE; - } - internalErrorCode = U_ZERO_ERROR; - length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); - if(U_FAILURE(*err) || length != 0) { - return length; - } - (void)uloc_addLikelySubtags(nameOrAbbrOrLocale, - likely, UPRV_LENGTHOF(likely), &internalErrorCode); - if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { - length = getCodesFromLocale(likely, fillIn, capacity, err); - if(U_FAILURE(*err) || length != 0) { - return length; - } - } - if(!triedCode) { - /* still not found .. try long and abbreviated script names again */ - UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); - if(code!=USCRIPT_INVALID_CODE) { - return setOneCode(code, fillIn, capacity, err); - } - } - return 0; -} diff --git a/deps/icu-small/source/common/uscript.cpp b/deps/icu-small/source/common/uscript.cpp new file mode 100644 index 0000000000..83b5f7ef16 --- /dev/null +++ b/deps/icu-small/source/common/uscript.cpp @@ -0,0 +1,144 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File USCRIPT.C +* +* Modification History: +* +* Date Name Description +* 07/06/2001 Ram Creation. 
+****************************************************************************** +*/ + +#include "unicode/uchar.h" +#include "unicode/uscript.h" +#include "unicode/uloc.h" +#include "cmemory.h" +#include "cstring.h" + +static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; +static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; +static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; + +static int32_t +setCodes(const UScriptCode *src, int32_t length, + UScriptCode *dest, int32_t capacity, UErrorCode *err) { + int32_t i; + if(U_FAILURE(*err)) { return 0; } + if(length > capacity) { + *err = U_BUFFER_OVERFLOW_ERROR; + return length; + } + for(i = 0; i < length; ++i) { + dest[i] = src[i]; + } + return length; +} + +static int32_t +setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { + if(U_FAILURE(*err)) { return 0; } + if(1 > capacity) { + *err = U_BUFFER_OVERFLOW_ERROR; + return 1; + } + scripts[0] = script; + return 1; +} + +static int32_t +getCodesFromLocale(const char *locale, + UScriptCode *scripts, int32_t capacity, UErrorCode *err) { + UErrorCode internalErrorCode = U_ZERO_ERROR; + char lang[8]; + char script[8]; + int32_t scriptLength; + if(U_FAILURE(*err)) { return 0; } + // Multi-script languages, equivalent to the LocaleScript data + // that we used to load from locale resource bundles. 
+ /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode); + if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { + return 0; + } + if(0 == uprv_strcmp(lang, "ja")) { + return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); + } + if(0 == uprv_strcmp(lang, "ko")) { + return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); + } + scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode); + if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { + return 0; + } + if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) { + return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); + } + // Explicit script code. + if(scriptLength != 0) { + UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); + if(scriptCode != USCRIPT_INVALID_CODE) { + if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { + scriptCode = USCRIPT_HAN; + } + return setOneCode(scriptCode, scripts, capacity, err); + } + } + return 0; +} + +/* TODO: this is a bad API and should be deprecated, ticket #11141 */ +U_CAPI int32_t U_EXPORT2 +uscript_getCode(const char* nameOrAbbrOrLocale, + UScriptCode* fillIn, + int32_t capacity, + UErrorCode* err){ + UBool triedCode; + char likely[ULOC_FULLNAME_CAPACITY]; + UErrorCode internalErrorCode; + int32_t length; + + if(U_FAILURE(*err)) { + return 0; + } + if(nameOrAbbrOrLocale==NULL || + (fillIn == NULL ? 
capacity != 0 : capacity < 0)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + triedCode = FALSE; + if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){ + /* try long and abbreviated script names first */ + UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); + if(code!=USCRIPT_INVALID_CODE) { + return setOneCode(code, fillIn, capacity, err); + } + triedCode = TRUE; + } + internalErrorCode = U_ZERO_ERROR; + length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); + if(U_FAILURE(*err) || length != 0) { + return length; + } + (void)uloc_addLikelySubtags(nameOrAbbrOrLocale, + likely, UPRV_LENGTHOF(likely), &internalErrorCode); + if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { + length = getCodesFromLocale(likely, fillIn, capacity, err); + if(U_FAILURE(*err) || length != 0) { + return length; + } + } + if(!triedCode) { + /* still not found .. try long and abbreviated script names again */ + UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); + if(code!=USCRIPT_INVALID_CODE) { + return setOneCode(code, fillIn, capacity, err); + } + } + return 0; +} diff --git a/deps/icu-small/source/common/uscript_props.cpp b/deps/icu-small/source/common/uscript_props.cpp index 20c7cdce2f..f8ec5e361d 100644 --- a/deps/icu-small/source/common/uscript_props.cpp +++ b/deps/icu-small/source/common/uscript_props.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: uscript_props.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uset.cpp b/deps/icu-small/source/common/uset.cpp index f15d4a14ab..75ff5ddff5 100644 --- a/deps/icu-small/source/common/uset.cpp +++ b/deps/icu-small/source/common/uset.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uset.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uset_imp.h b/deps/icu-small/source/common/uset_imp.h index 8cb31edfcf..5f4a3113d9 100644 --- a/deps/icu-small/source/common/uset_imp.h +++ b/deps/icu-small/source/common/uset_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uset_imp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uset_props.cpp b/deps/icu-small/source/common/uset_props.cpp index cf772957ee..b68175c1d2 100644 --- a/deps/icu-small/source/common/uset_props.cpp +++ b/deps/icu-small/source/common/uset_props.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uset_props.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/usetiter.cpp b/deps/icu-small/source/common/usetiter.cpp index 2be2079c51..5d5d3c4e3d 100644 --- a/deps/icu-small/source/common/usetiter.cpp +++ b/deps/icu-small/source/common/usetiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ushape.cpp b/deps/icu-small/source/common/ushape.cpp index 4913a3ff57..d7886ac06c 100644 --- a/deps/icu-small/source/common/ushape.cpp +++ b/deps/icu-small/source/common/ushape.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: ushape.cpp - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp index fb9c18b66b..c4f831be2e 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: usprep.cpp - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ustack.cpp b/deps/icu-small/source/common/ustack.cpp index 1de79ecfe3..fb314b0ebe 100644 --- a/deps/icu-small/source/common/ustack.cpp +++ b/deps/icu-small/source/common/ustack.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ustr_cnv.cpp b/deps/icu-small/source/common/ustr_cnv.cpp index 4b845d5adc..951864f4a6 100644 --- a/deps/icu-small/source/common/ustr_cnv.cpp +++ b/deps/icu-small/source/common/ustr_cnv.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ustr_cnv.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ustr_cnv.h b/deps/icu-small/source/common/ustr_cnv.h index e647356560..12e86ea02f 100644 --- a/deps/icu-small/source/common/ustr_cnv.h +++ b/deps/icu-small/source/common/ustr_cnv.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ustr_cnv.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/ustr_imp.h b/deps/icu-small/source/common/ustr_imp.h index 21ee6cba62..eb5d072258 100644 --- a/deps/icu-small/source/common/ustr_imp.h +++ b/deps/icu-small/source/common/ustr_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ustr_imp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -18,23 +18,6 @@ #define __USTR_IMP_H__ #include "unicode/utypes.h" -#include "unicode/uiter.h" -#include "ucase.h" - -/** Simple declaration to avoid including unicode/ubrk.h. */ -#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR -# define UBRK_TYPEDEF_UBREAK_ITERATOR - typedef struct UBreakIterator UBreakIterator; -#endif - -#ifndef U_COMPARE_IGNORE_CASE -/* see also unorm.h */ -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - */ -#define U_COMPARE_IGNORE_CASE 0x10000 -#endif /** * Internal option for unorm_cmpEquivFold() for strncmp style. @@ -53,211 +36,6 @@ uprv_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool strncmpStyle, UBool codePointOrder); -/** - * Internal API, used by u_strcasecmp() etc. - * Compare strings case-insensitively, - * in code point order or code unit order. 
- */ -U_CFUNC int32_t -u_strcmpFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode); - -/** - * Interanl API, used for detecting length of - * shared prefix case-insensitively. - * @param s1 input string 1 - * @param length1 length of string 1, or -1 (NULL terminated) - * @param s2 input string 2 - * @param length2 length of string 2, or -1 (NULL terminated) - * @param options compare options - * @param matchLen1 (output) length of partial prefix match in s1 - * @param matchLen2 (output) length of partial prefix match in s2 - * @param pErrorCode receives error status - */ -U_CAPI void -u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - int32_t *matchLen1, int32_t *matchLen2, - UErrorCode *pErrorCode); - -/** - * Are the Unicode properties loaded? - * This must be used before internal functions are called that do - * not perform this check. - * Generate a debug assertion failure if data is not loaded. - */ -U_CFUNC UBool -uprv_haveProperties(UErrorCode *pErrorCode); - -/** - * Load the Unicode property data. - * Intended primarily for use from u_init(). - * Has no effect if property data is already loaded. - * NOT thread safe. - */ -/*U_CFUNC int8_t -uprv_loadPropsData(UErrorCode *errorCode);*/ - -/* - * Internal string casing functions implementing - * ustring.h/ustrcase.c and UnicodeString case mapping functions. - */ - -struct UCaseMap { - const UCaseProps *csp; -#if !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *iter; /* We adopt the iterator, so we own it. 
*/ -#endif - char locale[32]; - int32_t locCache; - uint32_t options; -}; - -#ifndef __UCASEMAP_H__ -typedef struct UCaseMap UCaseMap; -#endif - -#if UCONFIG_NO_BREAK_ITERATION -# define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 } -#else -# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 } -#endif - -U_CFUNC void -ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale); - -#ifndef U_STRING_CASE_MAPPER_DEFINED -#define U_STRING_CASE_MAPPER_DEFINED - -/** - * String case mapping function type, used by ustrcase_map(). - * All error checking must be done. - * The UCaseMap must be fully initialized, with locale and/or iter set as needed. - * src and dest must not overlap. - */ -typedef int32_t U_CALLCONV -UStringCaseMapper(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#endif - -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#endif - -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Implements argument checking and buffer handling - * for string case mapping as a common function. 
- */ -U_CFUNC int32_t -ustrcase_map(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UStringCaseMapper *stringCaseMapper, - UErrorCode *pErrorCode); - -/** - * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). - * UTF-8 version of UStringCaseMapper. - * All error checking must be done. - * The UCaseMap must be fully initialized, with locale and/or iter set as needed. - * src and dest must not overlap. - */ -typedef int32_t U_CALLCONV -UTF8CaseMapper(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** Implements UTF8CaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ucasemap_internalUTF8ToTitle(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Implements argument checking and buffer handling - * for UTF-8 string case mapping as a common function. - */ -U_CFUNC int32_t -ucasemap_mapUTF8(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - UTF8CaseMapper *stringCaseMapper, - UErrorCode *pErrorCode); - -#ifdef __cplusplus - -U_NAMESPACE_BEGIN -namespace GreekUpper { - -// Data bits. -static const uint32_t UPPER_MASK = 0x3ff; -static const uint32_t HAS_VOWEL = 0x1000; -static const uint32_t HAS_YPOGEGRAMMENI = 0x2000; -static const uint32_t HAS_ACCENT = 0x4000; -static const uint32_t HAS_DIALYTIKA = 0x8000; -// Further bits during data building and processing, not stored in the data map. -static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000; -static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000; - -static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; -static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = - HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; -static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; - -// State bits. 
-static const uint32_t AFTER_CASED = 1; -static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; - -uint32_t getLetterData(UChar32 c); - -/** - * Returns a non-zero value for each of the Greek combining diacritics - * listed in The Unicode Standard, version 8, chapter 7.2 Greek, - * plus some perispomeni look-alikes. - */ -uint32_t getDiacriticData(UChar32 c); - -} // namespace GreekUpper -U_NAMESPACE_END - -#endif // __cplusplus - U_CAPI int32_t U_EXPORT2 ustr_hashUCharsN(const UChar *str, int32_t length); diff --git a/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp b/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp index 6380877619..0b2ba02064 100644 --- a/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp +++ b/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ustr_titlecase_brkiter.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -22,30 +22,17 @@ #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/localpointer.h" #include "unicode/ubrk.h" #include "unicode/ucasemap.h" #include "cmemory.h" #include "ucase.h" -#include "ustr_imp.h" +#include "ucasemap_imp.h" -/* functions available in the common library (for unistr_case.cpp) */ +U_NAMESPACE_USE -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. - * Duplicate of the same function in ustrcase.cpp, to keep it inline. 
- */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - ustrcase_setTempCaseMapLocale(csm, locale); - } -} +/* functions available in the common library (for unistr_case.cpp) */ /* public API functions */ @@ -55,39 +42,73 @@ u_strToTitle(UChar *dest, int32_t destCapacity, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale); + LocalPointer ownedIter; + BreakIterator *iter; if(titleIter!=NULL) { - ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode); + iter=reinterpret_cast(titleIter); } else { - csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode); + iter=BreakIterator::createWordInstance(Locale(locale), *pErrorCode); + ownedIter.adoptInstead(iter); + } + if(U_FAILURE(*pErrorCode)) { + return 0; } - int32_t length=ustrcase_map( - &csm, + UnicodeString s(srcLength<0, src, srcLength); + iter->setText(s); + return ustrcase_mapWithOverlap( + ustrcase_getCaseLocale(locale), 0, iter, dest, destCapacity, src, srcLength, - ustrcase_internalToTitle, pErrorCode); - if(titleIter==NULL && csm.iter!=NULL) { - ubrk_close(csm.iter); + ustrcase_internalToTitle, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +int32_t CaseMap::toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + LocalPointer ownedIter; + if(iter==NULL) { + iter=BreakIterator::createWordInstance(Locale(locale), errorCode); + ownedIter.adoptInstead(iter); + } + if(U_FAILURE(errorCode)) { + return 0; } - return length; + UnicodeString s(srcLength<0, src, srcLength); + iter->setText(s); + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, iter, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, edits, 
errorCode); } +U_NAMESPACE_END + U_CAPI int32_t U_EXPORT2 ucasemap_toTitle(UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode) { - if(csm->iter!=NULL) { - ubrk_setText(csm->iter, src, srcLength, pErrorCode); - } else { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (csm->iter == NULL) { + csm->iter = BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { + return 0; } + UnicodeString s(srcLength<0, src, srcLength); + csm->iter->setText(s); return ustrcase_map( - csm, + csm->caseLocale, csm->options, csm->iter, dest, destCapacity, src, srcLength, - ustrcase_internalToTitle, pErrorCode); + ustrcase_internalToTitle, NULL, *pErrorCode); } #endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/deps/icu-small/source/common/ustr_wcs.cpp b/deps/icu-small/source/common/ustr_wcs.cpp index 572e41290f..8b6e99221e 100644 --- a/deps/icu-small/source/common/ustr_wcs.cpp +++ b/deps/icu-small/source/common/ustr_wcs.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ustr_wcs.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -254,7 +254,7 @@ u_strToWCS(wchar_t *dest, srcLength = u_strlen(src); } if(0 < srcLength && srcLength <= destCapacity){ - u_memcpy(dest, src, srcLength); + u_memcpy((UChar *)dest, src, srcLength); } if(pDestLength){ *pDestLength = srcLength; @@ -509,7 +509,7 @@ u_strFromWCS(UChar *dest, srcLength = u_strlen((const UChar *)src); } if(0 < srcLength && srcLength <= destCapacity){ - u_memcpy(dest, src, srcLength); + u_memcpy(dest, (const UChar *)src, srcLength); } if(pDestLength){ *pDestLength = srcLength; diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp index be5c988bd1..b12e7a7c0b 100644 --- a/deps/icu-small/source/common/ustrcase.cpp +++ b/deps/icu-small/source/common/ustrcase.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: ustrcase.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -22,6 +22,8 @@ #include "unicode/utypes.h" #include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" #include "unicode/ustring.h" #include "unicode/ucasemap.h" #include "unicode/ubrk.h" @@ -29,9 +31,30 @@ #include "unicode/utf16.h" #include "cmemory.h" #include "ucase.h" +#include "ucasemap_imp.h" #include "ustr_imp.h" #include "uassert.h" +U_NAMESPACE_BEGIN + +namespace { + +int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, + Edits *edits, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + if (destIndex > destCapacity) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + } else if (edits != NULL) { + edits->copyErrorTo(errorCode); + } + } + return destIndex; +} + +} // namespace + +U_NAMESPACE_END + U_NAMESPACE_USE /* string casing ------------------------------------------------------------ */ @@ -39,21 +62,43 @@ U_NAMESPACE_USE /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. 
*/ static inline int32_t appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, - int32_t result, const UChar *s) { + int32_t result, const UChar *s, + int32_t cpLength, uint32_t options, icu::Edits *edits) { UChar32 c; int32_t length; /* decode the result */ if(result<0) { /* (not) original code point */ + if(edits!=NULL) { + edits->addUnchanged(cpLength); + if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + } c=~result; - length=U16_LENGTH(c); - } else if(result<=UCASE_MAX_STRING_LENGTH) { - c=U_SENTINEL; - length=result; + if(destIndexaddReplace(cpLength, 1); + } + return destIndex; + } else { + c=result; + length=U16_LENGTH(c); + } + if(edits!=NULL) { + edits->addReplace(cpLength, length); + } } if(length>(INT32_MAX-destIndex)) { return -1; // integer overflow @@ -99,9 +144,15 @@ appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { } static inline int32_t -appendString(UChar *dest, int32_t destIndex, int32_t destCapacity, - const UChar *s, int32_t length) { +appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, + const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { if(length>0) { + if(edits!=NULL) { + edits->addUnchanged(length); + if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + } if(length>(INT32_MAX-destIndex)) { return -1; // integer overflow } @@ -150,84 +201,66 @@ utf16_caseContextIterator(void *context, int8_t dir) { * context [0..srcLength[ into account. 
*/ static int32_t -_caseMap(const UCaseMap *csm, UCaseMapFull *map, +_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, UChar *dest, int32_t destCapacity, const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, - UErrorCode *pErrorCode) { - const UChar *s; - UChar32 c, c2 = 0; - int32_t srcIndex, destIndex; - int32_t locCache; - - locCache=csm->locCache; - + icu::Edits *edits, + UErrorCode &errorCode) { /* case mapping loop */ - srcIndex=srcStart; - destIndex=0; + int32_t srcIndex=srcStart; + int32_t destIndex=0; while(srcIndexcpStart=srcIndex; + int32_t cpStart; + csc->cpStart=cpStart=srcIndex; + UChar32 c; U16_NEXT(src, srcIndex, srcLimit, c); csc->cpLimit=srcIndex; - c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); - if((destIndexdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } return destIndex; } #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const UCaseMap *csm, +ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const UChar *s; - UChar32 c; - int32_t prev, titleStart, titleLimit, idx, destIndex; - UBool isFirstIndex; - - if(U_FAILURE(*pErrorCode)) { + icu::Edits *edits, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return 0; } - // Use the C++ abstract base class to minimize dependencies. - // TODO: Change UCaseMap.iter to store a BreakIterator directly. 
- BreakIterator *bi=reinterpret_cast(csm->iter); - /* set up local variables */ - int32_t locCache=csm->locCache; UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - destIndex=0; - prev=0; - isFirstIndex=TRUE; + int32_t destIndex=0; + int32_t prev=0; + UBool isFirstIndex=TRUE; /* titlecasing loop */ while(prevfirst(); + index=iter->first(); } else { - idx=bi->next(); + index=iter->next(); } - if(idx==UBRK_DONE || idx>srcLength) { - idx=srcLength; + if(index==UBRK_DONE || index>srcLength) { + index=srcLength; } /* @@ -243,29 +276,32 @@ ustrcase_internalToTitle(const UCaseMap *csm, * b) first case letter (titlecase) [titleStart..titleLimit[ * c) subsequent characters (lowercase) [titleLimit..index[ */ - if(prevoptions&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { + int32_t titleStart=prev; + int32_t titleLimit=prev; + UChar32 c; + U16_NEXT(src, titleLimit, index, c); + if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) { /* Adjust the titlecasing index (titleStart) to the next cased character. 
*/ for(;;) { titleStart=titleLimit; - if(titleLimit==idx) { + if(titleLimit==index) { /* * only uncased characters in [prev..index[ * stop with titleStart==titleLimit==index */ break; } - U16_NEXT(src, titleLimit, idx, c); - if(UCASE_NONE!=ucase_getType(csm->csp, c)) { + U16_NEXT(src, titleLimit, index, c); + if(UCASE_NONE!=ucase_getType(c)) { break; /* cased letter at [titleStart..titleLimit[ */ } } - destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev); + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+prev, titleStart-prev, options, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -274,48 +310,64 @@ ustrcase_internalToTitle(const UCaseMap *csm, /* titlecase c which is from [titleStart..titleLimit[ */ csc.cpStart=titleStart; csc.cpLimit=titleLimit; - c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); - destIndex=appendResult(dest, destIndex, destCapacity, c, s); + const UChar *s; + c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale); + destIndex=appendResult(dest, destIndex, destCapacity, c, s, + titleLimit-titleStart, options, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } /* Special case Dutch IJ titlecasing */ - if (titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && - (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && - (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); - if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; + if (titleStart+1 < index && + caseLocale == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { + if (src[titleStart+1] == 0x006A) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); + if(destIndex<0) { + 
errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if(edits!=NULL) { + edits->addReplace(1, 1); + } + titleLimit++; + } else if (src[titleStart+1] == 0x004A) { + // Keep the capital J from getting lowercased. + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+titleStart+1, 1, options, edits); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + titleLimit++; } - titleLimit++; } /* lowercase [titleLimit..index[ */ - if(titleLimitoptions&U_TITLECASE_NO_LOWERCASE)==0) { + if(titleLimitdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } #endif // !UCONFIG_NO_BREAK_ITERATION @@ -791,11 +840,11 @@ uint32_t getDiacriticData(UChar32 c) { } } -UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i, int32_t length) { +UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) { while (i < length) { UChar32 c; U16_NEXT(s, i, length, c); - int32_t type = ucase_getTypeOrIgnorable(csp, c); + int32_t type = ucase_getTypeOrIgnorable(c); if ((type & UCASE_IGNORABLE) != 0) { // Case-ignorable, continue with the loop. } else if (type != UCASE_NONE) { @@ -813,11 +862,11 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i, * for each character. * TODO: Try to re-consolidate one way or another with the non-Greek function. 
*/ -int32_t toUpper(const UCaseMap *csm, +int32_t toUpper(uint32_t options, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - int32_t locCache = UCASE_LOC_GREEK; + Edits *edits, + UErrorCode &errorCode) { int32_t destIndex=0; uint32_t state = 0; for (int32_t i = 0; i < srcLength;) { @@ -825,7 +874,7 @@ int32_t toUpper(const UCaseMap *csm, UChar32 c; U16_NEXT(src, nextIndex, srcLength, c); uint32_t nextState = 0; - int32_t type = ucase_getTypeOrIgnorable(csm->csp, c); + int32_t type = ucase_getTypeOrIgnorable(c); if ((type & UCASE_IGNORABLE) != 0) { // c is case-ignorable nextState |= (state & AFTER_CASED); @@ -872,7 +921,7 @@ int32_t toUpper(const UCaseMap *csm, (data & HAS_ACCENT) != 0 && numYpogegrammeni == 0 && (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) { + !isFollowedByCasedLetter(src, nextIndex, srcLength)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. if (i == nextIndex) { @@ -890,43 +939,67 @@ int32_t toUpper(const UCaseMap *csm, data &= ~HAS_EITHER_DIALYTIKA; } } - destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper); - if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika - } - if (destIndex >= 0 && addTonos) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); - } - while (destIndex >= 0 && numYpogegrammeni > 0) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); - --numYpogegrammeni; - } - if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; + + UBool change = TRUE; + if (edits != NULL) { + // Find out first whether we are changing the text. 
+ change = src[i] != upper || numYpogegrammeni > 0; + int32_t i2 = i + 1; + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + change |= i2 >= nextIndex || src[i2] != 0x308; + ++i2; + } + if (addTonos) { + change |= i2 >= nextIndex || src[i2] != 0x301; + ++i2; + } + int32_t oldLength = nextIndex - i; + int32_t newLength = (i2 - i) + numYpogegrammeni; + change |= oldLength != newLength; + if (change) { + if (edits != NULL) { + edits->addReplace(oldLength, newLength); + } + } else { + if (edits != NULL) { + edits->addUnchanged(oldLength); + } + // Write unchanged text? + change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0; + } } - } else { - const UChar *s; - UChar32 c2 = 0; - c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); - if((destIndex= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika + } + if (destIndex >= 0 && addTonos) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); + } + while (destIndex >= 0 && numYpogegrammeni > 0) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); + --numYpogegrammeni; + } if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } + } else { + const UChar *s; + c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + nextIndex - i, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } } i = nextIndex; state = nextState; } - if(destIndex>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } return destIndex; } @@ -936,94 +1009,128 @@ U_NAMESPACE_END /* functions available in the common library (for unistr_case.cpp) */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const UCaseMap *csm, +ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, 
int32_t srcLength, - UErrorCode *pErrorCode) { + icu::Edits *edits, + UErrorCode &errorCode) { UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - return _caseMap( - csm, ucase_toFullLower, + int32_t destIndex = _caseMap( + caseLocale, options, ucase_toFullLower, dest, destCapacity, src, &csc, 0, srcLength, - pErrorCode); + edits, errorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const UCaseMap *csm, +ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - int32_t locCache = csm->locCache; - if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { - return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode); + icu::Edits *edits, + UErrorCode &errorCode) { + int32_t destIndex; + if (caseLocale == UCASE_LOC_GREEK) { + destIndex = GreekUpper::toUpper(options, dest, destCapacity, + src, srcLength, edits, errorCode); + } else { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + destIndex = _caseMap( + caseLocale, options, ucase_toFullUpper, + dest, destCapacity, + src, &csc, 0, srcLength, + edits, errorCode); } - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - return _caseMap( - csm, ucase_toFullUpper, - dest, destCapacity, - src, &csc, 0, srcLength, - pErrorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } -static int32_t -ustr_foldCase(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode) { - int32_t srcIndex, destIndex; - - const UChar *s; - UChar32 c, c2 = 0; - +U_CFUNC int32_t U_CALLCONV +ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, 
UCASEMAP_BREAK_ITERATOR_UNUSED + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { /* case mapping loop */ - srcIndex=destIndex=0; - while(srcIndexdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; -} - -U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } U_CFUNC int32_t -ustrcase_map(const UCaseMap *csm, +ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, - UErrorCode *pErrorCode) { + icu::Edits *edits, + UErrorCode &errorCode) { + int32_t destLength; + + /* check argument values */ + if(U_FAILURE(errorCode)) { + return 0; + } + if( destCapacity<0 || + (dest==NULL && destCapacity>0) || + src==NULL || + srcLength<-1 + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* get the string length */ + if(srcLength==-1) { + srcLength=u_strlen(src); + } + + /* check for overlapping source and destination */ + if( dest!=NULL && + ((src>=dest && src<(dest+destCapacity)) || + (dest>=src && dest<(src+srcLength))) + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(edits!=NULL) { + edits->reset(); + } + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + dest, destCapacity, src, srcLength, edits, errorCode); + return u_terminateUChars(dest, destCapacity, destLength, &errorCode); +} + +U_CFUNC int32_t +ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper 
*stringCaseMapper, + UErrorCode &errorCode) { UChar buffer[300]; UChar *temp; int32_t destLength; /* check argument values */ - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(errorCode)) { return 0; } if( destCapacity<0 || @@ -1031,7 +1138,7 @@ ustrcase_map(const UCaseMap *csm, src==NULL || srcLength<-1 ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + errorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -1053,7 +1160,7 @@ ustrcase_map(const UCaseMap *csm, /* allocate a buffer */ temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); if(temp==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + errorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } } @@ -1061,21 +1168,19 @@ ustrcase_map(const UCaseMap *csm, temp=dest; } - destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode); + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + temp, destCapacity, src, srcLength, NULL, errorCode); if(temp!=dest) { /* copy the result string to the destination buffer */ - if(destLength>0) { - int32_t copyLength= destLength<=destCapacity ? 
destLength : destCapacity; - if(copyLength>0) { - u_memmove(dest, temp, copyLength); - } + if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) { + u_memmove(dest, temp, destLength); } if(temp!=buffer) { uprv_free(temp); } } - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); + return u_terminateUChars(dest, destCapacity, destLength, &errorCode); } /* public API functions */ @@ -1085,16 +1190,29 @@ u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - csm.csp=ucase_getSingleton(); - csm.options=options; + return ustrcase_mapWithOverlap( + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalFold, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +int32_t CaseMap::fold( + uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { return ustrcase_map( - &csm, + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalFold, pErrorCode); + ustrcase_internalFold, edits, errorCode); } +U_NAMESPACE_END + /* case-insensitive string comparisons -------------------------------------- */ /* @@ -1134,8 +1252,6 @@ static int32_t _cmpFold( UErrorCode *pErrorCode) { int32_t cmpRes = 0; - const UCaseProps *csp; - /* current-level start/limit - s1/s2 as current */ const UChar *start1, *start2, *limit1, *limit2; @@ -1167,7 +1283,6 @@ static int32_t _cmpFold( * assume that at least the option U_COMPARE_IGNORE_CASE is set * otherwise this function would have to behave exactly as uprv_strCompare() */ - csp=ucase_getSingleton(); if(U_FAILURE(*pErrorCode)) { return 0; } @@ -1349,7 +1464,7 @@ static int32_t _cmpFold( */ if( level1==0 && - (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 + (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 
) { /* cp1 case-folds to the code point "length" or to p[length] */ if(U_IS_SURROGATE(c1)) { @@ -1395,7 +1510,7 @@ static int32_t _cmpFold( } if( level2==0 && - (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 + (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 ) { /* cp2 case-folds to the code point "length" or to p[length] */ if(U_IS_SURROGATE(c2)) { diff --git a/deps/icu-small/source/common/ustrcase_locale.cpp b/deps/icu-small/source/common/ustrcase_locale.cpp index 78f4bbd7a2..2ecd24f03e 100644 --- a/deps/icu-small/source/common/ustrcase_locale.cpp +++ b/deps/icu-small/source/common/ustrcase_locale.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ustrcase_locale.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -18,66 +18,24 @@ */ #include "unicode/utypes.h" +#include "uassert.h" +#include "unicode/brkiter.h" +#include "unicode/casemap.h" #include "unicode/ucasemap.h" #include "unicode/uloc.h" #include "unicode/ustring.h" #include "ucase.h" -#include "ustr_imp.h" +#include "ucasemap_imp.h" -U_CFUNC void -ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) { - /* - * We could call ucasemap_setLocale(), but here we really only care about - * the initial language subtag, we need not return the real string via - * ucasemap_getLocale(), and we don't care about only getting "x" from - * "x-some-thing" etc. - * - * We ignore locales with a longer-than-3 initial subtag. - * - * We also do not fill in the locCache because it is rarely used, - * and not worth setting unless we reuse it for many case mapping operations. 
- * (That's why UCaseMap was created.) - */ - int i; - char c; - - /* the internal functions require locale!=NULL */ - if(locale==NULL) { - // Do not call uprv_getDefaultLocaleID() because that does not see - // changes to the default locale via uloc_setDefault(). - // It would also be inefficient if used frequently because uprv_getDefaultLocaleID() - // does not cache the locale ID. - // - // Unfortunately, uloc_getDefault() has many dependencies. - // We only care about a small set of language subtags, - // and we do not need the locale ID to be canonicalized. - // - // Best is to not call case mapping functions with a NULL locale ID. - locale=uloc_getDefault(); - } - for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { - csm->locale[i]=c; - } - if(i<=3) { - csm->locale[i]=0; /* Up to 3 non-separator characters. */ - } else { - csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ - } -} - -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. 
- */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); +U_CFUNC int32_t +ustrcase_getCaseLocale(const char *locale) { + if (locale == NULL) { + locale = uloc_getDefault(); } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; + if (*locale == 0) { + return UCASE_LOC_ROOT; } else { - ustrcase_setTempCaseMapLocale(csm, locale); + return ucase_getCaseLocale(locale); } } @@ -88,13 +46,11 @@ u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale); - return ustrcase_map( - &csm, + return ustrcase_mapWithOverlap( + ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalToLower, pErrorCode); + ustrcase_internalToLower, *pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -102,11 +58,37 @@ u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale); + return ustrcase_mapWithOverlap( + ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToUpper, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +int32_t CaseMap::toLower( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToLower, edits, errorCode); +} + +int32_t CaseMap::toUpper( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { return ustrcase_map( - &csm, + ustrcase_getCaseLocale(locale), 
options, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalToUpper, pErrorCode); + ustrcase_internalToUpper, edits, errorCode); } + +U_NAMESPACE_END diff --git a/deps/icu-small/source/common/ustrenum.cpp b/deps/icu-small/source/common/ustrenum.cpp index 699ce32cfe..8be79c9899 100644 --- a/deps/icu-small/source/common/ustrenum.cpp +++ b/deps/icu-small/source/common/ustrenum.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ustrenum.h b/deps/icu-small/source/common/ustrenum.h index 868fddcb53..582727cd1f 100644 --- a/deps/icu-small/source/common/ustrenum.h +++ b/deps/icu-small/source/common/ustrenum.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/ustrfmt.c b/deps/icu-small/source/common/ustrfmt.c deleted file mode 100644 index c7805d8d2a..0000000000 --- a/deps/icu-small/source/common/ustrfmt.c +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2001-2006, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#include "cstring.h" -#include "ustrfmt.h" - - -/*** - * Fills in a UChar* string with the radix-based representation of a - * uint32_t number padded with zeroes to minwidth. 
The result - * will be null terminated if there is room. - * - * @param buffer UChar buffer to receive result - * @param capacity capacity of buffer - * @param i the unsigned number to be formatted - * @param radix the radix from 2..36 - * @param minwidth the minimum width. If the result is narrower than - * this, '0's will be added on the left. Must be <= - * capacity. - * @return the length of the result, not including any terminating - * null - */ -U_CAPI int32_t U_EXPORT2 -uprv_itou (UChar * buffer, int32_t capacity, - uint32_t i, uint32_t radix, int32_t minwidth) -{ - int32_t length = 0; - int digit; - int32_t j; - UChar temp; - - do{ - digit = (int)(i % radix); - buffer[length++]=(UChar)(digit<=9?(0x0030+digit):(0x0030+digit+7)); - i=i/radix; - } while(i && lengthp; // the current buffer mapIndex = ix - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; return TRUE; @@ -1298,6 +1305,10 @@ fillReverse: // Can only do this if the incoming index is somewhere in the interior of the string. // If index is at the end, there is no character there to look at. if (ix != ut->b) { + // Note: this function will only move the index back if it is on a trail byte + // and there is a preceding lead byte and the sequence from the lead + // through this trail could be part of a valid UTF-8 sequence + // Otherwise the index remains unchanged. U8_SET_CP_START(s8, 0, ix); } @@ -1311,7 +1322,10 @@ fillReverse: UChar *buf = u8b->buf; uint8_t *mapToNative = u8b->mapToNative; uint8_t *mapToUChars = u8b->mapToUChars; - int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1); + int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1; + // Note that toUCharsMapStart can be negative. Happens when the remaining + // text from current position to the beginning is less than the buffer size. + // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. 
int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region // at end of buffer to leave room // for a surrogate pair at the @@ -1338,6 +1352,7 @@ fillReverse: if (c<0x80) { // Special case ASCII range for speed. buf[destIx] = (UChar)c; + U_ASSERT(toUCharsMapStart <= srcIx); mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); } else { @@ -1367,6 +1382,7 @@ fillReverse: do { mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; } while (sIx >= srcIx); + U_ASSERT(toUCharsMapStart <= (srcIx+1)); // Set native indexing limit to be the current position. // We are processing a non-ascii, non-native-indexing char now; @@ -1541,6 +1557,7 @@ utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); U_ASSERT(index<=ut->chunkNativeLimit); int32_t mapIndex = index - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; U_ASSERT(offset>=0 && offset<=ut->chunkLength); return offset; @@ -2225,13 +2242,13 @@ unistrTextCopy(UText *ut, } if(move) { - // move: copy to destIndex, then replace original with nothing + // move: copy to destIndex, then remove original int32_t segLength=limit32-start32; us->copy(start32, limit32, destIndex32); if(destIndex32replace(start32, segLength, NULL, 0); + us->remove(start32, segLength); } else { // copy us->copy(start32, limit32, destIndex32); diff --git a/deps/icu-small/source/common/utf_impl.c b/deps/icu-small/source/common/utf_impl.c deleted file mode 100644 index 91cb9ba5f2..0000000000 --- a/deps/icu-small/source/common/utf_impl.c +++ /dev/null @@ -1,328 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utf_impl.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep13 -* created by: Markus W. Scherer -* -* This file provides implementation functions for macros in the utfXX.h -* that would otherwise be too long as macros. -*/ - -/* set import/export definitions */ -#ifndef U_UTF8_IMPL -# define U_UTF8_IMPL -#endif - -#include "unicode/utypes.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf_old.h" -#include "uassert.h" - -/* - * This table could be replaced on many machines by - * a few lines of assembler code using an - * "index of first 0-bit from msb" instruction and - * one or two more integer instructions. - * - * For example, on an i386, do something like - * - MOV AL, leadByte - * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0) - * - MOV AH, 0 - * - BSR BX, AX (16-bit) - * - MOV AX, 6 (result) - * - JZ finish (ZF==1 if leadByte==0xff) - * - SUB AX, BX (result) - * -finish: - * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB) - * - * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal; - * lead bytes above 0xf4 are illegal. - * We keep them in this table for skipping long ISO 10646-UTF-8 sequences. 
- */ -U_EXPORT const uint8_t -utf8_countTrailBytes[256]={ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, - 3, 3, 3, /* illegal in Unicode */ - 4, 4, 4, 4, /* illegal in Unicode */ - 5, 5, /* illegal in Unicode */ - 0, 0 /* illegal bytes 0xfe and 0xff */ -}; - -static const UChar32 -utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 }; - -static const UChar32 -utf8_errorValue[6]={ - UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, 0x10ffff, - 0x3ffffff, 0x7fffffff -}; - -static UChar32 -errorValue(int32_t count, int8_t strict) { - if(strict>=0) { - return utf8_errorValue[count]; - } else if(strict==-3) { - return 0xfffd; - } else { - return U_SENTINEL; - } -} - -/* - * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros - * and their obsolete sibling UTF8_NEXT_CHAR_SAFE(). - * - * U8_NEXT() supports NUL-terminated strings indicated via length<0. - * - * The "strict" parameter controls the error behavior: - * <0 "Safe" behavior of U8_NEXT(): - * -1: All illegal byte sequences yield U_SENTINEL=-1. - * -2: Same as -1, except for lenient treatment of surrogate code points as legal. 
- * Some implementations use this for roundtripping of - * Unicode 16-bit strings that are not well-formed UTF-16, that is, they - * contain unpaired surrogates. - * -3: All illegal byte sequences yield U+FFFD. - * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE): - * All illegal byte sequences yield a positive code point such that this - * result code point would be encoded with the same number of bytes as - * the illegal sequence. - * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE): - * Same as the obsolete "safe" behavior, but non-characters are also treated - * like illegal sequences. - * - * Note that a UBool is the same as an int8_t. - */ -U_CAPI UChar32 U_EXPORT2 -utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { - int32_t i=*pi; - uint8_t count=U8_COUNT_TRAIL_BYTES(c); - U_ASSERT(count <= 5); /* U8_COUNT_TRAIL_BYTES returns value 0...5 */ - if(i+count<=length || length<0) { - uint8_t trail; - - U8_MASK_LEAD_BYTE(c, count); - /* support NUL-terminated strings: do not read beyond the first non-trail byte */ - switch(count) { - /* each branch falls through to the next one */ - case 0: - /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ - case 5: - case 4: - /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ - break; - case 3: - trail=s[i++]-0x80; - c=(c<<6)|trail; - /* c>=0x110 would result in code point>0x10ffff, outside Unicode */ - if(c>=0x110 || trail>0x3f) { break; } - case 2: - trail=s[i++]-0x80; - c=(c<<6)|trail; - /* - * test for a surrogate d800..dfff unless we are lenient: - * before the last (c<<6), a surrogate is c=360..37f - */ - if(((c&0xffe0)==0x360 && strict!=-2) || trail>0x3f) { break; } - case 1: - trail=s[i++]-0x80; - c=(c<<6)|trail; - if(trail>0x3f) { break; } - /* correct sequence - all trail bytes have (b7..b6)==(10) */ - if(c>=utf8_minLegal[count] && - /* strict: forbid non-characters like U+fffe */ - 
(strict<=0 || !U_IS_UNICODE_NONCHAR(c))) { - *pi=i; - return c; - } - /* no default branch to optimize switch() - all values are covered */ - } - } else { - /* too few bytes left */ - count=length-i; - } - - /* error handling */ - i=*pi; - while(count>0 && U8_IS_TRAIL(s[i])) { - ++i; - --count; - } - c=errorValue(i-*pi, strict); - *pi=i; - return c; -} - -U_CAPI int32_t U_EXPORT2 -utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) { - if((uint32_t)(c)<=0x7ff) { - if((i)+1<(length)) { - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); - return i; - } - } else if((uint32_t)(c)<=0xffff) { - /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. */ - if((i)+2<(length) && !U_IS_SURROGATE(c)) { - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); - return i; - } - } else if((uint32_t)(c)<=0x10ffff) { - if((i)+3<(length)) { - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); - return i; - } - } - /* c>0x10ffff or not enough space, write an error value */ - if(pIsError!=NULL) { - *pIsError=TRUE; - } else { - length-=i; - if(length>0) { - int32_t offset; - if(length>3) { - length=3; - } - s+=i; - offset=0; - c=utf8_errorValue[length-1]; - UTF8_APPEND_CHAR_UNSAFE(s, offset, c); - i=i+offset; - } - } - return i; -} - -U_CAPI UChar32 U_EXPORT2 -utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) { - int32_t i=*pi; - uint8_t b, count=1, shift=6; - - if(!U8_IS_TRAIL(c)) { return errorValue(0, strict); } - - /* extract value bits from the last trail byte */ - c&=0x3f; - - for(;;) { - if(i<=start) { - /* no lead byte at all */ - return errorValue(0, strict); - } - - /* read another previous byte */ - b=s[--i]; - if((uint8_t)(b-0x80)<0x7e) { /* 
0x80<=b<0xfe */ - if(b&0x40) { - /* lead byte, this will always end the loop */ - uint8_t shouldCount=U8_COUNT_TRAIL_BYTES(b); - - if(count==shouldCount) { - /* set the new position */ - *pi=i; - U8_MASK_LEAD_BYTE(b, count); - c|=(UChar32)b<=4 || c>0x10ffff || c0 && U_IS_UNICODE_NONCHAR(c))) { - /* illegal sequence or (strict and non-character) */ - if(count>=4) { - count=3; - } - c=errorValue(count, strict); - } else { - /* exit with correct c */ - } - } else { - /* the lead byte does not match the number of trail bytes */ - /* only set the position to the lead byte if it would - include the trail byte that we started with */ - if(countstart) { - Z=I-5; - } else { - Z=start; - } - - /* return I if the sequence starting there is long enough to include i */ - do { - b=s[I]; - if((uint8_t)(b-0x80)>=0x7e) { /* not 0x80<=b<0xfe */ - break; - } else if(b>=0xc0) { - if(U8_COUNT_TRAIL_BYTES(b)>=(i-I)) { - return I; - } else { - break; - } - } - } while(Z<=--I); - - /* return i itself to be consistent with the FWD_1 macro */ - return i; -} diff --git a/deps/icu-small/source/common/utf_impl.cpp b/deps/icu-small/source/common/utf_impl.cpp new file mode 100644 index 0000000000..293e6f181f --- /dev/null +++ b/deps/icu-small/source/common/utf_impl.cpp @@ -0,0 +1,339 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utf_impl.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file provides implementation functions for macros in the utfXX.h +* that would otherwise be too long as macros. 
+*/ + +/* set import/export definitions */ +#ifndef U_UTF8_IMPL +# define U_UTF8_IMPL +#endif + +#include "unicode/utypes.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf_old.h" +#include "uassert.h" + +/* + * Table of the number of utf8 trail bytes, indexed by the lead byte. + * Used by the deprecated macro UTF8_COUNT_TRAIL_BYTES, defined in utf_old.h + * + * The current macro, U8_COUNT_TRAIL_BYTES, does _not_ use this table. + * + * Note that this table cannot be removed, even if UTF8_COUNT_TRAIL_BYTES were + * changed to no longer use it. References to the table from expansions of UTF8_COUNT_TRAIL_BYTES + * may exist in old client code that must continue to run with newer icu library versions. + * + * This table could be replaced on many machines by + * a few lines of assembler code using an + * "index of first 0-bit from msb" instruction and + * one or two more integer instructions. + * + * For example, on an i386, do something like + * - MOV AL, leadByte + * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0) + * - MOV AH, 0 + * - BSR BX, AX (16-bit) + * - MOV AX, 6 (result) + * - JZ finish (ZF==1 if leadByte==0xff) + * - SUB AX, BX (result) + * -finish: + * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB) + * + * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal; + * lead bytes above 0xf4 are illegal. + * We keep them in this table for skipping long ISO 10646-UTF-8 sequences. 
+ */ +extern "C" U_EXPORT const uint8_t +utf8_countTrailBytes[256]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, + 3, 3, 3, /* illegal in Unicode */ + 4, 4, 4, 4, /* illegal in Unicode */ + 5, 5, /* illegal in Unicode */ + 0, 0 /* illegal bytes 0xfe and 0xff */ +}; + +static const UChar32 +utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 }; + +static const UChar32 +utf8_errorValue[6]={ + UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, 0x10ffff, + 0x3ffffff, 0x7fffffff +}; + +static UChar32 +errorValue(int32_t count, int8_t strict) { + if(strict>=0) { + return utf8_errorValue[count]; + } else if(strict==-3) { + return 0xfffd; + } else { + return U_SENTINEL; + } +} + +/* + * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros + * and their obsolete sibling UTF8_NEXT_CHAR_SAFE(). + * + * U8_NEXT() supports NUL-terminated strings indicated via length<0. + * + * The "strict" parameter controls the error behavior: + * <0 "Safe" behavior of U8_NEXT(): + * -1: All illegal byte sequences yield U_SENTINEL=-1. + * -2: Same as -1, except for lenient treatment of surrogate code points as legal. 
+ * Some implementations use this for roundtripping of + * Unicode 16-bit strings that are not well-formed UTF-16, that is, they + * contain unpaired surrogates. + * -3: All illegal byte sequences yield U+FFFD. + * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE): + * All illegal byte sequences yield a positive code point such that this + * result code point would be encoded with the same number of bytes as + * the illegal sequence. + * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE): + * Same as the obsolete "safe" behavior, but non-characters are also treated + * like illegal sequences. + * + * Note that a UBool is the same as an int8_t. + */ +U_CAPI UChar32 U_EXPORT2 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { + int32_t i=*pi; + uint8_t count=U8_COUNT_TRAIL_BYTES(c); + U_ASSERT(count <= 5); /* U8_COUNT_TRAIL_BYTES returns value 0...5 */ + if(i+count<=length || length<0) { + uint8_t trail; + + U8_MASK_LEAD_BYTE(c, count); + /* support NUL-terminated strings: do not read beyond the first non-trail byte */ + switch(count) { + /* each branch falls through to the next one */ + case 0: + /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ + case 5: + case 4: + /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ + break; + case 3: + trail=s[i++]-0x80; + c=(c<<6)|trail; + /* c>=0x110 would result in code point>0x10ffff, outside Unicode */ + if(c>=0x110 || trail>0x3f) { break; } + U_FALLTHROUGH; + case 2: + trail=s[i++]-0x80; + c=(c<<6)|trail; + /* + * test for a surrogate d800..dfff unless we are lenient: + * before the last (c<<6), a surrogate is c=360..37f + */ + if(((c&0xffe0)==0x360 && strict!=-2) || trail>0x3f) { break; } + U_FALLTHROUGH; + case 1: + trail=s[i++]-0x80; + c=(c<<6)|trail; + if(trail>0x3f) { break; } + /* correct sequence - all trail bytes have (b7..b6)==(10) */ + if(c>=utf8_minLegal[count] && + /* strict: 
forbid non-characters like U+fffe */ + (strict<=0 || !U_IS_UNICODE_NONCHAR(c))) { + *pi=i; + return c; + } + /* no default branch to optimize switch() - all values are covered */ + } + } else { + /* too few bytes left */ + count=length-i; + } + + /* error handling */ + i=*pi; + while(count>0 && U8_IS_TRAIL(s[i])) { + ++i; + --count; + } + c=errorValue(i-*pi, strict); + *pi=i; + return c; +} + +U_CAPI int32_t U_EXPORT2 +utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) { + if((uint32_t)(c)<=0x7ff) { + if((i)+1<(length)) { + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); + return i; + } + } else if((uint32_t)(c)<=0xffff) { + /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. */ + if((i)+2<(length) && !U_IS_SURROGATE(c)) { + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); + return i; + } + } else if((uint32_t)(c)<=0x10ffff) { + if((i)+3<(length)) { + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); + return i; + } + } + /* c>0x10ffff or not enough space, write an error value */ + if(pIsError!=NULL) { + *pIsError=TRUE; + } else { + length-=i; + if(length>0) { + int32_t offset; + if(length>3) { + length=3; + } + s+=i; + offset=0; + c=utf8_errorValue[length-1]; + UTF8_APPEND_CHAR_UNSAFE(s, offset, c); + i=i+offset; + } + } + return i; +} + +U_CAPI UChar32 U_EXPORT2 +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) { + int32_t i=*pi; + uint8_t b, count=1, shift=6; + + if(!U8_IS_TRAIL(c)) { return errorValue(0, strict); } + + /* extract value bits from the last trail byte */ + c&=0x3f; + + for(;;) { + if(i<=start) { + /* no lead byte at all */ + return errorValue(0, strict); + } + + /* read another previous byte */ + b=s[--i]; + 
if((uint8_t)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */ + if(b&0x40) { + /* lead byte, this will always end the loop */ + uint8_t shouldCount=U8_COUNT_TRAIL_BYTES(b); + + if(count==shouldCount) { + /* set the new position */ + *pi=i; + U8_MASK_LEAD_BYTE(b, count); + c|=(UChar32)b<=4 || c>0x10ffff || c0 && U_IS_UNICODE_NONCHAR(c))) { + /* illegal sequence or (strict and non-character) */ + if(count>=4) { + count=3; + } + c=errorValue(count, strict); + } else { + /* exit with correct c */ + } + } else { + /* the lead byte does not match the number of trail bytes */ + /* only set the position to the lead byte if it would + include the trail byte that we started with */ + if(countstart) { + Z=I-5; + } else { + Z=start; + } + + /* return I if the sequence starting there is long enough to include i */ + do { + b=s[I]; + if((uint8_t)(b-0x80)>=0x7e) { /* not 0x80<=b<0xfe */ + break; + } else if(b>=0xc0) { + if(U8_COUNT_TRAIL_BYTES(b)>=(i-I)) { + return I; + } else { + break; + } + } + } while(Z<=--I); + + /* return i itself to be consistent with the FWD_1 macro */ + return i; +} diff --git a/deps/icu-small/source/common/util.cpp b/deps/icu-small/source/common/util.cpp index b5d1fa4888..838a201a73 100644 --- a/deps/icu-small/source/common/util.cpp +++ b/deps/icu-small/source/common/util.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/util.h b/deps/icu-small/source/common/util.h index b3ed037ab6..7af9a32d8f 100644 --- a/deps/icu-small/source/common/util.h +++ b/deps/icu-small/source/common/util.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/util_props.cpp b/deps/icu-small/source/common/util_props.cpp index 3b7bb20d25..36057a6066 100644 --- a/deps/icu-small/source/common/util_props.cpp +++ b/deps/icu-small/source/common/util_props.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/utrace.c b/deps/icu-small/source/common/utrace.c deleted file mode 100644 index e702497d8c..0000000000 --- a/deps/icu-small/source/common/utrace.c +++ /dev/null @@ -1,489 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2003-2014, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: utrace.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -*/ - -#define UTRACE_IMPL -#include "unicode/utrace.h" -#include "utracimp.h" -#include "cstring.h" -#include "uassert.h" -#include "ucln_cmn.h" - - -static UTraceEntry *pTraceEntryFunc = NULL; -static UTraceExit *pTraceExitFunc = NULL; -static UTraceData *pTraceDataFunc = NULL; -static const void *gTraceContext = NULL; - -U_EXPORT int32_t -utrace_level = UTRACE_ERROR; - -U_CAPI void U_EXPORT2 -utrace_entry(int32_t fnNumber) { - if (pTraceEntryFunc != NULL) { - (*pTraceEntryFunc)(gTraceContext, fnNumber); - } -} - - -static const char gExitFmt[] = "Returns."; -static const char gExitFmtValue[] = "Returns %d."; -static const char gExitFmtStatus[] = "Returns. Status = %d."; -static const char gExitFmtValueStatus[] = "Returns %d. Status = %d."; -static const char gExitFmtPtrStatus[] = "Returns %d. Status = %p."; - -U_CAPI void U_EXPORT2 -utrace_exit(int32_t fnNumber, int32_t returnType, ...) { - if (pTraceExitFunc != NULL) { - va_list args; - const char *fmt; - - switch (returnType) { - case 0: - fmt = gExitFmt; - break; - case UTRACE_EXITV_I32: - fmt = gExitFmtValue; - break; - case UTRACE_EXITV_STATUS: - fmt = gExitFmtStatus; - break; - case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS: - fmt = gExitFmtValueStatus; - break; - case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS: - fmt = gExitFmtPtrStatus; - break; - default: - U_ASSERT(FALSE); - fmt = gExitFmt; - } - - va_start(args, returnType); - (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args); - va_end(args); - } -} - - - -U_CAPI void U_EXPORT2 -utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) 
{ - if (pTraceDataFunc != NULL) { - va_list args; - va_start(args, fmt ); - (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args); - va_end(args); - } -} - - -static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) { - int32_t i; - /* Check whether a start of line indenting is needed. Three cases: - * 1. At the start of the first line (output index == 0). - * 2. At the start of subsequent lines (preceeding char in buffer == '\n') - * 3. When preflighting buffer len (buffer capacity is exceeded), when - * a \n is output. Ideally we wouldn't do the indent until the following char - * is received, but that won't work because there's no place to remember that - * the preceding char was \n. Meaning that we may overstimate the - * buffer size needed. No harm done. - */ - if (*outIx==0 || /* case 1. */ - (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */ - (c=='\n' && *outIx>=capacity)) /* case 3 */ - { - /* At the start of a line. Indent. */ - for(i=0; i= 0; shiftCount-=4) { - char c = gHexChars[(val >> shiftCount) & 0xf]; - outputChar(c, outBuf, outIx, capacity, 0); - } -} - -/* Output a pointer value in hex. Work with any size of pointer */ -static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) { - int32_t i; - int32_t incVal = 1; /* +1 for big endian, -1 for little endian */ - char *p = (char *)&val; /* point to current byte to output in the ptr val */ - -#if !U_IS_BIG_ENDIAN - /* Little Endian. 
Move p to most significant end of the value */ - incVal = -1; - p += sizeof(void *) - 1; -#endif - - /* Loop through the bytes of the ptr as it sits in memory, from - * most significant to least significant end */ - for (i=0; i 0) { - outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity); - outputChar(' ', outBuf, &outIx, capacity, indent); - } - if (vectorLen == -1 && longArg == 0) { - break; - } - } - } - outputChar('[', outBuf, &outIx, capacity, indent); - outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity); - outputChar(']', outBuf, &outIx, capacity, indent); - } - break; - - - default: - /* %. in format string, where . is some character not in the set - * of recognized format chars. Just output it as if % wasn't there. - * (Covers "%%" outputing a single '%') - */ - outputChar(fmtC, outBuf, &outIx, capacity, indent); - } - } - outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */ - return outIx + 1; /* outIx + 1 because outIx does not increment when outputing final null. */ -} - - - - -U_CAPI int32_t U_EXPORT2 -utrace_format(char *outBuf, int32_t capacity, - int32_t indent, const char *fmt, ...) 
{ - int32_t retVal; - va_list args; - va_start(args, fmt ); - retVal = utrace_vformat(outBuf, capacity, indent, fmt, args); - va_end(args); - return retVal; -} - - -U_CAPI void U_EXPORT2 -utrace_setFunctions(const void *context, - UTraceEntry *e, UTraceExit *x, UTraceData *d) { - pTraceEntryFunc = e; - pTraceExitFunc = x; - pTraceDataFunc = d; - gTraceContext = context; -} - - -U_CAPI void U_EXPORT2 -utrace_getFunctions(const void **context, - UTraceEntry **e, UTraceExit **x, UTraceData **d) { - *e = pTraceEntryFunc; - *x = pTraceExitFunc; - *d = pTraceDataFunc; - *context = gTraceContext; -} - -U_CAPI void U_EXPORT2 -utrace_setLevel(int32_t level) { - if (level < UTRACE_OFF) { - level = UTRACE_OFF; - } - if (level > UTRACE_VERBOSE) { - level = UTRACE_VERBOSE; - } - utrace_level = level; -} - -U_CAPI int32_t U_EXPORT2 -utrace_getLevel() { - return utrace_level; -} - - -U_CFUNC UBool -utrace_cleanup() { - pTraceEntryFunc = NULL; - pTraceExitFunc = NULL; - pTraceDataFunc = NULL; - utrace_level = UTRACE_OFF; - gTraceContext = NULL; - return TRUE; -} - - -static const char * const -trFnName[] = { - "u_init", - "u_cleanup", - NULL -}; - - -static const char * const -trConvNames[] = { - "ucnv_open", - "ucnv_openPackage", - "ucnv_openAlgorithmic", - "ucnv_clone", - "ucnv_close", - "ucnv_flushCache", - "ucnv_load", - "ucnv_unload", - NULL -}; - - -static const char * const -trCollNames[] = { - "ucol_open", - "ucol_close", - "ucol_strcoll", - "ucol_getSortKey", - "ucol_getLocale", - "ucol_nextSortKeyPart", - "ucol_strcollIter", - "ucol_openFromShortString", - "ucol_strcollUTF8", - NULL -}; - - -U_CAPI const char * U_EXPORT2 -utrace_functionName(int32_t fnNumber) { - if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) { - return trFnName[fnNumber]; - } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) { - return trConvNames[fnNumber - UTRACE_CONVERSION_START]; - } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < 
UTRACE_COLLATION_LIMIT){ - return trCollNames[fnNumber - UTRACE_COLLATION_START]; - } else { - return "[BOGUS Trace Function Number]"; - } -} diff --git a/deps/icu-small/source/common/utrace.cpp b/deps/icu-small/source/common/utrace.cpp new file mode 100644 index 0000000000..7d0ddc6f8b --- /dev/null +++ b/deps/icu-small/source/common/utrace.cpp @@ -0,0 +1,492 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2003-2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: utrace.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +*/ + +#include "unicode/utrace.h" +#include "utracimp.h" +#include "cstring.h" +#include "uassert.h" +#include "ucln_cmn.h" + + +static UTraceEntry *pTraceEntryFunc = NULL; +static UTraceExit *pTraceExitFunc = NULL; +static UTraceData *pTraceDataFunc = NULL; +static const void *gTraceContext = NULL; + +/** + * \var utrace_level + * Trace level variable. Negative for "off". + */ +static int32_t +utrace_level = UTRACE_ERROR; + +U_CAPI void U_EXPORT2 +utrace_entry(int32_t fnNumber) { + if (pTraceEntryFunc != NULL) { + (*pTraceEntryFunc)(gTraceContext, fnNumber); + } +} + + +static const char gExitFmt[] = "Returns."; +static const char gExitFmtValue[] = "Returns %d."; +static const char gExitFmtStatus[] = "Returns. Status = %d."; +static const char gExitFmtValueStatus[] = "Returns %d. Status = %d."; +static const char gExitFmtPtrStatus[] = "Returns %d. Status = %p."; + +U_CAPI void U_EXPORT2 +utrace_exit(int32_t fnNumber, int32_t returnType, ...) 
{ + if (pTraceExitFunc != NULL) { + va_list args; + const char *fmt; + + switch (returnType) { + case 0: + fmt = gExitFmt; + break; + case UTRACE_EXITV_I32: + fmt = gExitFmtValue; + break; + case UTRACE_EXITV_STATUS: + fmt = gExitFmtStatus; + break; + case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS: + fmt = gExitFmtValueStatus; + break; + case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS: + fmt = gExitFmtPtrStatus; + break; + default: + U_ASSERT(FALSE); + fmt = gExitFmt; + } + + va_start(args, returnType); + (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args); + va_end(args); + } +} + + + +U_CAPI void U_EXPORT2 +utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) { + if (pTraceDataFunc != NULL) { + va_list args; + va_start(args, fmt ); + (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args); + va_end(args); + } +} + + +static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) { + int32_t i; + /* Check whether a start of line indenting is needed. Three cases: + * 1. At the start of the first line (output index == 0). + * 2. At the start of subsequent lines (preceeding char in buffer == '\n') + * 3. When preflighting buffer len (buffer capacity is exceeded), when + * a \n is output. Ideally we wouldn't do the indent until the following char + * is received, but that won't work because there's no place to remember that + * the preceding char was \n. Meaning that we may overstimate the + * buffer size needed. No harm done. + */ + if (*outIx==0 || /* case 1. */ + (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */ + (c=='\n' && *outIx>=capacity)) /* case 3 */ + { + /* At the start of a line. Indent. */ + for(i=0; i= 0; shiftCount-=4) { + char c = gHexChars[(val >> shiftCount) & 0xf]; + outputChar(c, outBuf, outIx, capacity, 0); + } +} + +/* Output a pointer value in hex. 
Work with any size of pointer */ +static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) { + uint32_t i; + int32_t incVal = 1; /* +1 for big endian, -1 for little endian */ + char *p = (char *)&val; /* point to current byte to output in the ptr val */ + +#if !U_IS_BIG_ENDIAN + /* Little Endian. Move p to most significant end of the value */ + incVal = -1; + p += sizeof(void *) - 1; +#endif + + /* Loop through the bytes of the ptr as it sits in memory, from + * most significant to least significant end */ + for (i=0; i 0) { + outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity); + outputChar(' ', outBuf, &outIx, capacity, indent); + } + if (vectorLen == -1 && longArg == 0) { + break; + } + } + } + outputChar('[', outBuf, &outIx, capacity, indent); + outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity); + outputChar(']', outBuf, &outIx, capacity, indent); + } + break; + + + default: + /* %. in format string, where . is some character not in the set + * of recognized format chars. Just output it as if % wasn't there. + * (Covers "%%" outputing a single '%') + */ + outputChar(fmtC, outBuf, &outIx, capacity, indent); + } + } + outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */ + return outIx + 1; /* outIx + 1 because outIx does not increment when outputing final null. */ +} + + + + +U_CAPI int32_t U_EXPORT2 +utrace_format(char *outBuf, int32_t capacity, + int32_t indent, const char *fmt, ...) 
{ + int32_t retVal; + va_list args; + va_start(args, fmt ); + retVal = utrace_vformat(outBuf, capacity, indent, fmt, args); + va_end(args); + return retVal; +} + + +U_CAPI void U_EXPORT2 +utrace_setFunctions(const void *context, + UTraceEntry *e, UTraceExit *x, UTraceData *d) { + pTraceEntryFunc = e; + pTraceExitFunc = x; + pTraceDataFunc = d; + gTraceContext = context; +} + + +U_CAPI void U_EXPORT2 +utrace_getFunctions(const void **context, + UTraceEntry **e, UTraceExit **x, UTraceData **d) { + *e = pTraceEntryFunc; + *x = pTraceExitFunc; + *d = pTraceDataFunc; + *context = gTraceContext; +} + +U_CAPI void U_EXPORT2 +utrace_setLevel(int32_t level) { + if (level < UTRACE_OFF) { + level = UTRACE_OFF; + } + if (level > UTRACE_VERBOSE) { + level = UTRACE_VERBOSE; + } + utrace_level = level; +} + +U_CAPI int32_t U_EXPORT2 +utrace_getLevel() { + return utrace_level; +} + + +U_CFUNC UBool +utrace_cleanup() { + pTraceEntryFunc = NULL; + pTraceExitFunc = NULL; + pTraceDataFunc = NULL; + utrace_level = UTRACE_OFF; + gTraceContext = NULL; + return TRUE; +} + + +static const char * const +trFnName[] = { + "u_init", + "u_cleanup", + NULL +}; + + +static const char * const +trConvNames[] = { + "ucnv_open", + "ucnv_openPackage", + "ucnv_openAlgorithmic", + "ucnv_clone", + "ucnv_close", + "ucnv_flushCache", + "ucnv_load", + "ucnv_unload", + NULL +}; + + +static const char * const +trCollNames[] = { + "ucol_open", + "ucol_close", + "ucol_strcoll", + "ucol_getSortKey", + "ucol_getLocale", + "ucol_nextSortKeyPart", + "ucol_strcollIter", + "ucol_openFromShortString", + "ucol_strcollUTF8", + NULL +}; + + +U_CAPI const char * U_EXPORT2 +utrace_functionName(int32_t fnNumber) { + if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) { + return trFnName[fnNumber]; + } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) { + return trConvNames[fnNumber - UTRACE_CONVERSION_START]; + } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < 
UTRACE_COLLATION_LIMIT){ + return trCollNames[fnNumber - UTRACE_COLLATION_START]; + } else { + return "[BOGUS Trace Function Number]"; + } +} diff --git a/deps/icu-small/source/common/utracimp.h b/deps/icu-small/source/common/utracimp.h index 5ec047a967..c2819830e1 100644 --- a/deps/icu-small/source/common/utracimp.h +++ b/deps/icu-small/source/common/utracimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: utracimp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -47,20 +47,6 @@ U_CDECL_BEGIN -/** - * \var utrace_level - * Trace level variable. Negative for "off". - * Use only via UTRACE_ macros. - * @internal - */ -#ifdef UTRACE_IMPL -U_EXPORT int32_t -#else -U_CFUNC U_COMMON_API int32_t -#endif -utrace_level; - - /** * Traced Function Exit return types. * Flags indicating the number and types of varargs included in a call diff --git a/deps/icu-small/source/common/utrie.cpp b/deps/icu-small/source/common/utrie.cpp index 478560df66..ecf9b1cba7 100644 --- a/deps/icu-small/source/common/utrie.cpp +++ b/deps/icu-small/source/common/utrie.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: utrie.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h index 17e660b3e3..9c5382c594 100644 --- a/deps/icu-small/source/common/utrie.h +++ b/deps/icu-small/source/common/utrie.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: utrie.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/utrie2.cpp b/deps/icu-small/source/common/utrie2.cpp index 4bd35a924d..cec7224d90 100644 --- a/deps/icu-small/source/common/utrie2.cpp +++ b/deps/icu-small/source/common/utrie2.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: utrie2.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/utrie2.h b/deps/icu-small/source/common/utrie2.h index 810bcfc959..8e87bf8fbd 100644 --- a/deps/icu-small/source/common/utrie2.h +++ b/deps/icu-small/source/common/utrie2.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: utrie2.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/utrie2_builder.cpp b/deps/icu-small/source/common/utrie2_builder.cpp index 664051c5f9..d8a3a06757 100644 --- a/deps/icu-small/source/common/utrie2_builder.cpp +++ b/deps/icu-small/source/common/utrie2_builder.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: utrie2_builder.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/utrie2_impl.h b/deps/icu-small/source/common/utrie2_impl.h index 48883638b1..b7dc9d3fb4 100644 --- a/deps/icu-small/source/common/utrie2_impl.h +++ b/deps/icu-small/source/common/utrie2_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: utrie2_impl.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/uts46.cpp b/deps/icu-small/source/common/uts46.cpp index 7bc4f925ca..f2cff2d5ea 100644 --- a/deps/icu-small/source/common/uts46.cpp +++ b/deps/icu-small/source/common/uts46.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: uts46.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/common/utypeinfo.h b/deps/icu-small/source/common/utypeinfo.h index b39aa0d605..c6663734fc 100644 --- a/deps/icu-small/source/common/utypeinfo.h +++ b/deps/icu-small/source/common/utypeinfo.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -24,7 +24,7 @@ #include using std::exception; #endif -#if !defined(_MSC_VER) +#if defined(__GLIBCXX__) namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364 #endif #include // for 'typeid' to work diff --git a/deps/icu-small/source/common/utypes.c b/deps/icu-small/source/common/utypes.c deleted file mode 100644 index c506dd44f5..0000000000 --- a/deps/icu-small/source/common/utypes.c +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* -* FILE NAME : utypes.c (previously putil.c) -* -* Date Name Description -* 10/07/2004 grhoten split from putil.c -****************************************************************************** -*/ - -#include "unicode/utypes.h" - -/* u_errorName() ------------------------------------------------------------ */ - -static const char * const -_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={ - "U_USING_FALLBACK_WARNING", - "U_USING_DEFAULT_WARNING", - "U_SAFECLONE_ALLOCATED_WARNING", - "U_STATE_OLD_WARNING", - "U_STRING_NOT_TERMINATED_WARNING", - "U_SORT_KEY_TOO_SHORT_WARNING", - "U_AMBIGUOUS_ALIAS_WARNING", - "U_DIFFERENT_UCA_VERSION", - "U_PLUGIN_CHANGED_LEVEL_WARNING", -}; - -static const char * const -_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={ - "U_BAD_VARIABLE_DEFINITION", - "U_MALFORMED_RULE", - "U_MALFORMED_SET", - "U_MALFORMED_SYMBOL_REFERENCE", - "U_MALFORMED_UNICODE_ESCAPE", - "U_MALFORMED_VARIABLE_DEFINITION", - "U_MALFORMED_VARIABLE_REFERENCE", - "U_MISMATCHED_SEGMENT_DELIMITERS", - "U_MISPLACED_ANCHOR_START", - "U_MISPLACED_CURSOR_OFFSET", - "U_MISPLACED_QUANTIFIER", - "U_MISSING_OPERATOR", - "U_MISSING_SEGMENT_CLOSE", - "U_MULTIPLE_ANTE_CONTEXTS", - "U_MULTIPLE_CURSORS", - "U_MULTIPLE_POST_CONTEXTS", - "U_TRAILING_BACKSLASH", - "U_UNDEFINED_SEGMENT_REFERENCE", - "U_UNDEFINED_VARIABLE", - "U_UNQUOTED_SPECIAL", - "U_UNTERMINATED_QUOTE", - "U_RULE_MASK_ERROR", - "U_MISPLACED_COMPOUND_FILTER", - "U_MULTIPLE_COMPOUND_FILTERS", - "U_INVALID_RBT_SYNTAX", - "U_INVALID_PROPERTY_PATTERN", - "U_MALFORMED_PRAGMA", - "U_UNCLOSED_SEGMENT", - "U_ILLEGAL_CHAR_IN_SEGMENT", - "U_VARIABLE_RANGE_EXHAUSTED", - "U_VARIABLE_RANGE_OVERLAP", - "U_ILLEGAL_CHARACTER", - "U_INTERNAL_TRANSLITERATOR_ERROR", - "U_INVALID_ID", - "U_INVALID_FUNCTION" -}; - -static const char * const -_uErrorName[U_STANDARD_ERROR_LIMIT]={ - "U_ZERO_ERROR", - - "U_ILLEGAL_ARGUMENT_ERROR", 
- "U_MISSING_RESOURCE_ERROR", - "U_INVALID_FORMAT_ERROR", - "U_FILE_ACCESS_ERROR", - "U_INTERNAL_PROGRAM_ERROR", - "U_MESSAGE_PARSE_ERROR", - "U_MEMORY_ALLOCATION_ERROR", - "U_INDEX_OUTOFBOUNDS_ERROR", - "U_PARSE_ERROR", - "U_INVALID_CHAR_FOUND", - "U_TRUNCATED_CHAR_FOUND", - "U_ILLEGAL_CHAR_FOUND", - "U_INVALID_TABLE_FORMAT", - "U_INVALID_TABLE_FILE", - "U_BUFFER_OVERFLOW_ERROR", - "U_UNSUPPORTED_ERROR", - "U_RESOURCE_TYPE_MISMATCH", - "U_ILLEGAL_ESCAPE_SEQUENCE", - "U_UNSUPPORTED_ESCAPE_SEQUENCE", - "U_NO_SPACE_AVAILABLE", - "U_CE_NOT_FOUND_ERROR", - "U_PRIMARY_TOO_LONG_ERROR", - "U_STATE_TOO_OLD_ERROR", - "U_TOO_MANY_ALIASES_ERROR", - "U_ENUM_OUT_OF_SYNC_ERROR", - "U_INVARIANT_CONVERSION_ERROR", - "U_INVALID_STATE_ERROR", - "U_COLLATOR_VERSION_MISMATCH", - "U_USELESS_COLLATOR_ERROR", - "U_NO_WRITE_PERMISSION" -}; -static const char * const -_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { - "U_UNEXPECTED_TOKEN", - "U_MULTIPLE_DECIMAL_SEPARATORS", - "U_MULTIPLE_EXPONENTIAL_SYMBOLS", - "U_MALFORMED_EXPONENTIAL_PATTERN", - "U_MULTIPLE_PERCENT_SYMBOLS", - "U_MULTIPLE_PERMILL_SYMBOLS", - "U_MULTIPLE_PAD_SPECIFIERS", - "U_PATTERN_SYNTAX_ERROR", - "U_ILLEGAL_PAD_POSITION", - "U_UNMATCHED_BRACES", - "U_UNSUPPORTED_PROPERTY", - "U_UNSUPPORTED_ATTRIBUTE", - "U_ARGUMENT_TYPE_MISMATCH", - "U_DUPLICATE_KEYWORD", - "U_UNDEFINED_KEYWORD", - "U_DEFAULT_KEYWORD_MISSING", - "U_DECIMAL_NUMBER_SYNTAX_ERROR", - "U_FORMAT_INEXACT_ERROR" -}; - -static const char * const -_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = { - "U_BRK_INTERNAL_ERROR", - "U_BRK_HEX_DIGITS_EXPECTED", - "U_BRK_SEMICOLON_EXPECTED", - "U_BRK_RULE_SYNTAX", - "U_BRK_UNCLOSED_SET", - "U_BRK_ASSIGN_ERROR", - "U_BRK_VARIABLE_REDFINITION", - "U_BRK_MISMATCHED_PAREN", - "U_BRK_NEW_LINE_IN_QUOTED_STRING", - "U_BRK_UNDEFINED_VARIABLE", - "U_BRK_INIT_ERROR", - "U_BRK_RULE_EMPTY_SET", - "U_BRK_UNRECOGNIZED_OPTION", - "U_BRK_MALFORMED_RULE_TAG" -}; - -static const char * const 
-_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = { - "U_REGEX_INTERNAL_ERROR", - "U_REGEX_RULE_SYNTAX", - "U_REGEX_INVALID_STATE", - "U_REGEX_BAD_ESCAPE_SEQUENCE", - "U_REGEX_PROPERTY_SYNTAX", - "U_REGEX_UNIMPLEMENTED", - "U_REGEX_MISMATCHED_PAREN", - "U_REGEX_NUMBER_TOO_BIG", - "U_REGEX_BAD_INTERVAL", - "U_REGEX_MAX_LT_MIN", - "U_REGEX_INVALID_BACK_REF", - "U_REGEX_INVALID_FLAG", - "U_REGEX_LOOK_BEHIND_LIMIT", - "U_REGEX_SET_CONTAINS_STRING", - "U_REGEX_OCTAL_TOO_BIG", - "U_REGEX_MISSING_CLOSE_BRACKET", - "U_REGEX_INVALID_RANGE", - "U_REGEX_STACK_OVERFLOW", - "U_REGEX_TIME_OUT", - "U_REGEX_STOPPED_BY_CALLER", - "U_REGEX_PATTERN_TOO_BIG", - "U_REGEX_INVALID_CAPTURE_GROUP_NAME" -}; - -static const char * const -_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = { - "U_STRINGPREP_PROHIBITED_ERROR", - "U_STRINGPREP_UNASSIGNED_ERROR", - "U_STRINGPREP_CHECK_BIDI_ERROR", - "U_IDNA_STD3_ASCII_RULES_ERROR", - "U_IDNA_ACE_PREFIX_ERROR", - "U_IDNA_VERIFICATION_ERROR", - "U_IDNA_LABEL_TOO_LONG_ERROR", - "U_IDNA_ZERO_LENGTH_LABEL_ERROR", - "U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR" -}; - -static const char * const -_uPluginErrorName[U_PLUGIN_ERROR_LIMIT - U_PLUGIN_ERROR_START] = { - "U_PLUGIN_TOO_HIGH", - "U_PLUGIN_DIDNT_SET_LEVEL", -}; - -U_CAPI const char * U_EXPORT2 -u_errorName(UErrorCode code) { - if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) { - return _uErrorName[code]; - } else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) { - return _uErrorInfoName[code - U_ERROR_WARNING_START]; - } else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){ - return _uTransErrorName[code - U_PARSE_ERROR_START]; - } else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){ - return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START]; - } else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){ - return _uBrkErrorName[code - U_BRK_ERROR_START]; - } else if (U_REGEX_ERROR_START <= code && code < 
U_REGEX_ERROR_LIMIT) { - return _uRegexErrorName[code - U_REGEX_ERROR_START]; - } else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) { - return _uIDNAErrorName[code - U_IDNA_ERROR_START]; - } else if(U_PLUGIN_ERROR_START <= code && code < U_PLUGIN_ERROR_LIMIT) { - return _uPluginErrorName[code - U_PLUGIN_ERROR_START]; - } else { - return "[BOGUS UErrorCode]"; - } -} - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/icu-small/source/common/utypes.cpp b/deps/icu-small/source/common/utypes.cpp new file mode 100644 index 0000000000..8f5791be16 --- /dev/null +++ b/deps/icu-small/source/common/utypes.cpp @@ -0,0 +1,224 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2015, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +****************************************************************************** +* +* FILE NAME : utypes.c (previously putil.c) +* +* Date Name Description +* 10/07/2004 grhoten split from putil.c +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +/* u_errorName() ------------------------------------------------------------ */ + +static const char * const +_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={ + "U_USING_FALLBACK_WARNING", + "U_USING_DEFAULT_WARNING", + "U_SAFECLONE_ALLOCATED_WARNING", + "U_STATE_OLD_WARNING", + "U_STRING_NOT_TERMINATED_WARNING", + "U_SORT_KEY_TOO_SHORT_WARNING", + "U_AMBIGUOUS_ALIAS_WARNING", + "U_DIFFERENT_UCA_VERSION", + "U_PLUGIN_CHANGED_LEVEL_WARNING", +}; + +static const char * const +_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={ + "U_BAD_VARIABLE_DEFINITION", + "U_MALFORMED_RULE", + "U_MALFORMED_SET", + "U_MALFORMED_SYMBOL_REFERENCE", + "U_MALFORMED_UNICODE_ESCAPE", + "U_MALFORMED_VARIABLE_DEFINITION", + "U_MALFORMED_VARIABLE_REFERENCE", + "U_MISMATCHED_SEGMENT_DELIMITERS", + "U_MISPLACED_ANCHOR_START", + "U_MISPLACED_CURSOR_OFFSET", + "U_MISPLACED_QUANTIFIER", + "U_MISSING_OPERATOR", + "U_MISSING_SEGMENT_CLOSE", + "U_MULTIPLE_ANTE_CONTEXTS", + "U_MULTIPLE_CURSORS", + "U_MULTIPLE_POST_CONTEXTS", + "U_TRAILING_BACKSLASH", + "U_UNDEFINED_SEGMENT_REFERENCE", + "U_UNDEFINED_VARIABLE", + "U_UNQUOTED_SPECIAL", + "U_UNTERMINATED_QUOTE", + "U_RULE_MASK_ERROR", + "U_MISPLACED_COMPOUND_FILTER", + "U_MULTIPLE_COMPOUND_FILTERS", + "U_INVALID_RBT_SYNTAX", + "U_INVALID_PROPERTY_PATTERN", + "U_MALFORMED_PRAGMA", + "U_UNCLOSED_SEGMENT", + "U_ILLEGAL_CHAR_IN_SEGMENT", + "U_VARIABLE_RANGE_EXHAUSTED", + "U_VARIABLE_RANGE_OVERLAP", + "U_ILLEGAL_CHARACTER", + "U_INTERNAL_TRANSLITERATOR_ERROR", + "U_INVALID_ID", + "U_INVALID_FUNCTION" +}; + +static const char * const +_uErrorName[U_STANDARD_ERROR_LIMIT]={ + "U_ZERO_ERROR", + + "U_ILLEGAL_ARGUMENT_ERROR", 
+ "U_MISSING_RESOURCE_ERROR", + "U_INVALID_FORMAT_ERROR", + "U_FILE_ACCESS_ERROR", + "U_INTERNAL_PROGRAM_ERROR", + "U_MESSAGE_PARSE_ERROR", + "U_MEMORY_ALLOCATION_ERROR", + "U_INDEX_OUTOFBOUNDS_ERROR", + "U_PARSE_ERROR", + "U_INVALID_CHAR_FOUND", + "U_TRUNCATED_CHAR_FOUND", + "U_ILLEGAL_CHAR_FOUND", + "U_INVALID_TABLE_FORMAT", + "U_INVALID_TABLE_FILE", + "U_BUFFER_OVERFLOW_ERROR", + "U_UNSUPPORTED_ERROR", + "U_RESOURCE_TYPE_MISMATCH", + "U_ILLEGAL_ESCAPE_SEQUENCE", + "U_UNSUPPORTED_ESCAPE_SEQUENCE", + "U_NO_SPACE_AVAILABLE", + "U_CE_NOT_FOUND_ERROR", + "U_PRIMARY_TOO_LONG_ERROR", + "U_STATE_TOO_OLD_ERROR", + "U_TOO_MANY_ALIASES_ERROR", + "U_ENUM_OUT_OF_SYNC_ERROR", + "U_INVARIANT_CONVERSION_ERROR", + "U_INVALID_STATE_ERROR", + "U_COLLATOR_VERSION_MISMATCH", + "U_USELESS_COLLATOR_ERROR", + "U_NO_WRITE_PERMISSION" +}; +static const char * const +_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { + "U_UNEXPECTED_TOKEN", + "U_MULTIPLE_DECIMAL_SEPARATORS", + "U_MULTIPLE_EXPONENTIAL_SYMBOLS", + "U_MALFORMED_EXPONENTIAL_PATTERN", + "U_MULTIPLE_PERCENT_SYMBOLS", + "U_MULTIPLE_PERMILL_SYMBOLS", + "U_MULTIPLE_PAD_SPECIFIERS", + "U_PATTERN_SYNTAX_ERROR", + "U_ILLEGAL_PAD_POSITION", + "U_UNMATCHED_BRACES", + "U_UNSUPPORTED_PROPERTY", + "U_UNSUPPORTED_ATTRIBUTE", + "U_ARGUMENT_TYPE_MISMATCH", + "U_DUPLICATE_KEYWORD", + "U_UNDEFINED_KEYWORD", + "U_DEFAULT_KEYWORD_MISSING", + "U_DECIMAL_NUMBER_SYNTAX_ERROR", + "U_FORMAT_INEXACT_ERROR" +}; + +static const char * const +_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = { + "U_BRK_INTERNAL_ERROR", + "U_BRK_HEX_DIGITS_EXPECTED", + "U_BRK_SEMICOLON_EXPECTED", + "U_BRK_RULE_SYNTAX", + "U_BRK_UNCLOSED_SET", + "U_BRK_ASSIGN_ERROR", + "U_BRK_VARIABLE_REDFINITION", + "U_BRK_MISMATCHED_PAREN", + "U_BRK_NEW_LINE_IN_QUOTED_STRING", + "U_BRK_UNDEFINED_VARIABLE", + "U_BRK_INIT_ERROR", + "U_BRK_RULE_EMPTY_SET", + "U_BRK_UNRECOGNIZED_OPTION", + "U_BRK_MALFORMED_RULE_TAG" +}; + +static const char * const 
+_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = { + "U_REGEX_INTERNAL_ERROR", + "U_REGEX_RULE_SYNTAX", + "U_REGEX_INVALID_STATE", + "U_REGEX_BAD_ESCAPE_SEQUENCE", + "U_REGEX_PROPERTY_SYNTAX", + "U_REGEX_UNIMPLEMENTED", + "U_REGEX_MISMATCHED_PAREN", + "U_REGEX_NUMBER_TOO_BIG", + "U_REGEX_BAD_INTERVAL", + "U_REGEX_MAX_LT_MIN", + "U_REGEX_INVALID_BACK_REF", + "U_REGEX_INVALID_FLAG", + "U_REGEX_LOOK_BEHIND_LIMIT", + "U_REGEX_SET_CONTAINS_STRING", + "U_REGEX_OCTAL_TOO_BIG", + "U_REGEX_MISSING_CLOSE_BRACKET", + "U_REGEX_INVALID_RANGE", + "U_REGEX_STACK_OVERFLOW", + "U_REGEX_TIME_OUT", + "U_REGEX_STOPPED_BY_CALLER", + "U_REGEX_PATTERN_TOO_BIG", + "U_REGEX_INVALID_CAPTURE_GROUP_NAME" +}; + +static const char * const +_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = { + "U_STRINGPREP_PROHIBITED_ERROR", + "U_STRINGPREP_UNASSIGNED_ERROR", + "U_STRINGPREP_CHECK_BIDI_ERROR", + "U_IDNA_STD3_ASCII_RULES_ERROR", + "U_IDNA_ACE_PREFIX_ERROR", + "U_IDNA_VERIFICATION_ERROR", + "U_IDNA_LABEL_TOO_LONG_ERROR", + "U_IDNA_ZERO_LENGTH_LABEL_ERROR", + "U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR" +}; + +static const char * const +_uPluginErrorName[U_PLUGIN_ERROR_LIMIT - U_PLUGIN_ERROR_START] = { + "U_PLUGIN_TOO_HIGH", + "U_PLUGIN_DIDNT_SET_LEVEL", +}; + +U_CAPI const char * U_EXPORT2 +u_errorName(UErrorCode code) { + if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) { + return _uErrorName[code]; + } else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) { + return _uErrorInfoName[code - U_ERROR_WARNING_START]; + } else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){ + return _uTransErrorName[code - U_PARSE_ERROR_START]; + } else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){ + return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START]; + } else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){ + return _uBrkErrorName[code - U_BRK_ERROR_START]; + } else if (U_REGEX_ERROR_START <= code && code < 
U_REGEX_ERROR_LIMIT) { + return _uRegexErrorName[code - U_REGEX_ERROR_START]; + } else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) { + return _uIDNAErrorName[code - U_IDNA_ERROR_START]; + } else if(U_PLUGIN_ERROR_START <= code && code < U_PLUGIN_ERROR_LIMIT) { + return _uPluginErrorName[code - U_PLUGIN_ERROR_START]; + } else { + return "[BOGUS UErrorCode]"; + } +} + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/deps/icu-small/source/common/uvector.cpp b/deps/icu-small/source/common/uvector.cpp index e48913e55e..ad3a813e37 100644 --- a/deps/icu-small/source/common/uvector.cpp +++ b/deps/icu-small/source/common/uvector.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/uvector.h b/deps/icu-small/source/common/uvector.h index f6abe3e78a..ad75e23400 100644 --- a/deps/icu-small/source/common/uvector.h +++ b/deps/icu-small/source/common/uvector.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/uvectr32.cpp b/deps/icu-small/source/common/uvectr32.cpp index 3607d974ce..6e0760d718 100644 --- a/deps/icu-small/source/common/uvectr32.cpp +++ b/deps/icu-small/source/common/uvectr32.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/uvectr32.h b/deps/icu-small/source/common/uvectr32.h index c975de769b..3174e94c9f 100644 --- a/deps/icu-small/source/common/uvectr32.h +++ b/deps/icu-small/source/common/uvectr32.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/uvectr64.cpp b/deps/icu-small/source/common/uvectr64.cpp index 9196274aba..145db246dc 100644 --- a/deps/icu-small/source/common/uvectr64.cpp +++ b/deps/icu-small/source/common/uvectr64.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/common/uvectr64.h b/deps/icu-small/source/common/uvectr64.h index fec1098398..1db4a1fe2e 100644 --- a/deps/icu-small/source/common/uvectr64.h +++ b/deps/icu-small/source/common/uvectr64.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/common/wintz.c b/deps/icu-small/source/common/wintz.c deleted file mode 100644 index 5a3ecae0a4..0000000000 --- a/deps/icu-small/source/common/wintz.c +++ /dev/null @@ -1,441 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 2005-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************** -* -* File WINTZ.CPP -* -******************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if U_PLATFORM_HAS_WIN32_API - -#include "wintz.h" -#include "cmemory.h" -#include "cstring.h" - -#include "unicode/ures.h" -#include "unicode/ustring.h" - -# define WIN32_LEAN_AND_MEAN -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -#include - -#define MAX_LENGTH_ID 40 - -/* The layout of the Tzi value in the registry */ -typedef struct -{ - int32_t bias; - int32_t standardBias; - int32_t daylightBias; - SYSTEMTIME standardDate; - SYSTEMTIME daylightDate; -} TZI; - -/** - * Various registry keys and key fragments. - */ -static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\"; -/* static const char STANDARD_NAME_REGKEY[] = "StandardName"; Currently unused constant */ -static const char STANDARD_TIME_REGKEY[] = " Standard Time"; -static const char TZI_REGKEY[] = "TZI"; -static const char STD_REGKEY[] = "Std"; - -/** - * HKLM subkeys used to probe for the flavor of Windows. Note that we - * specifically check for the "GMT" zone subkey; this is present on - * NT, but on XP has become "GMT Standard Time". We need to - * discriminate between these cases. 
- */ -static const char* const WIN_TYPE_PROBE_REGKEY[] = { - /* WIN_9X_ME_TYPE */ - "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones", - - /* WIN_NT_TYPE */ - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\GMT" - - /* otherwise: WIN_2K_XP_TYPE */ -}; - -/** - * The time zone root subkeys (under HKLM) for different flavors of - * Windows. - */ -static const char* const TZ_REGKEY[] = { - /* WIN_9X_ME_TYPE */ - "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Time Zones\\", - - /* WIN_NT_TYPE | WIN_2K_XP_TYPE */ - "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\" -}; - -/** - * Flavor of Windows, from our perspective. Not a real OS version, - * but rather the flavor of the layout of the time zone information in - * the registry. - */ -enum { - WIN_9X_ME_TYPE = 1, - WIN_NT_TYPE = 2, - WIN_2K_XP_TYPE = 3 -}; - -static int32_t gWinType = 0; - -static int32_t detectWindowsType() -{ - int32_t winType; - LONG result; - HKEY hkey; - - /* Detect the version of windows by trying to open a sequence of - probe keys. We don't use the OS version API because what we - really want to know is how the registry is laid out. - Specifically, is it 9x/Me or not, and is it "GMT" or "GMT - Standard Time". */ - for (winType = 0; winType < 2; winType++) { - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, - WIN_TYPE_PROBE_REGKEY[winType], - 0, - KEY_QUERY_VALUE, - &hkey); - RegCloseKey(hkey); - - if (result == ERROR_SUCCESS) { - break; - } - } - - return winType+1; /* +1 to bring it inline with the enum */ -} - -static LONG openTZRegKey(HKEY *hkey, const char *winid) -{ - char subKeyName[110]; /* TODO: why 96?? */ - char *name; - LONG result; - - /* This isn't thread safe, but it's good enough because the result should be constant per system. 
*/ - if (gWinType <= 0) { - gWinType = detectWindowsType(); - } - - uprv_strcpy(subKeyName, TZ_REGKEY[(gWinType != WIN_9X_ME_TYPE)]); - name = &subKeyName[strlen(subKeyName)]; - uprv_strcat(subKeyName, winid); - - if (gWinType == WIN_9X_ME_TYPE) { - /* Remove " Standard Time" */ - char *pStd = uprv_strstr(subKeyName, STANDARD_TIME_REGKEY); - if (pStd) { - *pStd = 0; - } - } - - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, - subKeyName, - 0, - KEY_QUERY_VALUE, - hkey); - return result; -} - -static LONG getTZI(const char *winid, TZI *tzi) -{ - DWORD cbData = sizeof(TZI); - LONG result; - HKEY hkey; - - result = openTZRegKey(&hkey, winid); - - if (result == ERROR_SUCCESS) { - result = RegQueryValueExA(hkey, - TZI_REGKEY, - NULL, - NULL, - (LPBYTE)tzi, - &cbData); - - } - - RegCloseKey(hkey); - - return result; -} - -static LONG getSTDName(const char *winid, char *regStdName, int32_t length) { - DWORD cbData = length; - LONG result; - HKEY hkey; - - result = openTZRegKey(&hkey, winid); - - if (result == ERROR_SUCCESS) { - result = RegQueryValueExA(hkey, - STD_REGKEY, - NULL, - NULL, - (LPBYTE)regStdName, - &cbData); - - } - - RegCloseKey(hkey); - - return result; -} - -static LONG getTZKeyName(char* tzKeyName, int32_t length) { - HKEY hkey; - LONG result = FALSE; - DWORD cbData = length; - - if(ERROR_SUCCESS == RegOpenKeyExA( - HKEY_LOCAL_MACHINE, - CURRENT_ZONE_REGKEY, - 0, - KEY_QUERY_VALUE, - &hkey)) - { - result = RegQueryValueExA( - hkey, - "TimeZoneKeyName", - NULL, - NULL, - (LPBYTE)tzKeyName, - &cbData); - } - - return result; -} - -/* - This code attempts to detect the Windows time zone, as set in the - Windows Date and Time control panel. It attempts to work on - multiple flavors of Windows (9x, Me, NT, 2000, XP) and on localized - installs. It works by directly interrogating the registry and - comparing the data there with the data returned by the - GetTimeZoneInformation API, along with some other strategies. 
The - registry contains time zone data under one of two keys (depending on - the flavor of Windows): - - HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones\ - HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ - - Under this key are several subkeys, one for each time zone. These - subkeys are named "Pacific" on Win9x/Me and "Pacific Standard Time" - on WinNT/2k/XP. There are some other wrinkles; see the code for - details. The subkey name is NOT LOCALIZED, allowing us to support - localized installs. - - Under the subkey are data values. We care about: - - Std Standard time display name, localized - TZI Binary block of data - - The TZI data is of particular interest. It contains the offset, two - more offsets for standard and daylight time, and the start and end - rules. This is the same data returned by the GetTimeZoneInformation - API. The API may modify the data on the way out, so we have to be - careful, but essentially we do a binary comparison against the TZI - blocks of various registry keys. When we find a match, we know what - time zone Windows is set to. Since the registry key is not - localized, we can then translate the key through a simple table - lookup into the corresponding ICU time zone. - - This strategy doesn't always work because there are zones which - share an offset and rules, so more than one TZI block will match. - For example, both Tokyo and Seoul are at GMT+9 with no DST rules; - their TZI blocks are identical. For these cases, we fall back to a - name lookup. We attempt to match the display name as stored in the - registry for the current zone to the display name stored in the - registry for various Windows zones. By comparing the registry data - directly we avoid conversion complications. - - Author: Alan Liu - Since: ICU 2.6 - Based on original code by Carl Brown -*/ - -/** - * Main Windows time zone detection function. Returns the Windows - * time zone, translated to an ICU time zone, or NULL upon failure. 
- */ -U_CFUNC const char* U_EXPORT2 -uprv_detectWindowsTimeZone() { - UErrorCode status = U_ZERO_ERROR; - UResourceBundle* bundle = NULL; - char* icuid = NULL; - char apiStdName[MAX_LENGTH_ID]; - char regStdName[MAX_LENGTH_ID]; - char tmpid[MAX_LENGTH_ID]; - int32_t len; - int id; - int errorCode; - UChar ISOcodeW[3]; /* 2 letter iso code in UTF-16*/ - char ISOcodeA[3]; /* 2 letter iso code in ansi */ - - LONG result; - TZI tziKey; - TZI tziReg; - TIME_ZONE_INFORMATION apiTZI; - - BOOL isVistaOrHigher; - BOOL tryPreVistaFallback; - OSVERSIONINFO osVerInfo; - - /* Obtain TIME_ZONE_INFORMATION from the API, and then convert it - to TZI. We could also interrogate the registry directly; we do - this below if needed. */ - uprv_memset(&apiTZI, 0, sizeof(apiTZI)); - uprv_memset(&tziKey, 0, sizeof(tziKey)); - uprv_memset(&tziReg, 0, sizeof(tziReg)); - GetTimeZoneInformation(&apiTZI); - tziKey.bias = apiTZI.Bias; - uprv_memcpy((char *)&tziKey.standardDate, (char*)&apiTZI.StandardDate, - sizeof(apiTZI.StandardDate)); - uprv_memcpy((char *)&tziKey.daylightDate, (char*)&apiTZI.DaylightDate, - sizeof(apiTZI.DaylightDate)); - - /* Convert the wchar_t* standard name to char* */ - uprv_memset(apiStdName, 0, sizeof(apiStdName)); - wcstombs(apiStdName, apiTZI.StandardName, MAX_LENGTH_ID); - - tmpid[0] = 0; - - id = GetUserGeoID(GEOCLASS_NATION); - errorCode = GetGeoInfoW(id,GEO_ISO2,ISOcodeW,3,0); - u_strToUTF8(ISOcodeA, 3, NULL, ISOcodeW, 3, &status); - - bundle = ures_openDirect(NULL, "windowsZones", &status); - ures_getByKey(bundle, "mapTimezones", bundle, &status); - - /* - Windows Vista+ provides us with a "TimeZoneKeyName" that is not localized - and can be used to directly map a name in our bundle. 
Try to use that first - if we're on Vista or higher - */ - uprv_memset(&osVerInfo, 0, sizeof(osVerInfo)); - osVerInfo.dwOSVersionInfoSize = sizeof(osVerInfo); - GetVersionEx(&osVerInfo); - isVistaOrHigher = osVerInfo.dwMajorVersion >= 6; /* actually includes Windows Server 2008 as well, but don't worry about it */ - tryPreVistaFallback = TRUE; - if(isVistaOrHigher) { - result = getTZKeyName(regStdName, sizeof(regStdName)); - if(ERROR_SUCCESS == result) { - UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); - if(U_SUCCESS(status)) { - const UChar* icuTZ = NULL; - if (errorCode != 0) { - icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); - } - if (errorCode==0 || icuTZ==NULL) { - /* fallback to default "001" and reset status */ - status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); - } - - if(U_SUCCESS(status)) { - int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) { - tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ - } - tmpid[index]='\0'; - tryPreVistaFallback = FALSE; - } - } - ures_close(winTZ); - } - } - - if(tryPreVistaFallback) { - - /* Note: We get the winid not from static tables but from resource bundle. */ - while (U_SUCCESS(status) && ures_hasNext(bundle)) { - UBool idFound = FALSE; - const char* winid; - UResourceBundle* winTZ = ures_getNextResource(bundle, NULL, &status); - if (U_FAILURE(status)) { - break; - } - winid = ures_getKey(winTZ); - result = getTZI(winid, &tziReg); - - if (result == ERROR_SUCCESS) { - /* Windows alters the DaylightBias in some situations. - Using the bias and the rules suffices, so overwrite - these unreliable fields. 
*/ - tziKey.standardBias = tziReg.standardBias; - tziKey.daylightBias = tziReg.daylightBias; - - if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) { - const UChar* icuTZ = NULL; - if (errorCode != 0) { - icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); - } - if (errorCode==0 || icuTZ==NULL) { - /* fallback to default "001" and reset status */ - status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); - } - - if (U_SUCCESS(status)) { - /* Get the standard name from the registry key to compare with - the one from Windows API call. */ - uprv_memset(regStdName, 0, sizeof(regStdName)); - result = getSTDName(winid, regStdName, sizeof(regStdName)); - if (result == ERROR_SUCCESS) { - if (uprv_strcmp(apiStdName, regStdName) == 0) { - idFound = TRUE; - } - } - - /* tmpid buffer holds the ICU timezone ID corresponding to the timezone ID from Windows. - * If none is found, tmpid buffer will contain a fallback ID (i.e. the time zone ID matching - * the current time zone information) - */ - if (idFound || tmpid[0] == 0) { - /* if icuTZ has more than one city, take only the first (i.e. terminate icuTZ at first space) */ - int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) { - tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ - } - tmpid[index]='\0'; - } - } - } - } - ures_close(winTZ); - if (idFound) { - break; - } - } - } - - /* - * Copy the timezone ID to icuid to be returned. 
- */ - if (tmpid[0] != 0) { - len = uprv_strlen(tmpid); - icuid = (char*)uprv_calloc(len + 1, sizeof(char)); - if (icuid != NULL) { - uprv_strcpy(icuid, tmpid); - } - } - - ures_close(bundle); - - return icuid; -} - -#endif /* U_PLATFORM_HAS_WIN32_API */ diff --git a/deps/icu-small/source/common/wintz.cpp b/deps/icu-small/source/common/wintz.cpp new file mode 100644 index 0000000000..c30a5dbc60 --- /dev/null +++ b/deps/icu-small/source/common/wintz.cpp @@ -0,0 +1,384 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 2005-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File WINTZ.CPP +* +******************************************************************************** +*/ + +#include "unicode/utypes.h" + +// This file contains only desktop Windows behavior +// Windows UWP calls Windows::Globalization directly, so this isn't needed there. +#if U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) + +#include "wintz.h" +#include "cmemory.h" +#include "cstring.h" + +#include "unicode/ures.h" +#include "unicode/ustring.h" + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX +#include + +#define MAX_LENGTH_ID 40 + +/* The layout of the Tzi value in the registry */ +typedef struct +{ + int32_t bias; + int32_t standardBias; + int32_t daylightBias; + SYSTEMTIME standardDate; + SYSTEMTIME daylightDate; +} TZI; + +/** + * Various registry keys and key fragments. 
+ */ +static const char CURRENT_ZONE_REGKEY[] = "SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\"; +static const char STANDARD_TIME_REGKEY[] = " Standard Time"; +static const char TZI_REGKEY[] = "TZI"; +static const char STD_REGKEY[] = "Std"; + +/** + * The time zone root keys (under HKLM) for Win7+ + */ +static const char TZ_REGKEY[] = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"; + +static LONG openTZRegKey(HKEY *hkey, const char *winid) +{ + char subKeyName[110]; /* TODO: why 110?? */ + char *name; + LONG result; + + uprv_strcpy(subKeyName, TZ_REGKEY); + name = &subKeyName[strlen(subKeyName)]; + uprv_strcat(subKeyName, winid); + + result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, + subKeyName, + 0, + KEY_QUERY_VALUE, + hkey); + return result; +} + +static LONG getTZI(const char *winid, TZI *tzi) +{ + DWORD cbData = sizeof(TZI); + LONG result; + HKEY hkey; + + result = openTZRegKey(&hkey, winid); + + if (result == ERROR_SUCCESS) + { + result = RegQueryValueExA(hkey, + TZI_REGKEY, + NULL, + NULL, + (LPBYTE)tzi, + &cbData); + RegCloseKey(hkey); + } + + return result; +} + +static LONG getSTDName(const char *winid, char *regStdName, int32_t length) +{ + DWORD cbData = length; + LONG result; + HKEY hkey; + + result = openTZRegKey(&hkey, winid); + + if (result == ERROR_SUCCESS) + { + result = RegQueryValueExA(hkey, + STD_REGKEY, + NULL, + NULL, + (LPBYTE)regStdName, + &cbData); + RegCloseKey(hkey); + } + + return result; +} + +static LONG getTZKeyName(char* tzKeyName, int32_t length) +{ + HKEY hkey; + LONG result = FALSE; + DWORD cbData = length; + + if(ERROR_SUCCESS == RegOpenKeyExA( + HKEY_LOCAL_MACHINE, + CURRENT_ZONE_REGKEY, + 0, + KEY_QUERY_VALUE, + &hkey)) + { + result = RegQueryValueExA( + hkey, + "TimeZoneKeyName", + NULL, + NULL, + (LPBYTE)tzKeyName, + &cbData); + + RegCloseKey(hkey); + } + + return result; +} + +/* + This code attempts to detect the Windows time zone directly, + as set in the Windows Date and Time control panel. 
It attempts + to work on versions greater than Windows Vista and on localized + installs. It works by directly interrogating the registry and + comparing the data there with the data returned by the + GetTimeZoneInformation API, along with some other strategies. The + registry contains time zone data under this key: + + HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ + + Under this key are several subkeys, one for each time zone. For + example these subkeys are named "Pacific Standard Time" on Vista+. + There are some other wrinkles; see the code for + details. The subkey name is NOT LOCALIZED, allowing us to support + localized installs. + + Under the subkey are data values. We care about: + + Std Standard time display name, localized + TZI Binary block of data + + The TZI data is of particular interest. It contains the offset, two + more offsets for standard and daylight time, and the start and end + rules. This is the same data returned by the GetTimeZoneInformation + API. The API may modify the data on the way out, so we have to be + careful, but essentially we do a binary comparison against the TZI + blocks of various registry keys. When we find a match, we know what + time zone Windows is set to. Since the registry key is not + localized, we can then translate the key through a simple table + lookup into the corresponding ICU time zone. + + This strategy doesn't always work because there are zones which + share an offset and rules, so more than one TZI block will match. + For example, both Tokyo and Seoul are at GMT+9 with no DST rules; + their TZI blocks are identical. For these cases, we fall back to a + name lookup. We attempt to match the display name as stored in the + registry for the current zone to the display name stored in the + registry for various Windows zones. By comparing the registry data + directly we avoid conversion complications. 
+ + Author: Alan Liu + Since: ICU 2.6 + Based on original code by Carl Brown +*/ + +/** + * Main Windows time zone detection function. Returns the Windows + * time zone, translated to an ICU time zone, or NULL upon failure. + */ +U_CFUNC const char* U_EXPORT2 +uprv_detectWindowsTimeZone() +{ + UErrorCode status = U_ZERO_ERROR; + UResourceBundle* bundle = NULL; + char* icuid = NULL; + char apiStdName[MAX_LENGTH_ID]; + char regStdName[MAX_LENGTH_ID]; + char tmpid[MAX_LENGTH_ID]; + int32_t len; + int id; + int errorCode; + wchar_t ISOcodeW[3]; /* 2 letter iso code in UTF-16*/ + char ISOcodeA[3]; /* 2 letter iso code in ansi */ + + LONG result; + TZI tziKey; + TZI tziReg; + TIME_ZONE_INFORMATION apiTZI; + + BOOL tryPreVistaFallback; + OSVERSIONINFO osVerInfo; + + /* Obtain TIME_ZONE_INFORMATION from the API, and then convert it + to TZI. We could also interrogate the registry directly; we do + this below if needed. */ + uprv_memset(&apiTZI, 0, sizeof(apiTZI)); + uprv_memset(&tziKey, 0, sizeof(tziKey)); + uprv_memset(&tziReg, 0, sizeof(tziReg)); + GetTimeZoneInformation(&apiTZI); + tziKey.bias = apiTZI.Bias; + uprv_memcpy((char *)&tziKey.standardDate, (char*)&apiTZI.StandardDate, + sizeof(apiTZI.StandardDate)); + uprv_memcpy((char *)&tziKey.daylightDate, (char*)&apiTZI.DaylightDate, + sizeof(apiTZI.DaylightDate)); + + /* Convert the wchar_t* standard name to char* */ + uprv_memset(apiStdName, 0, sizeof(apiStdName)); + wcstombs(apiStdName, apiTZI.StandardName, MAX_LENGTH_ID); + + tmpid[0] = 0; + + id = GetUserGeoID(GEOCLASS_NATION); + errorCode = GetGeoInfoW(id, GEO_ISO2, ISOcodeW, 3, 0); + u_strToUTF8(ISOcodeA, 3, NULL, (const UChar *)ISOcodeW, 3, &status); + + bundle = ures_openDirect(NULL, "windowsZones", &status); + ures_getByKey(bundle, "mapTimezones", bundle, &status); + + /* + Windows Vista+ provides us with a "TimeZoneKeyName" that is not localized + and can be used to directly map a name in our bundle. 
Try to use that first + if we're on Vista or higher + */ + uprv_memset(&osVerInfo, 0, sizeof(osVerInfo)); + osVerInfo.dwOSVersionInfoSize = sizeof(osVerInfo); + tryPreVistaFallback = TRUE; + result = getTZKeyName(regStdName, sizeof(regStdName)); + if(ERROR_SUCCESS == result) + { + UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); + if(U_SUCCESS(status)) + { + const UChar* icuTZ = NULL; + if (errorCode != 0) + { + icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); + } + if (errorCode==0 || icuTZ==NULL) + { + /* fallback to default "001" and reset status */ + status = U_ZERO_ERROR; + icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); + } + + if(U_SUCCESS(status)) + { + int index=0; + while (! (*icuTZ == '\0' || *icuTZ ==' ')) + { + tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ + } + tmpid[index]='\0'; + tryPreVistaFallback = FALSE; + } + } + ures_close(winTZ); + } + + if(tryPreVistaFallback) + { + /* Note: We get the winid not from static tables but from resource bundle. */ + while (U_SUCCESS(status) && ures_hasNext(bundle)) + { + UBool idFound = FALSE; + const char* winid; + UResourceBundle* winTZ = ures_getNextResource(bundle, NULL, &status); + if (U_FAILURE(status)) + { + break; + } + winid = ures_getKey(winTZ); + result = getTZI(winid, &tziReg); + + if (result == ERROR_SUCCESS) + { + /* Windows alters the DaylightBias in some situations. + Using the bias and the rules suffices, so overwrite + these unreliable fields. 
*/ + tziKey.standardBias = tziReg.standardBias; + tziKey.daylightBias = tziReg.daylightBias; + + if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) + { + const UChar* icuTZ = NULL; + if (errorCode != 0) + { + icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); + } + if (errorCode==0 || icuTZ==NULL) + { + /* fallback to default "001" and reset status */ + status = U_ZERO_ERROR; + icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); + } + + if (U_SUCCESS(status)) + { + /* Get the standard name from the registry key to compare with + the one from Windows API call. */ + uprv_memset(regStdName, 0, sizeof(regStdName)); + result = getSTDName(winid, regStdName, sizeof(regStdName)); + if (result == ERROR_SUCCESS) + { + if (uprv_strcmp(apiStdName, regStdName) == 0) + { + idFound = TRUE; + } + } + + /* tmpid buffer holds the ICU timezone ID corresponding to the timezone ID from Windows. + * If none is found, tmpid buffer will contain a fallback ID (i.e. the time zone ID matching + * the current time zone information) + */ + if (idFound || tmpid[0] == 0) + { + /* if icuTZ has more than one city, take only the first (i.e. terminate icuTZ at first space) */ + int index=0; + while (! (*icuTZ == '\0' || *icuTZ ==' ')) + { + tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ + } + tmpid[index]='\0'; + } + } + } + } + ures_close(winTZ); + if (idFound) + { + break; + } + } + } + + /* + * Copy the timezone ID to icuid to be returned. 
+ */ + if (tmpid[0] != 0) + { + len = uprv_strlen(tmpid); + icuid = (char*)uprv_calloc(len + 1, sizeof(char)); + if (icuid != NULL) + { + uprv_strcpy(icuid, tmpid); + } + } + + ures_close(bundle); + + return icuid; +} + +#endif /* U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) */ diff --git a/deps/icu-small/source/common/wintz.h b/deps/icu-small/source/common/wintz.h index 01a906a401..9e8cbbcfab 100644 --- a/deps/icu-small/source/common/wintz.h +++ b/deps/icu-small/source/common/wintz.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -16,7 +16,9 @@ #include "unicode/utypes.h" -#if U_PLATFORM_HAS_WIN32_API +// This file contains only desktop windows behavior +// Windows UWP calls Windows::Globalization directly, so this isn't needed there. +#if U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) /** * \file @@ -31,6 +33,6 @@ U_CDECL_END U_CFUNC const char* U_EXPORT2 uprv_detectWindowsTimeZone(); -#endif /* U_PLATFORM_HAS_WIN32_API */ +#endif /* U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) */ #endif /* __WINTZ */ diff --git a/deps/icu-small/source/data/in/icudt58l.dat b/deps/icu-small/source/data/in/icudt58l.dat deleted file mode 100644 index bc36c1313d..0000000000 Binary files a/deps/icu-small/source/data/in/icudt58l.dat and /dev/null differ diff --git a/deps/icu-small/source/data/in/icudt59l.dat b/deps/icu-small/source/data/in/icudt59l.dat new file mode 100644 index 0000000000..e7fb9b2598 Binary files /dev/null and b/deps/icu-small/source/data/in/icudt59l.dat differ diff --git a/deps/icu-small/source/i18n/affixpatternparser.cpp b/deps/icu-small/source/i18n/affixpatternparser.cpp index 05e45dbb12..0d65d13057 100644 --- a/deps/icu-small/source/i18n/affixpatternparser.cpp +++ 
b/deps/icu-small/source/i18n/affixpatternparser.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2015, International Business Machines @@ -13,7 +13,9 @@ #include "unicode/dcfmtsym.h" #include "unicode/plurrule.h" +#include "unicode/strenum.h" #include "unicode/ucurr.h" +#include "unicode/ustring.h" #include "affixpatternparser.h" #include "charstr.h" #include "precision.h" diff --git a/deps/icu-small/source/i18n/affixpatternparser.h b/deps/icu-small/source/i18n/affixpatternparser.h index 07f84c5314..b54c749c70 100644 --- a/deps/icu-small/source/i18n/affixpatternparser.h +++ b/deps/icu-small/source/i18n/affixpatternparser.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/alphaindex.cpp b/deps/icu-small/source/i18n/alphaindex.cpp index 8a2f0a3cb5..692f5809b6 100644 --- a/deps/icu-small/source/i18n/alphaindex.cpp +++ b/deps/icu-small/source/i18n/alphaindex.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/anytrans.cpp b/deps/icu-small/source/i18n/anytrans.cpp index 8ec6f837cf..e7d5375d69 100644 --- a/deps/icu-small/source/i18n/anytrans.cpp +++ b/deps/icu-small/source/i18n/anytrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************** diff --git a/deps/icu-small/source/i18n/anytrans.h b/deps/icu-small/source/i18n/anytrans.h index d06d2baa57..703d42b6d4 100644 --- a/deps/icu-small/source/i18n/anytrans.h +++ b/deps/icu-small/source/i18n/anytrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *********************************************************************** diff --git a/deps/icu-small/source/i18n/astro.cpp b/deps/icu-small/source/i18n/astro.cpp index d657aaa20d..0bf32ae854 100644 --- a/deps/icu-small/source/i18n/astro.cpp +++ b/deps/icu-small/source/i18n/astro.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /************************************************************************ * Copyright (C) 1996-2012, International Business Machines Corporation diff --git a/deps/icu-small/source/i18n/astro.h b/deps/icu-small/source/i18n/astro.h index e854661763..a246489005 100644 --- a/deps/icu-small/source/i18n/astro.h +++ b/deps/icu-small/source/i18n/astro.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /************************************************************************ * Copyright (C) 1996-2008, International Business Machines Corporation * diff --git a/deps/icu-small/source/i18n/basictz.cpp b/deps/icu-small/source/i18n/basictz.cpp index b464f5cc5f..6cd93f4d3c 100644 --- a/deps/icu-small/source/i18n/basictz.cpp +++ b/deps/icu-small/source/i18n/basictz.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/bocsu.cpp b/deps/icu-small/source/i18n/bocsu.cpp index a2f94f239e..861a76a042 100644 --- a/deps/icu-small/source/i18n/bocsu.cpp +++ b/deps/icu-small/source/i18n/bocsu.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: bocsu.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/bocsu.h b/deps/icu-small/source/i18n/bocsu.h index 56b03500b1..6b8ed51970 100644 --- a/deps/icu-small/source/i18n/bocsu.h +++ b/deps/icu-small/source/i18n/bocsu.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: bocsu.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/brktrans.cpp b/deps/icu-small/source/i18n/brktrans.cpp index 714a0a8720..ab5a803842 100644 --- a/deps/icu-small/source/i18n/brktrans.cpp +++ b/deps/icu-small/source/i18n/brktrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/brktrans.h b/deps/icu-small/source/i18n/brktrans.h index 27228321e6..fcc8bdd002 100644 --- a/deps/icu-small/source/i18n/brktrans.h +++ b/deps/icu-small/source/i18n/brktrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/buddhcal.cpp b/deps/icu-small/source/i18n/buddhcal.cpp index c8a3a91a2c..b6ccbc4749 100644 --- a/deps/icu-small/source/i18n/buddhcal.cpp +++ b/deps/icu-small/source/i18n/buddhcal.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/buddhcal.h b/deps/icu-small/source/i18n/buddhcal.h index 95db980600..89e3f3dec3 100644 --- a/deps/icu-small/source/i18n/buddhcal.h +++ b/deps/icu-small/source/i18n/buddhcal.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/calendar.cpp b/deps/icu-small/source/i18n/calendar.cpp index 9b0f5a0ad3..5b7d64d20d 100644 --- a/deps/icu-small/source/i18n/calendar.cpp +++ b/deps/icu-small/source/i18n/calendar.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/casetrn.cpp b/deps/icu-small/source/i18n/casetrn.cpp index 7f71362c33..f08d448881 100644 --- a/deps/icu-small/source/i18n/casetrn.cpp +++ b/deps/icu-small/source/i18n/casetrn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: casetrn.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -92,7 +92,6 @@ UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator) */ CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) : Transliterator(id, 0), - fCsp(ucase_getSingleton()), fMap(map) { // TODO test incremental mode with context-sensitive text (e.g. 
greek sigma) @@ -110,7 +109,7 @@ CaseMapTransliterator::~CaseMapTransliterator() { */ CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) : Transliterator(o), - fCsp(o.fCsp), fMap(o.fMap) + fMap(o.fMap) { } @@ -119,7 +118,6 @@ CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) : */ /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) { Transliterator::operator=(o); - fCsp = o.fCsp; fMap = o.fMap; return *this; }*/ @@ -151,14 +149,14 @@ void CaseMapTransliterator::handleTransliterate(Replaceable& text, UnicodeString tmp; const UChar *s; UChar32 c; - int32_t textPos, delta, result, locCache=0; + int32_t textPos, delta, result; for(textPos=offsets.start; textPosmarkAsSet(); // Everything up to first 0 is the prefix - unit->prefix = formatStr.tempSubString(0, firstIdx); + unit->prefix = positivePart.tempSubString(0, firstIdx); fixQuotes(unit->prefix); // Everything beyond the last 0 is the suffix - unit->suffix = formatStr.tempSubString(lastIdx + 1); + unit->suffix = positivePart.tempSubString(lastIdx + 1); fixQuotes(unit->suffix); // If there is effectively no prefix or suffix, ignore the actual number of @@ -804,7 +813,7 @@ static int32_t populatePrefixSuffix( // Calculate number of zeros before decimal point int32_t idx = firstIdx + 1; - while (idx <= lastIdx && formatStr.charAt(idx) == u_0) { + while (idx <= lastIdx && positivePart.charAt(idx) == u_0) { ++idx; } return (idx - firstIdx); diff --git a/deps/icu-small/source/i18n/coptccal.cpp b/deps/icu-small/source/i18n/coptccal.cpp index ce531ca0e8..39691217d0 100644 --- a/deps/icu-small/source/i18n/coptccal.cpp +++ b/deps/icu-small/source/i18n/coptccal.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/coptccal.h b/deps/icu-small/source/i18n/coptccal.h index 523769fabe..0b82c36088 100644 --- a/deps/icu-small/source/i18n/coptccal.h +++ b/deps/icu-small/source/i18n/coptccal.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/cpdtrans.cpp b/deps/icu-small/source/i18n/cpdtrans.cpp index b6e328f92c..a204de5a53 100644 --- a/deps/icu-small/source/i18n/cpdtrans.cpp +++ b/deps/icu-small/source/i18n/cpdtrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/cpdtrans.h b/deps/icu-small/source/i18n/cpdtrans.h index 6f832df883..29f3ba83fc 100644 --- a/deps/icu-small/source/i18n/cpdtrans.h +++ b/deps/icu-small/source/i18n/cpdtrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csdetect.cpp b/deps/icu-small/source/i18n/csdetect.cpp index 8ddbe8129b..0afecb287a 100644 --- a/deps/icu-small/source/i18n/csdetect.cpp +++ b/deps/icu-small/source/i18n/csdetect.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csdetect.h b/deps/icu-small/source/i18n/csdetect.h index d0dc0d2077..d4bfa75eef 100644 --- a/deps/icu-small/source/i18n/csdetect.h +++ b/deps/icu-small/source/i18n/csdetect.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csmatch.cpp b/deps/icu-small/source/i18n/csmatch.cpp index ea8d37cd20..7ed6e0ee1a 100644 --- a/deps/icu-small/source/i18n/csmatch.cpp +++ b/deps/icu-small/source/i18n/csmatch.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csmatch.h b/deps/icu-small/source/i18n/csmatch.h index a94b86ae7c..0dc0a9e468 100644 --- a/deps/icu-small/source/i18n/csmatch.h +++ b/deps/icu-small/source/i18n/csmatch.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csr2022.cpp b/deps/icu-small/source/i18n/csr2022.cpp index 9566ee4796..aa7f8446eb 100644 --- a/deps/icu-small/source/i18n/csr2022.cpp +++ b/deps/icu-small/source/i18n/csr2022.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csr2022.h b/deps/icu-small/source/i18n/csr2022.h index 9ff2648505..6d5b7bffcc 100644 --- a/deps/icu-small/source/i18n/csr2022.h +++ b/deps/icu-small/source/i18n/csr2022.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrecog.cpp b/deps/icu-small/source/i18n/csrecog.cpp index 7ae7765399..d02be2bef6 100644 --- a/deps/icu-small/source/i18n/csrecog.cpp +++ b/deps/icu-small/source/i18n/csrecog.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrecog.h b/deps/icu-small/source/i18n/csrecog.h index 1759ca561b..51c25396ad 100644 --- a/deps/icu-small/source/i18n/csrecog.h +++ b/deps/icu-small/source/i18n/csrecog.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrmbcs.cpp b/deps/icu-small/source/i18n/csrmbcs.cpp index d61269f5e3..0c2df594d5 100644 --- a/deps/icu-small/source/i18n/csrmbcs.cpp +++ b/deps/icu-small/source/i18n/csrmbcs.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrmbcs.h b/deps/icu-small/source/i18n/csrmbcs.h index 6a49a85972..8ccf1d56a9 100644 --- a/deps/icu-small/source/i18n/csrmbcs.h +++ b/deps/icu-small/source/i18n/csrmbcs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrsbcs.cpp b/deps/icu-small/source/i18n/csrsbcs.cpp index 48e7dc5123..3d0b7269cc 100644 --- a/deps/icu-small/source/i18n/csrsbcs.cpp +++ b/deps/icu-small/source/i18n/csrsbcs.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrsbcs.h b/deps/icu-small/source/i18n/csrsbcs.h index 2f967dd9c2..bae124c05a 100644 --- a/deps/icu-small/source/i18n/csrsbcs.h +++ b/deps/icu-small/source/i18n/csrsbcs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrucode.cpp b/deps/icu-small/source/i18n/csrucode.cpp index 201b2996c7..b84011c259 100644 --- a/deps/icu-small/source/i18n/csrucode.cpp +++ b/deps/icu-small/source/i18n/csrucode.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrucode.h b/deps/icu-small/source/i18n/csrucode.h index 10e5fafe66..4465bf35a1 100644 --- a/deps/icu-small/source/i18n/csrucode.h +++ b/deps/icu-small/source/i18n/csrucode.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrutf8.cpp b/deps/icu-small/source/i18n/csrutf8.cpp index 4f29fa2af7..bc06fa8bb8 100644 --- a/deps/icu-small/source/i18n/csrutf8.cpp +++ b/deps/icu-small/source/i18n/csrutf8.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/csrutf8.h b/deps/icu-small/source/i18n/csrutf8.h index 71309eade7..dc4f79b824 100644 --- a/deps/icu-small/source/i18n/csrutf8.h +++ b/deps/icu-small/source/i18n/csrutf8.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/curramt.cpp b/deps/icu-small/source/i18n/curramt.cpp index 4475ff611e..019c17df8e 100644 --- a/deps/icu-small/source/i18n/curramt.cpp +++ b/deps/icu-small/source/i18n/curramt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -19,12 +19,12 @@ U_NAMESPACE_BEGIN -CurrencyAmount::CurrencyAmount(const Formattable& amount, const UChar* isoCode, +CurrencyAmount::CurrencyAmount(const Formattable& amount, ConstChar16Ptr isoCode, UErrorCode& ec) : Measure(amount, new CurrencyUnit(isoCode, ec), ec) { } -CurrencyAmount::CurrencyAmount(double amount, const UChar* isoCode, +CurrencyAmount::CurrencyAmount(double amount, ConstChar16Ptr isoCode, UErrorCode& ec) : Measure(Formattable(amount), new CurrencyUnit(isoCode, ec), ec) { } diff --git a/deps/icu-small/source/i18n/currfmt.cpp b/deps/icu-small/source/i18n/currfmt.cpp index b92aa00e5c..06bdad042a 100644 --- a/deps/icu-small/source/i18n/currfmt.cpp +++ b/deps/icu-small/source/i18n/currfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/currfmt.h b/deps/icu-small/source/i18n/currfmt.h index 83e0272465..97d44cbb1d 100644 --- a/deps/icu-small/source/i18n/currfmt.h +++ b/deps/icu-small/source/i18n/currfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/currpinf.cpp b/deps/icu-small/source/i18n/currpinf.cpp index 7c16fff961..5d3ca62089 100644 --- a/deps/icu-small/source/i18n/currpinf.cpp +++ b/deps/icu-small/source/i18n/currpinf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -20,6 +20,7 @@ #include "unicode/locid.h" #include "unicode/plurrule.h" +#include "unicode/strenum.h" #include "unicode/ures.h" #include "unicode/numsys.h" #include "cstring.h" diff --git a/deps/icu-small/source/i18n/currunit.cpp b/deps/icu-small/source/i18n/currunit.cpp index f538d65ada..2192492696 100644 --- a/deps/icu-small/source/i18n/currunit.cpp +++ b/deps/icu-small/source/i18n/currunit.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -19,10 +19,10 @@ U_NAMESPACE_BEGIN -CurrencyUnit::CurrencyUnit(const UChar* _isoCode, UErrorCode& ec) { +CurrencyUnit::CurrencyUnit(ConstChar16Ptr _isoCode, UErrorCode& ec) { *isoCode = 0; if (U_SUCCESS(ec)) { - if (_isoCode && u_strlen(_isoCode)==3) { + if (_isoCode != nullptr && u_strlen(_isoCode)==3) { u_strcpy(isoCode, _isoCode); char simpleIsoCode[4]; u_UCharsToChars(isoCode, simpleIsoCode, 4); diff --git a/deps/icu-small/source/i18n/dangical.cpp b/deps/icu-small/source/i18n/dangical.cpp index 3a7b2ebb8f..bc3951f210 100644 --- a/deps/icu-small/source/i18n/dangical.cpp +++ b/deps/icu-small/source/i18n/dangical.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/dangical.h b/deps/icu-small/source/i18n/dangical.h index 17a5004bdc..1a1e06b902 100644 --- a/deps/icu-small/source/i18n/dangical.h +++ b/deps/icu-small/source/i18n/dangical.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. 
and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************** diff --git a/deps/icu-small/source/i18n/datefmt.cpp b/deps/icu-small/source/i18n/datefmt.cpp index 00f46cfdfd..47cc852691 100644 --- a/deps/icu-small/source/i18n/datefmt.cpp +++ b/deps/icu-small/source/i18n/datefmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -80,7 +80,7 @@ public: fSkeleton(other.fSkeleton) { } virtual ~DateFmtBestPatternKey(); virtual int32_t hashCode() const { - return 37 * LocaleCacheKey::hashCode() + fSkeleton.hashCode(); + return (int32_t)(37u * (uint32_t)LocaleCacheKey::hashCode() + (uint32_t)fSkeleton.hashCode()); } virtual UBool operator==(const CacheKeyBase &other) const { // reflexive @@ -498,7 +498,7 @@ DateFormat* U_EXPORT2 DateFormat::create(EStyle timeStyle, EStyle dateStyle, const Locale& locale) { UErrorCode status = U_ZERO_ERROR; -#if U_PLATFORM_HAS_WIN32_API +#if U_PLATFORM_USES_ONLY_WIN32_API char buffer[8]; int32_t count = locale.getKeywordValue("compat", buffer, sizeof(buffer), status); diff --git a/deps/icu-small/source/i18n/dayperiodrules.cpp b/deps/icu-small/source/i18n/dayperiodrules.cpp index 30414823ef..f7ec1e6dc2 100644 --- a/deps/icu-small/source/i18n/dayperiodrules.cpp +++ b/deps/icu-small/source/i18n/dayperiodrules.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -174,7 +174,7 @@ struct DayPeriodRulesDataSink : public ResourceSink { } } - void addCutoff(CutoffType type, UnicodeString hour_str, UErrorCode &errorCode) { + void addCutoff(CutoffType type, const UnicodeString &hour_str, UErrorCode &errorCode) { if (U_FAILURE(errorCode)) { return; } if (type == CUTOFF_TYPE_UNKNOWN) { diff --git a/deps/icu-small/source/i18n/dayperiodrules.h b/deps/icu-small/source/i18n/dayperiodrules.h index 3c006cdc2f..610c6175bf 100644 --- a/deps/icu-small/source/i18n/dayperiodrules.h +++ b/deps/icu-small/source/i18n/dayperiodrules.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/dcfmtimp.h b/deps/icu-small/source/i18n/dcfmtimp.h index 5885872841..e582efb344 100644 --- a/deps/icu-small/source/i18n/dcfmtimp.h +++ b/deps/icu-small/source/i18n/dcfmtimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/dcfmtsym.cpp b/deps/icu-small/source/i18n/dcfmtsym.cpp index b8b9c32a42..c702c2e7d0 100644 --- a/deps/icu-small/source/i18n/dcfmtsym.cpp +++ b/deps/icu-small/source/i18n/dcfmtsym.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/decContext.c b/deps/icu-small/source/i18n/decContext.c deleted file mode 100644 index 498e1fede9..0000000000 --- a/deps/icu-small/source/i18n/decContext.c +++ /dev/null @@ -1,431 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* ------------------------------------------------------------------ */ -/* Decimal Context module */ -/* ------------------------------------------------------------------ */ -/* Copyright (c) IBM Corporation, 2000-2012. All rights reserved. */ -/* */ -/* This software is made available under the terms of the */ -/* ICU License -- ICU 1.8.1 and later. */ -/* */ -/* The description and User's Guide ("The decNumber C Library") for */ -/* this software is called decNumber.pdf. This document is */ -/* available, together with arithmetic and format specifications, */ -/* testcases, and Web links, on the General Decimal Arithmetic page. */ -/* */ -/* Please send comments, suggestions, and corrections to the author: */ -/* mfc@uk.ibm.com */ -/* Mike Cowlishaw, IBM Fellow */ -/* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ -/* ------------------------------------------------------------------ */ -/* This module comprises the routines for handling arithmetic */ -/* context structures. */ -/* ------------------------------------------------------------------ */ - -#include /* for strcmp */ -#include /* for printf if DECCHECK */ -#include "decContext.h" /* context and base types */ -#include "decNumberLocal.h" /* decNumber local types, etc. */ - -#if 0 /* ICU: No need to test endianness at runtime. 
*/ -/* compile-time endian tester [assumes sizeof(Int)>1] */ -static const Int mfcone=1; /* constant 1 */ -static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */ -#define LITEND *mfctop /* named flag; 1=little-endian */ -#endif - -/* ------------------------------------------------------------------ */ -/* decContextClearStatus -- clear bits in current status */ -/* */ -/* context is the context structure to be queried */ -/* mask indicates the bits to be cleared (the status bit that */ -/* corresponds to each 1 bit in the mask is cleared) */ -/* returns context */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextClearStatus(decContext *context, uInt mask) { - context->status&=~mask; - return context; - } /* decContextClearStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextDefault -- initialize a context structure */ -/* */ -/* context is the structure to be initialized */ -/* kind selects the required set of default values, one of: */ -/* DEC_INIT_BASE -- select ANSI X3-274 defaults */ -/* DEC_INIT_DECIMAL32 -- select IEEE 754 defaults, 32-bit */ -/* DEC_INIT_DECIMAL64 -- select IEEE 754 defaults, 64-bit */ -/* DEC_INIT_DECIMAL128 -- select IEEE 754 defaults, 128-bit */ -/* For any other value a valid context is returned, but with */ -/* Invalid_operation set in the status field. */ -/* returns a context structure with the appropriate initial values. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextDefault(decContext *context, Int kind) { - /* set defaults... */ - context->digits=9; /* 9 digits */ - context->emax=DEC_MAX_EMAX; /* 9-digit exponents */ - context->emin=DEC_MIN_EMIN; /* .. 
balanced */ - context->round=DEC_ROUND_HALF_UP; /* 0.5 rises */ - context->traps=DEC_Errors; /* all but informational */ - context->status=0; /* cleared */ - context->clamp=0; /* no clamping */ - #if DECSUBSET - context->extended=0; /* cleared */ - #endif - switch (kind) { - case DEC_INIT_BASE: - /* [use defaults] */ - break; - case DEC_INIT_DECIMAL32: - context->digits=7; /* digits */ - context->emax=96; /* Emax */ - context->emin=-95; /* Emin */ - context->round=DEC_ROUND_HALF_EVEN; /* 0.5 to nearest even */ - context->traps=0; /* no traps set */ - context->clamp=1; /* clamp exponents */ - #if DECSUBSET - context->extended=1; /* set */ - #endif - break; - case DEC_INIT_DECIMAL64: - context->digits=16; /* digits */ - context->emax=384; /* Emax */ - context->emin=-383; /* Emin */ - context->round=DEC_ROUND_HALF_EVEN; /* 0.5 to nearest even */ - context->traps=0; /* no traps set */ - context->clamp=1; /* clamp exponents */ - #if DECSUBSET - context->extended=1; /* set */ - #endif - break; - case DEC_INIT_DECIMAL128: - context->digits=34; /* digits */ - context->emax=6144; /* Emax */ - context->emin=-6143; /* Emin */ - context->round=DEC_ROUND_HALF_EVEN; /* 0.5 to nearest even */ - context->traps=0; /* no traps set */ - context->clamp=1; /* clamp exponents */ - #if DECSUBSET - context->extended=1; /* set */ - #endif - break; - - default: /* invalid Kind */ - /* use defaults, and .. */ - uprv_decContextSetStatus(context, DEC_Invalid_operation); /* trap */ - } - - return context;} /* decContextDefault */ - -/* ------------------------------------------------------------------ */ -/* decContextGetRounding -- return current rounding mode */ -/* */ -/* context is the context structure to be queried */ -/* returns the rounding mode */ -/* */ -/* No error is possible. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI enum rounding U_EXPORT2 uprv_decContextGetRounding(decContext *context) { - return context->round; - } /* decContextGetRounding */ - -/* ------------------------------------------------------------------ */ -/* decContextGetStatus -- return current status */ -/* */ -/* context is the context structure to be queried */ -/* returns status */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI uInt U_EXPORT2 uprv_decContextGetStatus(decContext *context) { - return context->status; - } /* decContextGetStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextRestoreStatus -- restore bits in current status */ -/* */ -/* context is the context structure to be updated */ -/* newstatus is the source for the bits to be restored */ -/* mask indicates the bits to be restored (the status bit that */ -/* corresponds to each 1 bit in the mask is set to the value of */ -/* the correspnding bit in newstatus) */ -/* returns context */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextRestoreStatus(decContext *context, - uInt newstatus, uInt mask) { - context->status&=~mask; /* clear the selected bits */ - context->status|=(mask&newstatus); /* or in the new bits */ - return context; - } /* decContextRestoreStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextSaveStatus -- save bits in current status */ -/* */ -/* context is the context structure to be queried */ -/* mask indicates the bits to be saved (the status bits that */ -/* correspond to each 1 bit in the mask are saved) */ -/* returns the AND of the mask and the current status */ -/* */ -/* No error is possible. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI uInt U_EXPORT2 uprv_decContextSaveStatus(decContext *context, uInt mask) { - return context->status&mask; - } /* decContextSaveStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextSetRounding -- set current rounding mode */ -/* */ -/* context is the context structure to be updated */ -/* newround is the value which will replace the current mode */ -/* returns context */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *context, - enum rounding newround) { - context->round=newround; - return context; - } /* decContextSetRounding */ - -/* ------------------------------------------------------------------ */ -/* decContextSetStatus -- set status and raise trap if appropriate */ -/* */ -/* context is the context structure to be updated */ -/* status is the DEC_ exception code */ -/* returns the context structure */ -/* */ -/* Control may never return from this routine, if there is a signal */ -/* handler and it takes a long jump. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *context, uInt status) { - context->status|=status; -#if 0 /* ICU: Do not raise signals. */ - if (status & context->traps) raise(SIGFPE); -#endif - return context;} /* decContextSetStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextSetStatusFromString -- set status from a string + trap */ -/* */ -/* context is the context structure to be updated */ -/* string is a string exactly equal to one that might be returned */ -/* by decContextStatusToString */ -/* */ -/* The status bit corresponding to the string is set, and a trap */ -/* is raised if appropriate. 
*/ -/* */ -/* returns the context structure, unless the string is equal to */ -/* DEC_Condition_MU or is not recognized. In these cases NULL is */ -/* returned. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusFromString(decContext *context, - const char *string) { - if (strcmp(string, DEC_Condition_CS)==0) - return uprv_decContextSetStatus(context, DEC_Conversion_syntax); - if (strcmp(string, DEC_Condition_DZ)==0) - return uprv_decContextSetStatus(context, DEC_Division_by_zero); - if (strcmp(string, DEC_Condition_DI)==0) - return uprv_decContextSetStatus(context, DEC_Division_impossible); - if (strcmp(string, DEC_Condition_DU)==0) - return uprv_decContextSetStatus(context, DEC_Division_undefined); - if (strcmp(string, DEC_Condition_IE)==0) - return uprv_decContextSetStatus(context, DEC_Inexact); - if (strcmp(string, DEC_Condition_IS)==0) - return uprv_decContextSetStatus(context, DEC_Insufficient_storage); - if (strcmp(string, DEC_Condition_IC)==0) - return uprv_decContextSetStatus(context, DEC_Invalid_context); - if (strcmp(string, DEC_Condition_IO)==0) - return uprv_decContextSetStatus(context, DEC_Invalid_operation); - #if DECSUBSET - if (strcmp(string, DEC_Condition_LD)==0) - return uprv_decContextSetStatus(context, DEC_Lost_digits); - #endif - if (strcmp(string, DEC_Condition_OV)==0) - return uprv_decContextSetStatus(context, DEC_Overflow); - if (strcmp(string, DEC_Condition_PA)==0) - return uprv_decContextSetStatus(context, DEC_Clamped); - if (strcmp(string, DEC_Condition_RO)==0) - return uprv_decContextSetStatus(context, DEC_Rounded); - if (strcmp(string, DEC_Condition_SU)==0) - return uprv_decContextSetStatus(context, DEC_Subnormal); - if (strcmp(string, DEC_Condition_UN)==0) - return uprv_decContextSetStatus(context, DEC_Underflow); - if (strcmp(string, DEC_Condition_ZE)==0) - return context; - return NULL; /* Multiple status, or unknown */ - } /* 
decContextSetStatusFromString */ - -/* ------------------------------------------------------------------ */ -/* decContextSetStatusFromStringQuiet -- set status from a string */ -/* */ -/* context is the context structure to be updated */ -/* string is a string exactly equal to one that might be returned */ -/* by decContextStatusToString */ -/* */ -/* The status bit corresponding to the string is set; no trap is */ -/* raised. */ -/* */ -/* returns the context structure, unless the string is equal to */ -/* DEC_Condition_MU or is not recognized. In these cases NULL is */ -/* returned. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusFromStringQuiet(decContext *context, - const char *string) { - if (strcmp(string, DEC_Condition_CS)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Conversion_syntax); - if (strcmp(string, DEC_Condition_DZ)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Division_by_zero); - if (strcmp(string, DEC_Condition_DI)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Division_impossible); - if (strcmp(string, DEC_Condition_DU)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Division_undefined); - if (strcmp(string, DEC_Condition_IE)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Inexact); - if (strcmp(string, DEC_Condition_IS)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Insufficient_storage); - if (strcmp(string, DEC_Condition_IC)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Invalid_context); - if (strcmp(string, DEC_Condition_IO)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Invalid_operation); - #if DECSUBSET - if (strcmp(string, DEC_Condition_LD)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Lost_digits); - #endif - if (strcmp(string, DEC_Condition_OV)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Overflow); - if (strcmp(string, DEC_Condition_PA)==0) - return 
uprv_decContextSetStatusQuiet(context, DEC_Clamped); - if (strcmp(string, DEC_Condition_RO)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Rounded); - if (strcmp(string, DEC_Condition_SU)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Subnormal); - if (strcmp(string, DEC_Condition_UN)==0) - return uprv_decContextSetStatusQuiet(context, DEC_Underflow); - if (strcmp(string, DEC_Condition_ZE)==0) - return context; - return NULL; /* Multiple status, or unknown */ - } /* decContextSetStatusFromStringQuiet */ - -/* ------------------------------------------------------------------ */ -/* decContextSetStatusQuiet -- set status without trap */ -/* */ -/* context is the context structure to be updated */ -/* status is the DEC_ exception code */ -/* returns the context structure */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusQuiet(decContext *context, uInt status) { - context->status|=status; - return context;} /* decContextSetStatusQuiet */ - -/* ------------------------------------------------------------------ */ -/* decContextStatusToString -- convert status flags to a string */ -/* */ -/* context is a context with valid status field */ -/* */ -/* returns a constant string describing the condition. If multiple */ -/* (or no) flags are set, a generic constant message is returned. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *context) { - Int status=context->status; - - /* test the five IEEE first, as some of the others are ambiguous when */ - /* DECEXTFLAG=0 */ - if (status==DEC_Invalid_operation ) return DEC_Condition_IO; - if (status==DEC_Division_by_zero ) return DEC_Condition_DZ; - if (status==DEC_Overflow ) return DEC_Condition_OV; - if (status==DEC_Underflow ) return DEC_Condition_UN; - if (status==DEC_Inexact ) return DEC_Condition_IE; - - if (status==DEC_Division_impossible ) return DEC_Condition_DI; - if (status==DEC_Division_undefined ) return DEC_Condition_DU; - if (status==DEC_Rounded ) return DEC_Condition_RO; - if (status==DEC_Clamped ) return DEC_Condition_PA; - if (status==DEC_Subnormal ) return DEC_Condition_SU; - if (status==DEC_Conversion_syntax ) return DEC_Condition_CS; - if (status==DEC_Insufficient_storage ) return DEC_Condition_IS; - if (status==DEC_Invalid_context ) return DEC_Condition_IC; - #if DECSUBSET - if (status==DEC_Lost_digits ) return DEC_Condition_LD; - #endif - if (status==0 ) return DEC_Condition_ZE; - return DEC_Condition_MU; /* Multiple errors */ - } /* decContextStatusToString */ - -/* ------------------------------------------------------------------ */ -/* decContextTestEndian -- test whether DECLITEND is set correctly */ -/* */ -/* quiet is 1 to suppress message; 0 otherwise */ -/* returns 0 if DECLITEND is correct */ -/* 1 if DECLITEND is incorrect and should be 1 */ -/* -1 if DECLITEND is incorrect and should be 0 */ -/* */ -/* A message is displayed if the return value is not 0 and quiet==0. */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -#if 0 /* ICU: Unused function. Anyway, do not call printf(). 
*/ -U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) { - Int res=0; /* optimist */ - uInt dle=(uInt)DECLITEND; /* unsign */ - if (dle>1) dle=1; /* ensure 0 or 1 */ - - if (LITEND!=DECLITEND) { - const char *adj; - if (!quiet) { - if (LITEND) adj="little"; - else adj="big"; - printf("Warning: DECLITEND is set to %d, but this computer appears to be %s-endian\n", - DECLITEND, adj); - } - res=(Int)LITEND-dle; - } - return res; - } /* decContextTestEndian */ -#endif - -/* ------------------------------------------------------------------ */ -/* decContextTestSavedStatus -- test bits in saved status */ -/* */ -/* oldstatus is the status word to be tested */ -/* mask indicates the bits to be tested (the oldstatus bits that */ -/* correspond to each 1 bit in the mask are tested) */ -/* returns 1 if any of the tested bits are 1, or 0 otherwise */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI uInt U_EXPORT2 uprv_decContextTestSavedStatus(uInt oldstatus, uInt mask) { - return (oldstatus&mask)!=0; - } /* decContextTestSavedStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextTestStatus -- test bits in current status */ -/* */ -/* context is the context structure to be updated */ -/* mask indicates the bits to be tested (the status bits that */ -/* correspond to each 1 bit in the mask are tested) */ -/* returns 1 if any of the tested bits are 1, or 0 otherwise */ -/* */ -/* No error is possible. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI uInt U_EXPORT2 uprv_decContextTestStatus(decContext *context, uInt mask) { - return (context->status&mask)!=0; - } /* decContextTestStatus */ - -/* ------------------------------------------------------------------ */ -/* decContextZeroStatus -- clear all status bits */ -/* */ -/* context is the context structure to be updated */ -/* returns context */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -U_CAPI decContext * U_EXPORT2 uprv_decContextZeroStatus(decContext *context) { - context->status=0; - return context; - } /* decContextZeroStatus */ diff --git a/deps/icu-small/source/i18n/decContext.cpp b/deps/icu-small/source/i18n/decContext.cpp new file mode 100644 index 0000000000..bead83efff --- /dev/null +++ b/deps/icu-small/source/i18n/decContext.cpp @@ -0,0 +1,431 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* ------------------------------------------------------------------ */ +/* Decimal Context module */ +/* ------------------------------------------------------------------ */ +/* Copyright (c) IBM Corporation, 2000-2012. All rights reserved. */ +/* */ +/* This software is made available under the terms of the */ +/* ICU License -- ICU 1.8.1 and later. */ +/* */ +/* The description and User's Guide ("The decNumber C Library") for */ +/* this software is called decNumber.pdf. This document is */ +/* available, together with arithmetic and format specifications, */ +/* testcases, and Web links, on the General Decimal Arithmetic page. 
*/ +/* */ +/* Please send comments, suggestions, and corrections to the author: */ +/* mfc@uk.ibm.com */ +/* Mike Cowlishaw, IBM Fellow */ +/* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ +/* ------------------------------------------------------------------ */ +/* This module comprises the routines for handling arithmetic */ +/* context structures. */ +/* ------------------------------------------------------------------ */ + +#include <string.h> /* for strcmp */ +#include <stdio.h> /* for printf if DECCHECK */ +#include "decContext.h" /* context and base types */ +#include "decNumberLocal.h" /* decNumber local types, etc. */ + +#if 0 /* ICU: No need to test endianness at runtime. */ +/* compile-time endian tester [assumes sizeof(Int)>1] */ +static const Int mfcone=1; /* constant 1 */ +static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */ +#define LITEND *mfctop /* named flag; 1=little-endian */ +#endif + +/* ------------------------------------------------------------------ */ +/* decContextClearStatus -- clear bits in current status */ +/* */ +/* context is the context structure to be queried */ +/* mask indicates the bits to be cleared (the status bit that */ +/* corresponds to each 1 bit in the mask is cleared) */ +/* returns context */ +/* */ +/* No error is possible. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextClearStatus(decContext *context, uInt mask) { + context->status&=~mask; + return context; + } /* decContextClearStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextDefault -- initialize a context structure */ +/* */ +/* context is the structure to be initialized */ +/* kind selects the required set of default values, one of: */ +/* DEC_INIT_BASE -- select ANSI X3-274 defaults */ +/* DEC_INIT_DECIMAL32 -- select IEEE 754 defaults, 32-bit */ +/* DEC_INIT_DECIMAL64 -- select IEEE 754 defaults, 64-bit */ +/* DEC_INIT_DECIMAL128 -- select IEEE 754 defaults, 128-bit */ +/* For any other value a valid context is returned, but with */ +/* Invalid_operation set in the status field. */ +/* returns a context structure with the appropriate initial values. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextDefault(decContext *context, Int kind) { + /* set defaults... */ + context->digits=9; /* 9 digits */ + context->emax=DEC_MAX_EMAX; /* 9-digit exponents */ + context->emin=DEC_MIN_EMIN; /* .. 
balanced */ + context->round=DEC_ROUND_HALF_UP; /* 0.5 rises */ + context->traps=DEC_Errors; /* all but informational */ + context->status=0; /* cleared */ + context->clamp=0; /* no clamping */ + #if DECSUBSET + context->extended=0; /* cleared */ + #endif + switch (kind) { + case DEC_INIT_BASE: + /* [use defaults] */ + break; + case DEC_INIT_DECIMAL32: + context->digits=7; /* digits */ + context->emax=96; /* Emax */ + context->emin=-95; /* Emin */ + context->round=DEC_ROUND_HALF_EVEN; /* 0.5 to nearest even */ + context->traps=0; /* no traps set */ + context->clamp=1; /* clamp exponents */ + #if DECSUBSET + context->extended=1; /* set */ + #endif + break; + case DEC_INIT_DECIMAL64: + context->digits=16; /* digits */ + context->emax=384; /* Emax */ + context->emin=-383; /* Emin */ + context->round=DEC_ROUND_HALF_EVEN; /* 0.5 to nearest even */ + context->traps=0; /* no traps set */ + context->clamp=1; /* clamp exponents */ + #if DECSUBSET + context->extended=1; /* set */ + #endif + break; + case DEC_INIT_DECIMAL128: + context->digits=34; /* digits */ + context->emax=6144; /* Emax */ + context->emin=-6143; /* Emin */ + context->round=DEC_ROUND_HALF_EVEN; /* 0.5 to nearest even */ + context->traps=0; /* no traps set */ + context->clamp=1; /* clamp exponents */ + #if DECSUBSET + context->extended=1; /* set */ + #endif + break; + + default: /* invalid Kind */ + /* use defaults, and .. */ + uprv_decContextSetStatus(context, DEC_Invalid_operation); /* trap */ + } + + return context;} /* decContextDefault */ + +/* ------------------------------------------------------------------ */ +/* decContextGetRounding -- return current rounding mode */ +/* */ +/* context is the context structure to be queried */ +/* returns the rounding mode */ +/* */ +/* No error is possible. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI enum rounding U_EXPORT2 uprv_decContextGetRounding(decContext *context) { + return context->round; + } /* decContextGetRounding */ + +/* ------------------------------------------------------------------ */ +/* decContextGetStatus -- return current status */ +/* */ +/* context is the context structure to be queried */ +/* returns status */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +U_CAPI uInt U_EXPORT2 uprv_decContextGetStatus(decContext *context) { + return context->status; + } /* decContextGetStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextRestoreStatus -- restore bits in current status */ +/* */ +/* context is the context structure to be updated */ +/* newstatus is the source for the bits to be restored */ +/* mask indicates the bits to be restored (the status bit that */ +/* corresponds to each 1 bit in the mask is set to the value of */ +/* the correspnding bit in newstatus) */ +/* returns context */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextRestoreStatus(decContext *context, + uInt newstatus, uInt mask) { + context->status&=~mask; /* clear the selected bits */ + context->status|=(mask&newstatus); /* or in the new bits */ + return context; + } /* decContextRestoreStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextSaveStatus -- save bits in current status */ +/* */ +/* context is the context structure to be queried */ +/* mask indicates the bits to be saved (the status bits that */ +/* correspond to each 1 bit in the mask are saved) */ +/* returns the AND of the mask and the current status */ +/* */ +/* No error is possible. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI uInt U_EXPORT2 uprv_decContextSaveStatus(decContext *context, uInt mask) { + return context->status&mask; + } /* decContextSaveStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextSetRounding -- set current rounding mode */ +/* */ +/* context is the context structure to be updated */ +/* newround is the value which will replace the current mode */ +/* returns context */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *context, + enum rounding newround) { + context->round=newround; + return context; + } /* decContextSetRounding */ + +/* ------------------------------------------------------------------ */ +/* decContextSetStatus -- set status and raise trap if appropriate */ +/* */ +/* context is the context structure to be updated */ +/* status is the DEC_ exception code */ +/* returns the context structure */ +/* */ +/* Control may never return from this routine, if there is a signal */ +/* handler and it takes a long jump. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *context, uInt status) { + context->status|=status; +#if 0 /* ICU: Do not raise signals. */ + if (status & context->traps) raise(SIGFPE); +#endif + return context;} /* decContextSetStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextSetStatusFromString -- set status from a string + trap */ +/* */ +/* context is the context structure to be updated */ +/* string is a string exactly equal to one that might be returned */ +/* by decContextStatusToString */ +/* */ +/* The status bit corresponding to the string is set, and a trap */ +/* is raised if appropriate. 
*/ +/* */ +/* returns the context structure, unless the string is equal to */ +/* DEC_Condition_MU or is not recognized. In these cases NULL is */ +/* returned. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusFromString(decContext *context, + const char *string) { + if (strcmp(string, DEC_Condition_CS)==0) + return uprv_decContextSetStatus(context, DEC_Conversion_syntax); + if (strcmp(string, DEC_Condition_DZ)==0) + return uprv_decContextSetStatus(context, DEC_Division_by_zero); + if (strcmp(string, DEC_Condition_DI)==0) + return uprv_decContextSetStatus(context, DEC_Division_impossible); + if (strcmp(string, DEC_Condition_DU)==0) + return uprv_decContextSetStatus(context, DEC_Division_undefined); + if (strcmp(string, DEC_Condition_IE)==0) + return uprv_decContextSetStatus(context, DEC_Inexact); + if (strcmp(string, DEC_Condition_IS)==0) + return uprv_decContextSetStatus(context, DEC_Insufficient_storage); + if (strcmp(string, DEC_Condition_IC)==0) + return uprv_decContextSetStatus(context, DEC_Invalid_context); + if (strcmp(string, DEC_Condition_IO)==0) + return uprv_decContextSetStatus(context, DEC_Invalid_operation); + #if DECSUBSET + if (strcmp(string, DEC_Condition_LD)==0) + return uprv_decContextSetStatus(context, DEC_Lost_digits); + #endif + if (strcmp(string, DEC_Condition_OV)==0) + return uprv_decContextSetStatus(context, DEC_Overflow); + if (strcmp(string, DEC_Condition_PA)==0) + return uprv_decContextSetStatus(context, DEC_Clamped); + if (strcmp(string, DEC_Condition_RO)==0) + return uprv_decContextSetStatus(context, DEC_Rounded); + if (strcmp(string, DEC_Condition_SU)==0) + return uprv_decContextSetStatus(context, DEC_Subnormal); + if (strcmp(string, DEC_Condition_UN)==0) + return uprv_decContextSetStatus(context, DEC_Underflow); + if (strcmp(string, DEC_Condition_ZE)==0) + return context; + return NULL; /* Multiple status, or unknown */ + } /* 
decContextSetStatusFromString */ + +/* ------------------------------------------------------------------ */ +/* decContextSetStatusFromStringQuiet -- set status from a string */ +/* */ +/* context is the context structure to be updated */ +/* string is a string exactly equal to one that might be returned */ +/* by decContextStatusToString */ +/* */ +/* The status bit corresponding to the string is set; no trap is */ +/* raised. */ +/* */ +/* returns the context structure, unless the string is equal to */ +/* DEC_Condition_MU or is not recognized. In these cases NULL is */ +/* returned. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusFromStringQuiet(decContext *context, + const char *string) { + if (strcmp(string, DEC_Condition_CS)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Conversion_syntax); + if (strcmp(string, DEC_Condition_DZ)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Division_by_zero); + if (strcmp(string, DEC_Condition_DI)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Division_impossible); + if (strcmp(string, DEC_Condition_DU)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Division_undefined); + if (strcmp(string, DEC_Condition_IE)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Inexact); + if (strcmp(string, DEC_Condition_IS)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Insufficient_storage); + if (strcmp(string, DEC_Condition_IC)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Invalid_context); + if (strcmp(string, DEC_Condition_IO)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Invalid_operation); + #if DECSUBSET + if (strcmp(string, DEC_Condition_LD)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Lost_digits); + #endif + if (strcmp(string, DEC_Condition_OV)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Overflow); + if (strcmp(string, DEC_Condition_PA)==0) + return 
uprv_decContextSetStatusQuiet(context, DEC_Clamped); + if (strcmp(string, DEC_Condition_RO)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Rounded); + if (strcmp(string, DEC_Condition_SU)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Subnormal); + if (strcmp(string, DEC_Condition_UN)==0) + return uprv_decContextSetStatusQuiet(context, DEC_Underflow); + if (strcmp(string, DEC_Condition_ZE)==0) + return context; + return NULL; /* Multiple status, or unknown */ + } /* decContextSetStatusFromStringQuiet */ + +/* ------------------------------------------------------------------ */ +/* decContextSetStatusQuiet -- set status without trap */ +/* */ +/* context is the context structure to be updated */ +/* status is the DEC_ exception code */ +/* returns the context structure */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusQuiet(decContext *context, uInt status) { + context->status|=status; + return context;} /* decContextSetStatusQuiet */ + +/* ------------------------------------------------------------------ */ +/* decContextStatusToString -- convert status flags to a string */ +/* */ +/* context is a context with valid status field */ +/* */ +/* returns a constant string describing the condition. If multiple */ +/* (or no) flags are set, a generic constant message is returned. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *context) { + Int status=context->status; + + /* test the five IEEE first, as some of the others are ambiguous when */ + /* DECEXTFLAG=0 */ + if (status==DEC_Invalid_operation ) return DEC_Condition_IO; + if (status==DEC_Division_by_zero ) return DEC_Condition_DZ; + if (status==DEC_Overflow ) return DEC_Condition_OV; + if (status==DEC_Underflow ) return DEC_Condition_UN; + if (status==DEC_Inexact ) return DEC_Condition_IE; + + if (status==DEC_Division_impossible ) return DEC_Condition_DI; + if (status==DEC_Division_undefined ) return DEC_Condition_DU; + if (status==DEC_Rounded ) return DEC_Condition_RO; + if (status==DEC_Clamped ) return DEC_Condition_PA; + if (status==DEC_Subnormal ) return DEC_Condition_SU; + if (status==DEC_Conversion_syntax ) return DEC_Condition_CS; + if (status==DEC_Insufficient_storage ) return DEC_Condition_IS; + if (status==DEC_Invalid_context ) return DEC_Condition_IC; + #if DECSUBSET + if (status==DEC_Lost_digits ) return DEC_Condition_LD; + #endif + if (status==0 ) return DEC_Condition_ZE; + return DEC_Condition_MU; /* Multiple errors */ + } /* decContextStatusToString */ + +/* ------------------------------------------------------------------ */ +/* decContextTestEndian -- test whether DECLITEND is set correctly */ +/* */ +/* quiet is 1 to suppress message; 0 otherwise */ +/* returns 0 if DECLITEND is correct */ +/* 1 if DECLITEND is incorrect and should be 1 */ +/* -1 if DECLITEND is incorrect and should be 0 */ +/* */ +/* A message is displayed if the return value is not 0 and quiet==0. */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +#if 0 /* ICU: Unused function. Anyway, do not call printf(). 
*/ +U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) { + Int res=0; /* optimist */ + uInt dle=(uInt)DECLITEND; /* unsign */ + if (dle>1) dle=1; /* ensure 0 or 1 */ + + if (LITEND!=DECLITEND) { + const char *adj; + if (!quiet) { + if (LITEND) adj="little"; + else adj="big"; + printf("Warning: DECLITEND is set to %d, but this computer appears to be %s-endian\n", + DECLITEND, adj); + } + res=(Int)LITEND-dle; + } + return res; + } /* decContextTestEndian */ +#endif + +/* ------------------------------------------------------------------ */ +/* decContextTestSavedStatus -- test bits in saved status */ +/* */ +/* oldstatus is the status word to be tested */ +/* mask indicates the bits to be tested (the oldstatus bits that */ +/* correspond to each 1 bit in the mask are tested) */ +/* returns 1 if any of the tested bits are 1, or 0 otherwise */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +U_CAPI uInt U_EXPORT2 uprv_decContextTestSavedStatus(uInt oldstatus, uInt mask) { + return (oldstatus&mask)!=0; + } /* decContextTestSavedStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextTestStatus -- test bits in current status */ +/* */ +/* context is the context structure to be updated */ +/* mask indicates the bits to be tested (the status bits that */ +/* correspond to each 1 bit in the mask are tested) */ +/* returns 1 if any of the tested bits are 1, or 0 otherwise */ +/* */ +/* No error is possible. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI uInt U_EXPORT2 uprv_decContextTestStatus(decContext *context, uInt mask) { + return (context->status&mask)!=0; + } /* decContextTestStatus */ + +/* ------------------------------------------------------------------ */ +/* decContextZeroStatus -- clear all status bits */ +/* */ +/* context is the context structure to be updated */ +/* returns context */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +U_CAPI decContext * U_EXPORT2 uprv_decContextZeroStatus(decContext *context) { + context->status=0; + return context; + } /* decContextZeroStatus */ diff --git a/deps/icu-small/source/i18n/decContext.h b/deps/icu-small/source/i18n/decContext.h index 62123ff440..1fd18e5d3d 100644 --- a/deps/icu-small/source/i18n/decContext.h +++ b/deps/icu-small/source/i18n/decContext.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ------------------------------------------------------------------ */ /* Decimal Context module header */ diff --git a/deps/icu-small/source/i18n/decNumber.c b/deps/icu-small/source/i18n/decNumber.c deleted file mode 100644 index b25845e0aa..0000000000 --- a/deps/icu-small/source/i18n/decNumber.c +++ /dev/null @@ -1,8188 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* ------------------------------------------------------------------ */ -/* Decimal Number arithmetic module */ -/* ------------------------------------------------------------------ */ -/* Copyright (c) IBM Corporation, 2000-2014. All rights reserved. */ -/* */ -/* This software is made available under the terms of the */ -/* ICU License -- ICU 1.8.1 and later. 
*/ -/* */ -/* The description and User's Guide ("The decNumber C Library") for */ -/* this software is called decNumber.pdf. This document is */ -/* available, together with arithmetic and format specifications, */ -/* testcases, and Web links, on the General Decimal Arithmetic page. */ -/* */ -/* Please send comments, suggestions, and corrections to the author: */ -/* mfc@uk.ibm.com */ -/* Mike Cowlishaw, IBM Fellow */ -/* IBM UK, PO Box 31, Birmingham Road, Warwick CV34 5JL, UK */ -/* ------------------------------------------------------------------ */ - -/* Modified version, for use from within ICU. - * Renamed public functions, to avoid an unwanted export of the - * standard names from the ICU library. - * - * Use ICU's uprv_malloc() and uprv_free() - * - * Revert comment syntax to plain C - * - * Remove a few compiler warnings. - */ - -/* This module comprises the routines for arbitrary-precision General */ -/* Decimal Arithmetic as defined in the specification which may be */ -/* found on the General Decimal Arithmetic pages. It implements both */ -/* the full ('extended') arithmetic and the simpler ('subset') */ -/* arithmetic. */ -/* */ -/* Usage notes: */ -/* */ -/* 1. This code is ANSI C89 except: */ -/* */ -/* a) C99 line comments (double forward slash) are used. (Most C */ -/* compilers accept these. If yours does not, a simple script */ -/* can be used to convert them to ANSI C comments.) */ -/* */ -/* b) Types from C99 stdint.h are used. If you do not have this */ -/* header file, see the User's Guide section of the decNumber */ -/* documentation; this lists the necessary definitions. */ -/* */ -/* c) If DECDPUN>4 or DECUSE64=1, the C99 64-bit int64_t and */ -/* uint64_t types may be used. To avoid these, set DECUSE64=0 */ -/* and DECDPUN<=4 (see documentation). */ -/* */ -/* The code also conforms to C99 restrictions; in particular, */ -/* strict aliasing rules are observed. */ -/* */ -/* 2. 
The decNumber format which this library uses is optimized for */ -/* efficient processing of relatively short numbers; in particular */ -/* it allows the use of fixed sized structures and minimizes copy */ -/* and move operations. It does, however, support arbitrary */ -/* precision (up to 999,999,999 digits) and arbitrary exponent */ -/* range (Emax in the range 0 through 999,999,999 and Emin in the */ -/* range -999,999,999 through 0). Mathematical functions (for */ -/* example decNumberExp) as identified below are restricted more */ -/* tightly: digits, emax, and -emin in the context must be <= */ -/* DEC_MAX_MATH (999999), and their operand(s) must be within */ -/* these bounds. */ -/* */ -/* 3. Logical functions are further restricted; their operands must */ -/* be finite, positive, have an exponent of zero, and all digits */ -/* must be either 0 or 1. The result will only contain digits */ -/* which are 0 or 1 (and will have exponent=0 and a sign of 0). */ -/* */ -/* 4. Operands to operator functions are never modified unless they */ -/* are also specified to be the result number (which is always */ -/* permitted). Other than that case, operands must not overlap. */ -/* */ -/* 5. Error handling: the type of the error is ORed into the status */ -/* flags in the current context (decContext structure). The */ -/* SIGFPE signal is then raised if the corresponding trap-enabler */ -/* flag in the decContext is set (is 1). */ -/* */ -/* It is the responsibility of the caller to clear the status */ -/* flags as required. */ -/* */ -/* The result of any routine which returns a number will always */ -/* be a valid number (which may be a special value, such as an */ -/* Infinity or NaN). */ -/* */ -/* 6. The decNumber format is not an exchangeable concrete */ -/* representation as it comprises fields which may be machine- */ -/* dependent (packed or unpacked, or special length, for example). 
*/ -/* Canonical conversions to and from strings are provided; other */ -/* conversions are available in separate modules. */ -/* */ -/* 7. Normally, input operands are assumed to be valid. Set DECCHECK */ -/* to 1 for extended operand checking (including NULL operands). */ -/* Results are undefined if a badly-formed structure (or a NULL */ -/* pointer to a structure) is provided, though with DECCHECK */ -/* enabled the operator routines are protected against exceptions. */ -/* (Except if the result pointer is NULL, which is unrecoverable.) */ -/* */ -/* However, the routines will never cause exceptions if they are */ -/* given well-formed operands, even if the value of the operands */ -/* is inappropriate for the operation and DECCHECK is not set. */ -/* (Except for SIGFPE, as and where documented.) */ -/* */ -/* 8. Subset arithmetic is available only if DECSUBSET is set to 1. */ -/* ------------------------------------------------------------------ */ -/* Implementation notes for maintenance of this module: */ -/* */ -/* 1. Storage leak protection: Routines which use malloc are not */ -/* permitted to use return for fastpath or error exits (i.e., */ -/* they follow strict structured programming conventions). */ -/* Instead they have a do{}while(0); construct surrounding the */ -/* code which is protected -- break may be used to exit this. */ -/* Other routines can safely use the return statement inline. */ -/* */ -/* Storage leak accounting can be enabled using DECALLOC. */ -/* */ -/* 2. All loops use the for(;;) construct. Any do construct does */ -/* not loop; it is for allocation protection as just described. */ -/* */ -/* 3. Setting status in the context must always be the very last */ -/* action in a routine, as non-0 status may raise a trap and hence */ -/* the call to set status may not return (if the handler uses long */ -/* jump). Therefore all cleanup must be done first. 
In general, */ -/* to achieve this status is accumulated and is only applied just */ -/* before return by calling decContextSetStatus (via decStatus). */ -/* */ -/* Routines which allocate storage cannot, in general, use the */ -/* 'top level' routines which could cause a non-returning */ -/* transfer of control. The decXxxxOp routines are safe (do not */ -/* call decStatus even if traps are set in the context) and should */ -/* be used instead (they are also a little faster). */ -/* */ -/* 4. Exponent checking is minimized by allowing the exponent to */ -/* grow outside its limits during calculations, provided that */ -/* the decFinalize function is called later. Multiplication and */ -/* division, and intermediate calculations in exponentiation, */ -/* require more careful checks because of the risk of 31-bit */ -/* overflow (the most negative valid exponent is -1999999997, for */ -/* a 999999999-digit number with adjusted exponent of -999999999). */ -/* */ -/* 5. Rounding is deferred until finalization of results, with any */ -/* 'off to the right' data being represented as a single digit */ -/* residue (in the range -1 through 9). This avoids any double- */ -/* rounding when more than one shortening takes place (for */ -/* example, when a result is subnormal). */ -/* */ -/* 6. The digits count is allowed to rise to a multiple of DECDPUN */ -/* during many operations, so whole Units are handled and exact */ -/* accounting of digits is not needed. The correct digits value */ -/* is found by decGetDigits, which accounts for leading zeros. */ -/* This must be called before any rounding if the number of digits */ -/* is not known exactly. */ -/* */ -/* 7. The multiply-by-reciprocal 'trick' is used for partitioning */ -/* numbers up to four digits, using appropriate constants. 
This */ -/* is not useful for longer numbers because overflow of 32 bits */ -/* would lead to 4 multiplies, which is almost as expensive as */ -/* a divide (unless a floating-point or 64-bit multiply is */ -/* assumed to be available). */ -/* */ -/* 8. Unusual abbreviations that may be used in the commentary: */ -/* lhs -- left hand side (operand, of an operation) */ -/* lsd -- least significant digit (of coefficient) */ -/* lsu -- least significant Unit (of coefficient) */ -/* msd -- most significant digit (of coefficient) */ -/* msi -- most significant item (in an array) */ -/* msu -- most significant Unit (of coefficient) */ -/* rhs -- right hand side (operand, of an operation) */ -/* +ve -- positive */ -/* -ve -- negative */ -/* ** -- raise to the power */ -/* ------------------------------------------------------------------ */ - -#include /* for malloc, free, etc. */ -/* #include */ /* for printf [if needed] */ -#include /* for strcpy */ -#include /* for lower */ -#include "cmemory.h" /* for uprv_malloc, etc., in ICU */ -#include "decNumber.h" /* base number library */ -#include "decNumberLocal.h" /* decNumber local types, etc. */ -#include "uassert.h" - -/* Constants */ -/* Public lookup table used by the D2U macro */ -static const uByte d2utable[DECMAXD2U+1]=D2UTABLE; - -#define DECVERB 1 /* set to 1 for verbose DECCHECK */ -#define powers DECPOWERS /* old internal name */ - -/* Local constants */ -#define DIVIDE 0x80 /* Divide operators */ -#define REMAINDER 0x40 /* .. */ -#define DIVIDEINT 0x20 /* .. */ -#define REMNEAR 0x10 /* .. */ -#define COMPARE 0x01 /* Compare operators */ -#define COMPMAX 0x02 /* .. */ -#define COMPMIN 0x03 /* .. */ -#define COMPTOTAL 0x04 /* .. */ -#define COMPNAN 0x05 /* .. [NaN processing] */ -#define COMPSIG 0x06 /* .. [signaling COMPARE] */ -#define COMPMAXMAG 0x07 /* .. */ -#define COMPMINMAG 0x08 /* .. 
*/ - -#define DEC_sNaN 0x40000000 /* local status: sNaN signal */ -#define BADINT (Int)0x80000000 /* most-negative Int; error indicator */ -/* Next two indicate an integer >= 10**6, and its parity (bottom bit) */ -#define BIGEVEN (Int)0x80000002 -#define BIGODD (Int)0x80000003 - -static const Unit uarrone[1]={1}; /* Unit array of 1, used for incrementing */ - -/* ------------------------------------------------------------------ */ -/* round-for-reround digits */ -/* ------------------------------------------------------------------ */ -#if 0 -static const uByte DECSTICKYTAB[10]={1,1,2,3,4,6,6,7,8,9}; /* used if sticky */ -#endif - -/* ------------------------------------------------------------------ */ -/* Powers of ten (powers[n]==10**n, 0<=n<=9) */ -/* ------------------------------------------------------------------ */ -static const uInt DECPOWERS[10]={1, 10, 100, 1000, 10000, 100000, 1000000, - 10000000, 100000000, 1000000000}; - - -/* Granularity-dependent code */ -#if DECDPUN<=4 - #define eInt Int /* extended integer */ - #define ueInt uInt /* unsigned extended integer */ - /* Constant multipliers for divide-by-power-of five using reciprocal */ - /* multiply, after removing powers of 2 by shifting, and final shift */ - /* of 17 [we only need up to **4] */ - static const uInt multies[]={131073, 26215, 5243, 1049, 210}; - /* QUOT10 -- macro to return the quotient of unit u divided by 10**n */ - #define QUOT10(u, n) ((((uInt)(u)>>(n))*multies[n])>>17) -#else - /* For DECDPUN>4 non-ANSI-89 64-bit types are needed. 
*/ - #if !DECUSE64 - #error decNumber.c: DECUSE64 must be 1 when DECDPUN>4 - #endif - #define eInt Long /* extended integer */ - #define ueInt uLong /* unsigned extended integer */ -#endif - -/* Local routines */ -static decNumber * decAddOp(decNumber *, const decNumber *, const decNumber *, - decContext *, uByte, uInt *); -static Flag decBiStr(const char *, const char *, const char *); -static uInt decCheckMath(const decNumber *, decContext *, uInt *); -static void decApplyRound(decNumber *, decContext *, Int, uInt *); -static Int decCompare(const decNumber *lhs, const decNumber *rhs, Flag); -static decNumber * decCompareOp(decNumber *, const decNumber *, - const decNumber *, decContext *, - Flag, uInt *); -static void decCopyFit(decNumber *, const decNumber *, decContext *, - Int *, uInt *); -static decNumber * decDecap(decNumber *, Int); -static decNumber * decDivideOp(decNumber *, const decNumber *, - const decNumber *, decContext *, Flag, uInt *); -static decNumber * decExpOp(decNumber *, const decNumber *, - decContext *, uInt *); -static void decFinalize(decNumber *, decContext *, Int *, uInt *); -static Int decGetDigits(Unit *, Int); -static Int decGetInt(const decNumber *); -static decNumber * decLnOp(decNumber *, const decNumber *, - decContext *, uInt *); -static decNumber * decMultiplyOp(decNumber *, const decNumber *, - const decNumber *, decContext *, - uInt *); -static decNumber * decNaNs(decNumber *, const decNumber *, - const decNumber *, decContext *, uInt *); -static decNumber * decQuantizeOp(decNumber *, const decNumber *, - const decNumber *, decContext *, Flag, - uInt *); -static void decReverse(Unit *, Unit *); -static void decSetCoeff(decNumber *, decContext *, const Unit *, - Int, Int *, uInt *); -static void decSetMaxValue(decNumber *, decContext *); -static void decSetOverflow(decNumber *, decContext *, uInt *); -static void decSetSubnormal(decNumber *, decContext *, Int *, uInt *); -static Int decShiftToLeast(Unit *, Int, Int); -static 
Int decShiftToMost(Unit *, Int, Int); -static void decStatus(decNumber *, uInt, decContext *); -static void decToString(const decNumber *, char[], Flag); -static decNumber * decTrim(decNumber *, decContext *, Flag, Flag, Int *); -static Int decUnitAddSub(const Unit *, Int, const Unit *, Int, Int, - Unit *, Int); -static Int decUnitCompare(const Unit *, Int, const Unit *, Int, Int); - -#if !DECSUBSET -/* decFinish == decFinalize when no subset arithmetic needed */ -#define decFinish(a,b,c,d) decFinalize(a,b,c,d) -#else -static void decFinish(decNumber *, decContext *, Int *, uInt *); -static decNumber * decRoundOperand(const decNumber *, decContext *, uInt *); -#endif - -/* Local macros */ -/* masked special-values bits */ -#define SPECIALARG (rhs->bits & DECSPECIAL) -#define SPECIALARGS ((lhs->bits | rhs->bits) & DECSPECIAL) - -/* For use in ICU */ -#define malloc(a) uprv_malloc(a) -#define free(a) uprv_free(a) - -/* Diagnostic macros, etc. */ -#if DECALLOC -/* Handle malloc/free accounting. If enabled, our accountable routines */ -/* are used; otherwise the code just goes straight to the system malloc */ -/* and free routines. */ -#define malloc(a) decMalloc(a) -#define free(a) decFree(a) -#define DECFENCE 0x5a /* corruption detector */ -/* 'Our' malloc and free: */ -static void *decMalloc(size_t); -static void decFree(void *); -uInt decAllocBytes=0; /* count of bytes allocated */ -/* Note that DECALLOC code only checks for storage buffer overflow. */ -/* To check for memory leaks, the decAllocBytes variable must be */ -/* checked to be 0 at appropriate times (e.g., after the test */ -/* harness completes a set of tests). This checking may be unreliable */ -/* if the testing is done in a multi-thread environment. */ -#endif - -#if DECCHECK -/* Optional checking routines. Enabling these means that decNumber */ -/* and decContext operands to operator routines are checked for */ -/* correctness. 
This roughly doubles the execution time of the */ -/* fastest routines (and adds 600+ bytes), so should not normally be */ -/* used in 'production'. */ -/* decCheckInexact is used to check that inexact results have a full */ -/* complement of digits (where appropriate -- this is not the case */ -/* for Quantize, for example) */ -#define DECUNRESU ((decNumber *)(void *)0xffffffff) -#define DECUNUSED ((const decNumber *)(void *)0xffffffff) -#define DECUNCONT ((decContext *)(void *)(0xffffffff)) -static Flag decCheckOperands(decNumber *, const decNumber *, - const decNumber *, decContext *); -static Flag decCheckNumber(const decNumber *); -static void decCheckInexact(const decNumber *, decContext *); -#endif - -#if DECTRACE || DECCHECK -/* Optional trace/debugging routines (may or may not be used) */ -void decNumberShow(const decNumber *); /* displays the components of a number */ -static void decDumpAr(char, const Unit *, Int); -#endif - -/* ================================================================== */ -/* Conversions */ -/* ================================================================== */ - -/* ------------------------------------------------------------------ */ -/* from-int32 -- conversion from Int or uInt */ -/* */ -/* dn is the decNumber to receive the integer */ -/* in or uin is the integer to be converted */ -/* returns dn */ -/* */ -/* No error is possible. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromInt32(decNumber *dn, Int in) { - uInt unsig; - if (in>=0) unsig=in; - else { /* negative (possibly BADINT) */ - if (in==BADINT) unsig=(uInt)1073741824*2; /* special case */ - else unsig=-in; /* invert */ - } - /* in is now positive */ - uprv_decNumberFromUInt32(dn, unsig); - if (in<0) dn->bits=DECNEG; /* sign needed */ - return dn; - } /* decNumberFromInt32 */ - -U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromUInt32(decNumber *dn, uInt uin) { - Unit *up; /* work pointer */ - uprv_decNumberZero(dn); /* clean */ - if (uin==0) return dn; /* [or decGetDigits bad call] */ - for (up=dn->lsu; uin>0; up++) { - *up=(Unit)(uin%(DECDPUNMAX+1)); - uin=uin/(DECDPUNMAX+1); - } - dn->digits=decGetDigits(dn->lsu, up-dn->lsu); - return dn; - } /* decNumberFromUInt32 */ - -/* ------------------------------------------------------------------ */ -/* to-int32 -- conversion to Int or uInt */ -/* */ -/* dn is the decNumber to convert */ -/* set is the context for reporting errors */ -/* returns the converted decNumber, or 0 if Invalid is set */ -/* */ -/* Invalid is set if the decNumber does not have exponent==0 or if */ -/* it is a NaN, Infinite, or out-of-range. */ -/* ------------------------------------------------------------------ */ -U_CAPI Int U_EXPORT2 uprv_decNumberToInt32(const decNumber *dn, decContext *set) { - #if DECCHECK - if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; - #endif - - /* special or too many digits, or bad exponent */ - if (dn->bits&DECSPECIAL || dn->digits>10 || dn->exponent!=0) ; /* bad */ - else { /* is a finite integer with 10 or fewer digits */ - Int d; /* work */ - const Unit *up; /* .. */ - uInt hi=0, lo; /* .. 
*/ - up=dn->lsu; /* -> lsu */ - lo=*up; /* get 1 to 9 digits */ - #if DECDPUN>1 /* split to higher */ - hi=lo/10; - lo=lo%10; - #endif - up++; - /* collect remaining Units, if any, into hi */ - for (d=DECDPUN; ddigits; up++, d+=DECDPUN) hi+=*up*powers[d-1]; - /* now low has the lsd, hi the remainder */ - if (hi>214748364 || (hi==214748364 && lo>7)) { /* out of range? */ - /* most-negative is a reprieve */ - if (dn->bits&DECNEG && hi==214748364 && lo==8) return 0x80000000; - /* bad -- drop through */ - } - else { /* in-range always */ - Int i=X10(hi)+lo; - if (dn->bits&DECNEG) return -i; - return i; - } - } /* integer */ - uprv_decContextSetStatus(set, DEC_Invalid_operation); /* [may not return] */ - return 0; - } /* decNumberToInt32 */ - -U_CAPI uInt U_EXPORT2 uprv_decNumberToUInt32(const decNumber *dn, decContext *set) { - #if DECCHECK - if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; - #endif - /* special or too many digits, or bad exponent, or negative (<0) */ - if (dn->bits&DECSPECIAL || dn->digits>10 || dn->exponent!=0 - || (dn->bits&DECNEG && !ISZERO(dn))); /* bad */ - else { /* is a finite integer with 10 or fewer digits */ - Int d; /* work */ - const Unit *up; /* .. */ - uInt hi=0, lo; /* .. 
*/ - up=dn->lsu; /* -> lsu */ - lo=*up; /* get 1 to 9 digits */ - #if DECDPUN>1 /* split to higher */ - hi=lo/10; - lo=lo%10; - #endif - up++; - /* collect remaining Units, if any, into hi */ - for (d=DECDPUN; ddigits; up++, d+=DECDPUN) hi+=*up*powers[d-1]; - - /* now low has the lsd, hi the remainder */ - if (hi>429496729 || (hi==429496729 && lo>5)) ; /* no reprieve possible */ - else return X10(hi)+lo; - } /* integer */ - uprv_decContextSetStatus(set, DEC_Invalid_operation); /* [may not return] */ - return 0; - } /* decNumberToUInt32 */ - -/* ------------------------------------------------------------------ */ -/* to-scientific-string -- conversion to numeric string */ -/* to-engineering-string -- conversion to numeric string */ -/* */ -/* decNumberToString(dn, string); */ -/* decNumberToEngString(dn, string); */ -/* */ -/* dn is the decNumber to convert */ -/* string is the string where the result will be laid out */ -/* */ -/* string must be at least dn->digits+14 characters long */ -/* */ -/* No error is possible, and no status can be set. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI char * U_EXPORT2 uprv_decNumberToString(const decNumber *dn, char *string){ - decToString(dn, string, 0); - return string; - } /* DecNumberToString */ - -U_CAPI char * U_EXPORT2 uprv_decNumberToEngString(const decNumber *dn, char *string){ - decToString(dn, string, 1); - return string; - } /* DecNumberToEngString */ - -/* ------------------------------------------------------------------ */ -/* to-number -- conversion from numeric string */ -/* */ -/* decNumberFromString -- convert string to decNumber */ -/* dn -- the number structure to fill */ -/* chars[] -- the string to convert ('\0' terminated) */ -/* set -- the context used for processing any error, */ -/* determining the maximum precision available */ -/* (set.digits), determining the maximum and minimum */ -/* exponent (set.emax and set.emin), determining if */ -/* extended values are allowed, and checking the */ -/* rounding mode if overflow occurs or rounding is */ -/* needed. */ -/* */ -/* The length of the coefficient and the size of the exponent are */ -/* checked by this routine, so the correct error (Underflow or */ -/* Overflow) can be reported or rounding applied, as necessary. */ -/* */ -/* If bad syntax is detected, the result will be a quiet NaN. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *dn, const char chars[], - decContext *set) { - Int exponent=0; /* working exponent [assume 0] */ - uByte bits=0; /* working flags [assume +ve] */ - Unit *res; /* where result will be built */ - Unit resbuff[SD2U(DECBUFFER+9)];/* local buffer in case need temporary */ - /* [+9 allows for ln() constants] */ - Unit *allocres=NULL; /* -> allocated result, iff allocated */ - Int d=0; /* count of digits found in decimal part */ - const char *dotchar=NULL; /* where dot was found */ - const char *cfirst=chars; /* -> first character of decimal part */ - const char *last=NULL; /* -> last digit of decimal part */ - const char *c; /* work */ - Unit *up; /* .. */ - #if DECDPUN>1 - Int cut, out; /* .. */ - #endif - Int residue; /* rounding residue */ - uInt status=0; /* error code */ - - #if DECCHECK - if (decCheckOperands(DECUNRESU, DECUNUSED, DECUNUSED, set)) - return uprv_decNumberZero(dn); - #endif - - do { /* status & malloc protection */ - for (c=chars;; c++) { /* -> input character */ - if (*c>='0' && *c<='9') { /* test for Arabic digit */ - last=c; - d++; /* count of real digits */ - continue; /* still in decimal part */ - } - if (*c=='.' && dotchar==NULL) { /* first '.' */ - dotchar=c; /* record offset into decimal part */ - if (c==cfirst) cfirst++; /* first digit must follow */ - continue;} - if (c==chars) { /* first in string... */ - if (*c=='-') { /* valid - sign */ - cfirst++; - bits=DECNEG; - continue;} - if (*c=='+') { /* valid + sign */ - cfirst++; - continue;} - } - /* *c is not a digit, or a valid +, -, or '.' */ - break; - } /* c */ - - if (last==NULL) { /* no digits yet */ - status=DEC_Conversion_syntax;/* assume the worst */ - if (*c=='\0') break; /* and no more to come... 
*/ - #if DECSUBSET - /* if subset then infinities and NaNs are not allowed */ - if (!set->extended) break; /* hopeless */ - #endif - /* Infinities and NaNs are possible, here */ - if (dotchar!=NULL) break; /* .. unless had a dot */ - uprv_decNumberZero(dn); /* be optimistic */ - if (decBiStr(c, "infinity", "INFINITY") - || decBiStr(c, "inf", "INF")) { - dn->bits=bits | DECINF; - status=0; /* is OK */ - break; /* all done */ - } - /* a NaN expected */ - /* 2003.09.10 NaNs are now permitted to have a sign */ - dn->bits=bits | DECNAN; /* assume simple NaN */ - if (*c=='s' || *c=='S') { /* looks like an sNaN */ - c++; - dn->bits=bits | DECSNAN; - } - if (*c!='n' && *c!='N') break; /* check caseless "NaN" */ - c++; - if (*c!='a' && *c!='A') break; /* .. */ - c++; - if (*c!='n' && *c!='N') break; /* .. */ - c++; - /* now either nothing, or nnnn payload, expected */ - /* -> start of integer and skip leading 0s [including plain 0] */ - for (cfirst=c; *cfirst=='0';) cfirst++; - if (*cfirst=='\0') { /* "NaN" or "sNaN", maybe with all 0s */ - status=0; /* it's good */ - break; /* .. */ - } - /* something other than 0s; setup last and d as usual [no dots] */ - for (c=cfirst;; c++, d++) { - if (*c<'0' || *c>'9') break; /* test for Arabic digit */ - last=c; - } - if (*c!='\0') break; /* not all digits */ - if (d>set->digits-1) { - /* [NB: payload in a decNumber can be full length unless */ - /* clamped, in which case can only be digits-1] */ - if (set->clamp) break; - if (d>set->digits) break; - } /* too many digits? */ - /* good; drop through to convert the integer to coefficient */ - status=0; /* syntax is OK */ - bits=dn->bits; /* for copy-back */ - } /* last==NULL */ - - else if (*c!='\0') { /* more to process... 
*/ - /* had some digits; exponent is only valid sequence now */ - Flag nege; /* 1=negative exponent */ - const char *firstexp; /* -> first significant exponent digit */ - status=DEC_Conversion_syntax;/* assume the worst */ - if (*c!='e' && *c!='E') break; - /* Found 'e' or 'E' -- now process explicit exponent */ - /* 1998.07.11: sign no longer required */ - nege=0; - c++; /* to (possible) sign */ - if (*c=='-') {nege=1; c++;} - else if (*c=='+') c++; - if (*c=='\0') break; - - for (; *c=='0' && *(c+1)!='\0';) c++; /* strip insignificant zeros */ - firstexp=c; /* save exponent digit place */ - for (; ;c++) { - if (*c<'0' || *c>'9') break; /* not a digit */ - exponent=X10(exponent)+(Int)*c-(Int)'0'; - } /* c */ - /* if not now on a '\0', *c must not be a digit */ - if (*c!='\0') break; - - /* (this next test must be after the syntax checks) */ - /* if it was too long the exponent may have wrapped, so check */ - /* carefully and set it to a certain overflow if wrap possible */ - if (c>=firstexp+9+1) { - if (c>firstexp+9+1 || *firstexp>'1') exponent=DECNUMMAXE*2; - /* [up to 1999999999 is OK, for example 1E-1000000998] */ - } - if (nege) exponent=-exponent; /* was negative */ - status=0; /* is OK */ - } /* stuff after digits */ - - /* Here when whole string has been inspected; syntax is good */ - /* cfirst->first digit (never dot), last->last digit (ditto) */ - - /* strip leading zeros/dot [leave final 0 if all 0's] */ - if (*cfirst=='0') { /* [cfirst has stepped over .] */ - for (c=cfirst; cextended) { - uprv_decNumberZero(dn); /* clean result */ - break; /* [could be return] */ - } - #endif - } /* at least one leading 0 */ - - /* Handle decimal point... 
*/ - if (dotchar!=NULL && dotchardigits) res=dn->lsu; /* fits into supplied decNumber */ - else { /* rounding needed */ - Int needbytes=D2U(d)*sizeof(Unit);/* bytes needed */ - res=resbuff; /* assume use local buffer */ - if (needbytes>(Int)sizeof(resbuff)) { /* too big for local */ - allocres=(Unit *)malloc(needbytes); - if (allocres==NULL) {status|=DEC_Insufficient_storage; break;} - res=allocres; - } - } - /* res now -> number lsu, buffer, or allocated storage for Unit array */ - - /* Place the coefficient into the selected Unit array */ - /* [this is often 70% of the cost of this function when DECDPUN>1] */ - #if DECDPUN>1 - out=0; /* accumulator */ - up=res+D2U(d)-1; /* -> msu */ - cut=d-(up-res)*DECDPUN; /* digits in top unit */ - for (c=cfirst;; c++) { /* along the digits */ - if (*c=='.') continue; /* ignore '.' [don't decrement cut] */ - out=X10(out)+(Int)*c-(Int)'0'; - if (c==last) break; /* done [never get to trailing '.'] */ - cut--; - if (cut>0) continue; /* more for this unit */ - *up=(Unit)out; /* write unit */ - up--; /* prepare for unit below.. */ - cut=DECDPUN; /* .. */ - out=0; /* .. */ - } /* c */ - *up=(Unit)out; /* write lsu */ - - #else - /* DECDPUN==1 */ - up=res; /* -> lsu */ - for (c=last; c>=cfirst; c--) { /* over each character, from least */ - if (*c=='.') continue; /* ignore . 
[don't step up] */ - *up=(Unit)((Int)*c-(Int)'0'); - up++; - } /* c */ - #endif - - dn->bits=bits; - dn->exponent=exponent; - dn->digits=d; - - /* if not in number (too long) shorten into the number */ - if (d>set->digits) { - residue=0; - decSetCoeff(dn, set, res, d, &residue, &status); - /* always check for overflow or subnormal and round as needed */ - decFinalize(dn, set, &residue, &status); - } - else { /* no rounding, but may still have overflow or subnormal */ - /* [these tests are just for performance; finalize repeats them] */ - if ((dn->exponent-1emin-dn->digits) - || (dn->exponent-1>set->emax-set->digits)) { - residue=0; - decFinalize(dn, set, &residue, &status); - } - } - /* decNumberShow(dn); */ - } while(0); /* [for break] */ - - if (allocres!=NULL) free(allocres); /* drop any storage used */ - if (status!=0) decStatus(dn, status, set); - return dn; - } /* decNumberFromString */ - -/* ================================================================== */ -/* Operators */ -/* ================================================================== */ - -/* ------------------------------------------------------------------ */ -/* decNumberAbs -- absolute value operator */ -/* */ -/* This computes C = abs(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context */ -/* */ -/* See also decNumberCopyAbs for a quiet bitwise version of this. */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -/* This has the same effect as decNumberPlus unless A is negative, */ -/* in which case it has the same effect as decNumberMinus. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberAbs(decNumber *res, const decNumber *rhs, - decContext *set) { - decNumber dzero; /* for 0 */ - uInt status=0; /* accumulator */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - uprv_decNumberZero(&dzero); /* set 0 */ - dzero.exponent=rhs->exponent; /* [no coefficient expansion] */ - decAddOp(res, &dzero, rhs, set, (uByte)(rhs->bits & DECNEG), &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberAbs */ - -/* ------------------------------------------------------------------ */ -/* decNumberAdd -- add two Numbers */ -/* */ -/* This computes C = A + B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X+X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -/* This just calls the routine shared with Subtract */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberAdd(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decAddOp(res, lhs, rhs, set, 0, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberAdd */ - -/* ------------------------------------------------------------------ */ -/* decNumberAnd -- AND two Numbers, digitwise */ -/* */ -/* This computes C = A & B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X&X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context (used for result length and error report) */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Logical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberAnd(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - const Unit *ua, *ub; /* -> operands */ - const Unit *msua, *msub; /* -> operand msus */ - Unit *uc, *msuc; /* -> result and its msu */ - Int msudigs; /* digits in res msu */ - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) - || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - - /* operands are valid */ - ua=lhs->lsu; /* bottom-up */ - ub=rhs->lsu; /* .. */ - uc=res->lsu; /* .. */ - msua=ua+D2U(lhs->digits)-1; /* -> msu of lhs */ - msub=ub+D2U(rhs->digits)-1; /* -> msu of rhs */ - msuc=uc+D2U(set->digits)-1; /* -> msu of result */ - msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ - for (; uc<=msuc; ua++, ub++, uc++) { /* Unit loop */ - Unit a, b; /* extract units */ - if (ua>msua) a=0; - else a=*ua; - if (ub>msub) b=0; - else b=*ub; - *uc=0; /* can now write back */ - if (a|b) { /* maybe 1 bits to examine */ - Int i, j; - *uc=0; /* can now write back */ - /* This loop could be unrolled and/or use BIN2BCD tables */ - for (i=0; i1) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - if (uc==msuc && i==msudigs-1) break; /* just did final digit */ - } /* each digit */ - } /* both OK */ - } /* each unit */ - /* [here uc-1 is the msu of the result] */ - res->digits=decGetDigits(res->lsu, uc-res->lsu); - res->exponent=0; /* integer */ - res->bits=0; /* sign=0 */ - return res; /* [no status to set] */ - } /* decNumberAnd */ - -/* ------------------------------------------------------------------ */ -/* decNumberCompare -- compare two Numbers */ -/* */ -/* This computes C = A ? B */ -/* */ -/* res is C, the result. 
C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for one digit (or NaN). */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompare(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPARE, &status); - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberCompare */ - -/* ------------------------------------------------------------------ */ -/* decNumberCompareSignal -- compare, signalling on all NaNs */ -/* */ -/* This computes C = A ? B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for one digit (or NaN). */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareSignal(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPSIG, &status); - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberCompareSignal */ - -/* ------------------------------------------------------------------ */ -/* decNumberCompareTotal -- compare two Numbers, using total ordering */ -/* */ -/* This computes C = A ? B, under total ordering */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for one digit; the result will always be one of */ -/* -1, 0, or 1. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareTotal(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPTOTAL, &status); - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberCompareTotal */ - -/* ------------------------------------------------------------------ */ -/* decNumberCompareTotalMag -- compare, total ordering of magnitudes */ -/* */ -/* This computes C = |A| ? |B|, under total ordering */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for one digit; the result will always be one of */ -/* -1, 0, or 1. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareTotalMag(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - uInt needbytes; /* for space calculations */ - decNumber bufa[D2N(DECBUFFER+1)];/* +1 in case DECBUFFER=0 */ - decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ - decNumber bufb[D2N(DECBUFFER+1)]; - decNumber *allocbufb=NULL; /* -> allocated bufb, iff allocated */ - decNumber *a, *b; /* temporary pointers */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - /* if either is negative, take a copy and absolute */ - if (decNumberIsNegative(lhs)) { /* lhs<0 */ - a=bufa; - needbytes=sizeof(decNumber)+(D2U(lhs->digits)-1)*sizeof(Unit); - if (needbytes>sizeof(bufa)) { /* need malloc space */ - allocbufa=(decNumber *)malloc(needbytes); - if (allocbufa==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - a=allocbufa; /* use the allocated space */ - } - uprv_decNumberCopy(a, lhs); /* 
copy content */ - a->bits&=~DECNEG; /* .. and clear the sign */ - lhs=a; /* use copy from here on */ - } - if (decNumberIsNegative(rhs)) { /* rhs<0 */ - b=bufb; - needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); - if (needbytes>sizeof(bufb)) { /* need malloc space */ - allocbufb=(decNumber *)malloc(needbytes); - if (allocbufb==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - b=allocbufb; /* use the allocated space */ - } - uprv_decNumberCopy(b, rhs); /* copy content */ - b->bits&=~DECNEG; /* .. and clear the sign */ - rhs=b; /* use copy from here on */ - } - decCompareOp(res, lhs, rhs, set, COMPTOTAL, &status); - } while(0); /* end protected */ - - if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ - if (allocbufb!=NULL) free(allocbufb); /* .. */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberCompareTotalMag */ - -/* ------------------------------------------------------------------ */ -/* decNumberDivide -- divide one number by another */ -/* */ -/* This computes C = A / B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X/X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberDivide(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decDivideOp(res, lhs, rhs, set, DIVIDE, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberDivide */ - -/* ------------------------------------------------------------------ */ -/* decNumberDivideInteger -- divide and return integer quotient */ -/* */ -/* This computes C = A # B, where # is the integer divide operator */ -/* */ -/* res is C, the result. 
C may be A and/or B (e.g., X=X#X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberDivideInteger(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decDivideOp(res, lhs, rhs, set, DIVIDEINT, &status); - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberDivideInteger */ - -/* ------------------------------------------------------------------ */ -/* decNumberExp -- exponentiation */ -/* */ -/* This computes C = exp(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context; note that rounding mode has no effect */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Mathematical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* */ -/* Finite results will always be full precision and Inexact, except */ -/* when A is a zero or -Infinity (giving 1 or 0 respectively). */ -/* */ -/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */ -/* almost always be correctly rounded, but may be up to 1 ulp in */ -/* error in rare cases. */ -/* ------------------------------------------------------------------ */ -/* This is a wrapper for decExpOp which can handle the slightly wider */ -/* (double) range needed by Ln (which has to be able to calculate */ -/* exp(-a) where a can be the tiniest number (Ntiny). 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberExp(decNumber *res, const decNumber *rhs, - decContext *set) { - uInt status=0; /* accumulator */ - #if DECSUBSET - decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ - #endif - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* Check restrictions; these restrictions ensure that if h=8 (see */ - /* decExpOp) then the result will either overflow or underflow to 0. */ - /* Other math functions restrict the input range, too, for inverses. */ - /* If not violated then carry out the operation. */ - if (!decCheckMath(rhs, set, &status)) do { /* protect allocation */ - #if DECSUBSET - if (!set->extended) { - /* reduce operand and set lostDigits status, as needed */ - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, &status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - decExpOp(res, rhs, set, &status); - } while(0); /* end protected */ - - #if DECSUBSET - if (allocrhs !=NULL) free(allocrhs); /* drop any storage used */ - #endif - /* apply significant status */ - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberExp */ - -/* ------------------------------------------------------------------ */ -/* decNumberFMA -- fused multiply add */ -/* */ -/* This computes D = (A * B) + C with only one rounding */ -/* */ -/* res is D, the result. D may be A or B or C (e.g., X=FMA(X,X,X)) */ -/* lhs is A */ -/* rhs is B */ -/* fhs is C [far hand side] */ -/* set is the context */ -/* */ -/* Mathematical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* */ -/* C must have space for set->digits digits. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberFMA(decNumber *res, const decNumber *lhs, - const decNumber *rhs, const decNumber *fhs, - decContext *set) { - uInt status=0; /* accumulator */ - decContext dcmul; /* context for the multiplication */ - uInt needbytes; /* for space calculations */ - decNumber bufa[D2N(DECBUFFER*2+1)]; - decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ - decNumber *acc; /* accumulator pointer */ - decNumber dzero; /* work */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - if (decCheckOperands(res, fhs, DECUNUSED, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { /* [undefined if subset] */ - status|=DEC_Invalid_operation; - break;} - #endif - /* Check math restrictions [these ensure no overflow or underflow] */ - if ((!decNumberIsSpecial(lhs) && decCheckMath(lhs, set, &status)) - || (!decNumberIsSpecial(rhs) && decCheckMath(rhs, set, &status)) - || (!decNumberIsSpecial(fhs) && decCheckMath(fhs, set, &status))) break; - /* set up context for multiply */ - dcmul=*set; - dcmul.digits=lhs->digits+rhs->digits; /* just enough */ - /* [The above may be an over-estimate for subset arithmetic, but that's OK] */ - dcmul.emax=DEC_MAX_EMAX; /* effectively unbounded .. 
*/ - dcmul.emin=DEC_MIN_EMIN; /* [thanks to Math restrictions] */ - /* set up decNumber space to receive the result of the multiply */ - acc=bufa; /* may fit */ - needbytes=sizeof(decNumber)+(D2U(dcmul.digits)-1)*sizeof(Unit); - if (needbytes>sizeof(bufa)) { /* need malloc space */ - allocbufa=(decNumber *)malloc(needbytes); - if (allocbufa==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - acc=allocbufa; /* use the allocated space */ - } - /* multiply with extended range and necessary precision */ - /*printf("emin=%ld\n", dcmul.emin); */ - decMultiplyOp(acc, lhs, rhs, &dcmul, &status); - /* Only Invalid operation (from sNaN or Inf * 0) is possible in */ - /* status; if either is seen than ignore fhs (in case it is */ - /* another sNaN) and set acc to NaN unless we had an sNaN */ - /* [decMultiplyOp leaves that to caller] */ - /* Note sNaN has to go through addOp to shorten payload if */ - /* necessary */ - if ((status&DEC_Invalid_operation)!=0) { - if (!(status&DEC_sNaN)) { /* but be true invalid */ - uprv_decNumberZero(res); /* acc not yet set */ - res->bits=DECNAN; - break; - } - uprv_decNumberZero(&dzero); /* make 0 (any non-NaN would do) */ - fhs=&dzero; /* use that */ - } - #if DECCHECK - else { /* multiply was OK */ - if (status!=0) printf("Status=%08lx after FMA multiply\n", (LI)status); - } - #endif - /* add the third operand and result -> res, and all is done */ - decAddOp(res, acc, fhs, set, 0, &status); - } while(0); /* end protected */ - - if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberFMA */ - -/* ------------------------------------------------------------------ */ -/* decNumberInvert -- invert a Number, digitwise */ -/* */ -/* This computes C = ~A */ -/* */ -/* res is C, the result. 
C may be A (e.g., X=~X) */ -/* rhs is A */ -/* set is the context (used for result length and error report) */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Logical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberInvert(decNumber *res, const decNumber *rhs, - decContext *set) { - const Unit *ua, *msua; /* -> operand and its msu */ - Unit *uc, *msuc; /* -> result and its msu */ - Int msudigs; /* digits in res msu */ - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - if (rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - /* operand is valid */ - ua=rhs->lsu; /* bottom-up */ - uc=res->lsu; /* .. */ - msua=ua+D2U(rhs->digits)-1; /* -> msu of rhs */ - msuc=uc+D2U(set->digits)-1; /* -> msu of result */ - msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ - for (; uc<=msuc; ua++, uc++) { /* Unit loop */ - Unit a; /* extract unit */ - Int i, j; /* work */ - if (ua>msua) a=0; - else a=*ua; - *uc=0; /* can now write back */ - /* always need to examine all bits in rhs */ - /* This loop could be unrolled and/or use BIN2BCD tables */ - for (i=0; i1) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - if (uc==msuc && i==msudigs-1) break; /* just did final digit */ - } /* each digit */ - } /* each unit */ - /* [here uc-1 is the msu of the result] */ - res->digits=decGetDigits(res->lsu, uc-res->lsu); - res->exponent=0; /* integer */ - res->bits=0; /* sign=0 */ - return res; /* [no status to set] */ - } /* decNumberInvert */ - -/* ------------------------------------------------------------------ */ -/* decNumberLn -- natural logarithm */ -/* */ -/* This computes C = ln(A) */ -/* */ -/* res is C, the result. 
C may be A */ -/* rhs is A */ -/* set is the context; note that rounding mode has no effect */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Notable cases: */ -/* A<0 -> Invalid */ -/* A=0 -> -Infinity (Exact) */ -/* A=+Infinity -> +Infinity (Exact) */ -/* A=1 exactly -> 0 (Exact) */ -/* */ -/* Mathematical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* */ -/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */ -/* almost always be correctly rounded, but may be up to 1 ulp in */ -/* error in rare cases. */ -/* ------------------------------------------------------------------ */ -/* This is a wrapper for decLnOp which can handle the slightly wider */ -/* (+11) range needed by Ln, Log10, etc. (which may have to be able */ -/* to calculate at p+e+2). */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberLn(decNumber *res, const decNumber *rhs, - decContext *set) { - uInt status=0; /* accumulator */ - #if DECSUBSET - decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ - #endif - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* Check restrictions; this is a math function; if not violated */ - /* then carry out the operation. 
*/ - if (!decCheckMath(rhs, set, &status)) do { /* protect allocation */ - #if DECSUBSET - if (!set->extended) { - /* reduce operand and set lostDigits status, as needed */ - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, &status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - /* special check in subset for rhs=0 */ - if (ISZERO(rhs)) { /* +/- zeros -> error */ - status|=DEC_Invalid_operation; - break;} - } /* extended=0 */ - #endif - decLnOp(res, rhs, set, &status); - } while(0); /* end protected */ - - #if DECSUBSET - if (allocrhs !=NULL) free(allocrhs); /* drop any storage used */ - #endif - /* apply significant status */ - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberLn */ - -/* ------------------------------------------------------------------ */ -/* decNumberLogB - get adjusted exponent, by 754 rules */ -/* */ -/* This computes C = adjustedexponent(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context, used only for digits and status */ -/* */ -/* C must have space for 10 digits (A might have 10**9 digits and */ -/* an exponent of +999999999, or one digit and an exponent of */ -/* -1999999999). */ -/* */ -/* This returns the adjusted exponent of A after (in theory) padding */ -/* with zeros on the right to set->digits digits while keeping the */ -/* same value. The exponent is not limited by emin/emax. 
*/ -/* */ -/* Notable cases: */ -/* A<0 -> Use |A| */ -/* A=0 -> -Infinity (Division by zero) */ -/* A=Infinite -> +Infinity (Exact) */ -/* A=1 exactly -> 0 (Exact) */ -/* NaNs are propagated as usual */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberLogB(decNumber *res, const decNumber *rhs, - decContext *set) { - uInt status=0; /* accumulator */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* NaNs as usual; Infinities return +Infinity; 0->oops */ - if (decNumberIsNaN(rhs)) decNaNs(res, rhs, NULL, set, &status); - else if (decNumberIsInfinite(rhs)) uprv_decNumberCopyAbs(res, rhs); - else if (decNumberIsZero(rhs)) { - uprv_decNumberZero(res); /* prepare for Infinity */ - res->bits=DECNEG|DECINF; /* -Infinity */ - status|=DEC_Division_by_zero; /* as per 754 */ - } - else { /* finite non-zero */ - Int ae=rhs->exponent+rhs->digits-1; /* adjusted exponent */ - uprv_decNumberFromInt32(res, ae); /* lay it out */ - } - - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberLogB */ - -/* ------------------------------------------------------------------ */ -/* decNumberLog10 -- logarithm in base 10 */ -/* */ -/* This computes C = log10(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context; note that rounding mode has no effect */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Notable cases: */ -/* A<0 -> Invalid */ -/* A=0 -> -Infinity (Exact) */ -/* A=+Infinity -> +Infinity (Exact) */ -/* A=10**n (if n is an integer) -> n (Exact) */ -/* */ -/* Mathematical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* */ -/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */ -/* almost always be correctly rounded, but may be up to 1 ulp in */ -/* error in rare cases. 
*/ -/* ------------------------------------------------------------------ */ -/* This calculates ln(A)/ln(10) using appropriate precision. For */ -/* ln(A) this is the max(p, rhs->digits + t) + 3, where p is the */ -/* requested digits and t is the number of digits in the exponent */ -/* (maximum 6). For ln(10) it is p + 3; this is often handled by the */ -/* fastpath in decLnOp. The final division is done to the requested */ -/* precision. */ -/* ------------------------------------------------------------------ */ -#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif -U_CAPI decNumber * U_EXPORT2 uprv_decNumberLog10(decNumber *res, const decNumber *rhs, - decContext *set) { - uInt status=0, ignore=0; /* status accumulators */ - uInt needbytes; /* for space calculations */ - Int p; /* working precision */ - Int t; /* digits in exponent of A */ - - /* buffers for a and b working decimals */ - /* (adjustment calculator, same size) */ - decNumber bufa[D2N(DECBUFFER+2)]; - decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ - decNumber *a=bufa; /* temporary a */ - decNumber bufb[D2N(DECBUFFER+2)]; - decNumber *allocbufb=NULL; /* -> allocated bufb, iff allocated */ - decNumber *b=bufb; /* temporary b */ - decNumber bufw[D2N(10)]; /* working 2-10 digit number */ - decNumber *w=bufw; /* .. */ - #if DECSUBSET - decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ - #endif - - decContext aset; /* working context */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* Check restrictions; this is a math function; if not violated */ - /* then carry out the operation. 
*/ - if (!decCheckMath(rhs, set, &status)) do { /* protect malloc */ - #if DECSUBSET - if (!set->extended) { - /* reduce operand and set lostDigits status, as needed */ - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, &status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - /* special check in subset for rhs=0 */ - if (ISZERO(rhs)) { /* +/- zeros -> error */ - status|=DEC_Invalid_operation; - break;} - } /* extended=0 */ - #endif - - uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); /* clean context */ - - /* handle exact powers of 10; only check if +ve finite */ - if (!(rhs->bits&(DECNEG|DECSPECIAL)) && !ISZERO(rhs)) { - Int residue=0; /* (no residue) */ - uInt copystat=0; /* clean status */ - - /* round to a single digit... */ - aset.digits=1; - decCopyFit(w, rhs, &aset, &residue, ©stat); /* copy & shorten */ - /* if exact and the digit is 1, rhs is a power of 10 */ - if (!(copystat&DEC_Inexact) && w->lsu[0]==1) { - /* the exponent, conveniently, is the power of 10; making */ - /* this the result needs a little care as it might not fit, */ - /* so first convert it into the working number, and then move */ - /* to res */ - uprv_decNumberFromInt32(w, w->exponent); - residue=0; - decCopyFit(res, w, set, &residue, &status); /* copy & round */ - decFinish(res, set, &residue, &status); /* cleanup/set flags */ - break; - } /* not a power of 10 */ - } /* not a candidate for exact */ - - /* simplify the information-content calculation to use 'total */ - /* number of digits in a, including exponent' as compared to the */ - /* requested digits, as increasing this will only rarely cost an */ - /* iteration in ln(a) anyway */ - t=6; /* it can never be >6 */ - - /* allocate space when needed... 
*/ - p=(rhs->digits+t>set->digits?rhs->digits+t:set->digits)+3; - needbytes=sizeof(decNumber)+(D2U(p)-1)*sizeof(Unit); - if (needbytes>sizeof(bufa)) { /* need malloc space */ - allocbufa=(decNumber *)malloc(needbytes); - if (allocbufa==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - a=allocbufa; /* use the allocated space */ - } - aset.digits=p; /* as calculated */ - aset.emax=DEC_MAX_MATH; /* usual bounds */ - aset.emin=-DEC_MAX_MATH; /* .. */ - aset.clamp=0; /* and no concrete format */ - decLnOp(a, rhs, &aset, &status); /* a=ln(rhs) */ - - /* skip the division if the result so far is infinite, NaN, or */ - /* zero, or there was an error; note NaN from sNaN needs copy */ - if (status&DEC_NaNs && !(status&DEC_sNaN)) break; - if (a->bits&DECSPECIAL || ISZERO(a)) { - uprv_decNumberCopy(res, a); /* [will fit] */ - break;} - - /* for ln(10) an extra 3 digits of precision are needed */ - p=set->digits+3; - needbytes=sizeof(decNumber)+(D2U(p)-1)*sizeof(Unit); - if (needbytes>sizeof(bufb)) { /* need malloc space */ - allocbufb=(decNumber *)malloc(needbytes); - if (allocbufb==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - b=allocbufb; /* use the allocated space */ - } - uprv_decNumberZero(w); /* set up 10... */ - #if DECDPUN==1 - w->lsu[1]=1; w->lsu[0]=0; /* .. */ - #else - w->lsu[0]=10; /* .. */ - #endif - w->digits=2; /* .. */ - - aset.digits=p; - decLnOp(b, w, &aset, &ignore); /* b=ln(10) */ - - aset.digits=set->digits; /* for final divide */ - decDivideOp(res, a, b, &aset, DIVIDE, &status); /* into result */ - } while(0); /* [for break] */ - - if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ - if (allocbufb!=NULL) free(allocbufb); /* .. */ - #if DECSUBSET - if (allocrhs !=NULL) free(allocrhs); /* .. 
*/ - #endif - /* apply significant status */ - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberLog10 */ -#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 -#pragma GCC diagnostic pop -#endif - -/* ------------------------------------------------------------------ */ -/* decNumberMax -- compare two Numbers and return the maximum */ -/* */ -/* This computes C = A ? B, returning the maximum by 754 rules */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberMax(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPMAX, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberMax */ - -/* ------------------------------------------------------------------ */ -/* decNumberMaxMag -- compare and return the maximum by magnitude */ -/* */ -/* This computes C = A ? B, returning the maximum by 754 rules */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberMaxMag(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPMAXMAG, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberMaxMag */ - -/* ------------------------------------------------------------------ */ -/* decNumberMin -- compare two Numbers and return the minimum */ -/* */ -/* This computes C = A ? B, returning the minimum by 754 rules */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberMin(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPMIN, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberMin */ - -/* ------------------------------------------------------------------ */ -/* decNumberMinMag -- compare and return the minimum by magnitude */ -/* */ -/* This computes C = A ? B, returning the minimum by 754 rules */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberMinMag(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decCompareOp(res, lhs, rhs, set, COMPMINMAG, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberMinMag */ - -/* ------------------------------------------------------------------ */ -/* decNumberMinus -- prefix minus operator */ -/* */ -/* This computes C = 0 - A */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context */ -/* */ -/* See also decNumberCopyNegate for a quiet bitwise version of this. */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -/* Simply use AddOp for the subtract, which will do the necessary. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberMinus(decNumber *res, const decNumber *rhs, - decContext *set) { - decNumber dzero; - uInt status=0; /* accumulator */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - uprv_decNumberZero(&dzero); /* make 0 */ - dzero.exponent=rhs->exponent; /* [no coefficient expansion] */ - decAddOp(res, &dzero, rhs, set, DECNEG, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberMinus */ - -/* ------------------------------------------------------------------ */ -/* decNumberNextMinus -- next towards -Infinity */ -/* */ -/* This computes C = A - infinitesimal, rounded towards -Infinity */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context */ -/* */ -/* This is a generalization of 754 NextDown. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextMinus(decNumber *res, const decNumber *rhs, - decContext *set) { - decNumber dtiny; /* constant */ - decContext workset=*set; /* work */ - uInt status=0; /* accumulator */ - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* +Infinity is the special case */ - if ((rhs->bits&(DECINF|DECNEG))==DECINF) { - decSetMaxValue(res, set); /* is +ve */ - /* there is no status to set */ - return res; - } - uprv_decNumberZero(&dtiny); /* start with 0 */ - dtiny.lsu[0]=1; /* make number that is .. */ - dtiny.exponent=DEC_MIN_EMIN-1; /* .. smaller than tiniest */ - workset.round=DEC_ROUND_FLOOR; - decAddOp(res, rhs, &dtiny, &workset, DECNEG, &status); - status&=DEC_Invalid_operation|DEC_sNaN; /* only sNaN Invalid please */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberNextMinus */ - -/* ------------------------------------------------------------------ */ -/* decNumberNextPlus -- next towards +Infinity */ -/* */ -/* This computes C = A + infinitesimal, rounded towards +Infinity */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context */ -/* */ -/* This is a generalization of 754 NextUp. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextPlus(decNumber *res, const decNumber *rhs, - decContext *set) { - decNumber dtiny; /* constant */ - decContext workset=*set; /* work */ - uInt status=0; /* accumulator */ - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* -Infinity is the special case */ - if ((rhs->bits&(DECINF|DECNEG))==(DECINF|DECNEG)) { - decSetMaxValue(res, set); - res->bits=DECNEG; /* negative */ - /* there is no status to set */ - return res; - } - uprv_decNumberZero(&dtiny); /* start with 0 */ - dtiny.lsu[0]=1; /* make number that is .. 
*/ - dtiny.exponent=DEC_MIN_EMIN-1; /* .. smaller than tiniest */ - workset.round=DEC_ROUND_CEILING; - decAddOp(res, rhs, &dtiny, &workset, 0, &status); - status&=DEC_Invalid_operation|DEC_sNaN; /* only sNaN Invalid please */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberNextPlus */ - -/* ------------------------------------------------------------------ */ -/* decNumberNextToward -- next towards rhs */ -/* */ -/* This computes C = A +/- infinitesimal, rounded towards */ -/* +/-Infinity in the direction of B, as per 754-1985 nextafter */ -/* modified during revision but dropped from 754-2008. */ -/* */ -/* res is C, the result. C may be A or B. */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* This is a generalization of 754-1985 NextAfter. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextToward(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - decNumber dtiny; /* constant */ - decContext workset=*set; /* work */ - Int result; /* .. */ - uInt status=0; /* accumulator */ - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) { - decNaNs(res, lhs, rhs, set, &status); - } - else { /* Is numeric, so no chance of sNaN Invalid, etc. 
*/ - result=decCompare(lhs, rhs, 0); /* sign matters */ - if (result==BADINT) status|=DEC_Insufficient_storage; /* rare */ - else { /* valid compare */ - if (result==0) uprv_decNumberCopySign(res, lhs, rhs); /* easy */ - else { /* differ: need NextPlus or NextMinus */ - uByte sub; /* add or subtract */ - if (result<0) { /* lhsbits&(DECINF|DECNEG))==(DECINF|DECNEG)) { - decSetMaxValue(res, set); - res->bits=DECNEG; /* negative */ - return res; /* there is no status to set */ - } - workset.round=DEC_ROUND_CEILING; - sub=0; /* add, please */ - } /* plus */ - else { /* lhs>rhs, do nextminus */ - /* +Infinity is the special case */ - if ((lhs->bits&(DECINF|DECNEG))==DECINF) { - decSetMaxValue(res, set); - return res; /* there is no status to set */ - } - workset.round=DEC_ROUND_FLOOR; - sub=DECNEG; /* subtract, please */ - } /* minus */ - uprv_decNumberZero(&dtiny); /* start with 0 */ - dtiny.lsu[0]=1; /* make number that is .. */ - dtiny.exponent=DEC_MIN_EMIN-1; /* .. smaller than tiniest */ - decAddOp(res, lhs, &dtiny, &workset, sub, &status); /* + or - */ - /* turn off exceptions if the result is a normal number */ - /* (including Nmin), otherwise let all status through */ - if (uprv_decNumberIsNormal(res, set)) status=0; - } /* unequal */ - } /* compare OK */ - } /* numeric */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberNextToward */ - -/* ------------------------------------------------------------------ */ -/* decNumberOr -- OR two Numbers, digitwise */ -/* */ -/* This computes C = A | B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X|X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context (used for result length and error report) */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Logical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberOr(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - const Unit *ua, *ub; /* -> operands */ - const Unit *msua, *msub; /* -> operand msus */ - Unit *uc, *msuc; /* -> result and its msu */ - Int msudigs; /* digits in res msu */ - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) - || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - /* operands are valid */ - ua=lhs->lsu; /* bottom-up */ - ub=rhs->lsu; /* .. */ - uc=res->lsu; /* .. */ - msua=ua+D2U(lhs->digits)-1; /* -> msu of lhs */ - msub=ub+D2U(rhs->digits)-1; /* -> msu of rhs */ - msuc=uc+D2U(set->digits)-1; /* -> msu of result */ - msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ - for (; uc<=msuc; ua++, ub++, uc++) { /* Unit loop */ - Unit a, b; /* extract units */ - if (ua>msua) a=0; - else a=*ua; - if (ub>msub) b=0; - else b=*ub; - *uc=0; /* can now write back */ - if (a|b) { /* maybe 1 bits to examine */ - Int i, j; - /* This loop could be unrolled and/or use BIN2BCD tables */ - for (i=0; i1) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - if (uc==msuc && i==msudigs-1) break; /* just did final digit */ - } /* each digit */ - } /* non-zero */ - } /* each unit */ - /* [here uc-1 is the msu of the result] */ - res->digits=decGetDigits(res->lsu, uc-res->lsu); - res->exponent=0; /* integer */ - res->bits=0; /* sign=0 */ - return res; /* [no status to set] */ - } /* decNumberOr */ - -/* ------------------------------------------------------------------ */ -/* decNumberPlus -- prefix plus operator */ -/* */ -/* This computes C = 0 + A */ -/* */ -/* res is C, the result. 
C may be A */ -/* rhs is A */ -/* set is the context */ -/* */ -/* See also decNumberCopy for a quiet bitwise version of this. */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -/* This simply uses AddOp; Add will take fast path after preparing A. */ -/* Performance is a concern here, as this routine is often used to */ -/* check operands and apply rounding and overflow/underflow testing. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberPlus(decNumber *res, const decNumber *rhs, - decContext *set) { - decNumber dzero; - uInt status=0; /* accumulator */ - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - uprv_decNumberZero(&dzero); /* make 0 */ - dzero.exponent=rhs->exponent; /* [no coefficient expansion] */ - decAddOp(res, &dzero, rhs, set, 0, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberPlus */ - -/* ------------------------------------------------------------------ */ -/* decNumberMultiply -- multiply two Numbers */ -/* */ -/* This computes C = A x B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X+X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberMultiply(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decMultiplyOp(res, lhs, rhs, set, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberMultiply */ - -/* ------------------------------------------------------------------ */ -/* decNumberPower -- raise a number to a power */ -/* */ -/* This computes C = A ** B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X**X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Mathematical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* */ -/* However, if 1999999997<=B<=999999999 and B is an integer then the */ -/* restrictions on A and the context are relaxed to the usual bounds, */ -/* for compatibility with the earlier (integer power only) version */ -/* of this function. */ -/* */ -/* When B is an integer, the result may be exact, even if rounded. */ -/* */ -/* The final result is rounded according to the context; it will */ -/* almost always be correctly rounded, but may be up to 1 ulp in */ -/* error in rare cases. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - #if DECSUBSET - decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ - decNumber *allocrhs=NULL; /* .., rhs */ - #endif - decNumber *allocdac=NULL; /* -> allocated acc buffer, iff used */ - decNumber *allocinv=NULL; /* -> allocated 1/x buffer, iff used */ - Int reqdigits=set->digits; /* requested DIGITS */ - Int n; /* rhs in binary */ - Flag rhsint=0; /* 1 if rhs is an integer */ - Flag useint=0; /* 1 if can use integer calculation */ - Flag isoddint=0; /* 1 if rhs is an integer and odd */ - Int i; /* work */ - #if DECSUBSET - Int dropped; /* .. */ - #endif - uInt needbytes; /* buffer size needed */ - Flag seenbit; /* seen a bit while powering */ - Int residue=0; /* rounding residue */ - uInt status=0; /* accumulators */ - uByte bits=0; /* result sign if errors */ - decContext aset; /* working context */ - decNumber dnOne; /* work value 1... 
*/ - /* local accumulator buffer [a decNumber, with digits+elength+1 digits] */ - decNumber dacbuff[D2N(DECBUFFER+9)]; - decNumber *dac=dacbuff; /* -> result accumulator */ - /* same again for possible 1/lhs calculation */ - decNumber invbuff[D2N(DECBUFFER+9)]; - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { /* reduce operands and set status, as needed */ - if (lhs->digits>reqdigits) { - alloclhs=decRoundOperand(lhs, set, &status); - if (alloclhs==NULL) break; - lhs=alloclhs; - } - if (rhs->digits>reqdigits) { - allocrhs=decRoundOperand(rhs, set, &status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - /* handle NaNs and rhs Infinity (lhs infinity is harder) */ - if (SPECIALARGS) { - if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) { /* NaNs */ - decNaNs(res, lhs, rhs, set, &status); - break;} - if (decNumberIsInfinite(rhs)) { /* rhs Infinity */ - Flag rhsneg=rhs->bits&DECNEG; /* save rhs sign */ - if (decNumberIsNegative(lhs) /* lhs<0 */ - && !decNumberIsZero(lhs)) /* .. */ - status|=DEC_Invalid_operation; - else { /* lhs >=0 */ - uprv_decNumberZero(&dnOne); /* set up 1 */ - dnOne.lsu[0]=1; - uprv_decNumberCompare(dac, lhs, &dnOne, set); /* lhs ? 1 */ - uprv_decNumberZero(res); /* prepare for 0/1/Infinity */ - if (decNumberIsNegative(dac)) { /* lhs<1 */ - if (rhsneg) res->bits|=DECINF; /* +Infinity [else is +0] */ - } - else if (dac->lsu[0]==0) { /* lhs=1 */ - /* 1**Infinity is inexact, so return fully-padded 1.0000 */ - Int shift=set->digits-1; - *res->lsu=1; /* was 0, make int 1 */ - res->digits=decShiftToMost(res->lsu, 1, shift); - res->exponent=-shift; /* make 1.0000... 
*/ - status|=DEC_Inexact|DEC_Rounded; /* deemed inexact */ - } - else { /* lhs>1 */ - if (!rhsneg) res->bits|=DECINF; /* +Infinity [else is +0] */ - } - } /* lhs>=0 */ - break;} - /* [lhs infinity drops through] */ - } /* specials */ - - /* Original rhs may be an integer that fits and is in range */ - n=decGetInt(rhs); - if (n!=BADINT) { /* it is an integer */ - rhsint=1; /* record the fact for 1**n */ - isoddint=(Flag)n&1; /* [works even if big] */ - if (n!=BIGEVEN && n!=BIGODD) /* can use integer path? */ - useint=1; /* looks good */ - } - - if (decNumberIsNegative(lhs) /* -x .. */ - && isoddint) bits=DECNEG; /* .. to an odd power */ - - /* handle LHS infinity */ - if (decNumberIsInfinite(lhs)) { /* [NaNs already handled] */ - uByte rbits=rhs->bits; /* save */ - uprv_decNumberZero(res); /* prepare */ - if (n==0) *res->lsu=1; /* [-]Inf**0 => 1 */ - else { - /* -Inf**nonint -> error */ - if (!rhsint && decNumberIsNegative(lhs)) { - status|=DEC_Invalid_operation; /* -Inf**nonint is error */ - break;} - if (!(rbits & DECNEG)) bits|=DECINF; /* was not a **-n */ - /* [otherwise will be 0 or -0] */ - res->bits=bits; - } - break;} - - /* similarly handle LHS zero */ - if (decNumberIsZero(lhs)) { - if (n==0) { /* 0**0 => Error */ - #if DECSUBSET - if (!set->extended) { /* [unless subset] */ - uprv_decNumberZero(res); - *res->lsu=1; /* return 1 */ - break;} - #endif - status|=DEC_Invalid_operation; - } - else { /* 0**x */ - uByte rbits=rhs->bits; /* save */ - if (rbits & DECNEG) { /* was a 0**(-n) */ - #if DECSUBSET - if (!set->extended) { /* [bad if subset] */ - status|=DEC_Invalid_operation; - break;} - #endif - bits|=DECINF; - } - uprv_decNumberZero(res); /* prepare */ - /* [otherwise will be 0 or -0] */ - res->bits=bits; - } - break;} - - /* here both lhs and rhs are finite; rhs==0 is handled in the */ - /* integer path. 
Next handle the non-integer cases */ - if (!useint) { /* non-integral rhs */ - /* any -ve lhs is bad, as is either operand or context out of */ - /* bounds */ - if (decNumberIsNegative(lhs)) { - status|=DEC_Invalid_operation; - break;} - if (decCheckMath(lhs, set, &status) - || decCheckMath(rhs, set, &status)) break; /* variable status */ - - uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); /* clean context */ - aset.emax=DEC_MAX_MATH; /* usual bounds */ - aset.emin=-DEC_MAX_MATH; /* .. */ - aset.clamp=0; /* and no concrete format */ - - /* calculate the result using exp(ln(lhs)*rhs), which can */ - /* all be done into the accumulator, dac. The precision needed */ - /* is enough to contain the full information in the lhs (which */ - /* is the total digits, including exponent), or the requested */ - /* precision, if larger, + 4; 6 is used for the exponent */ - /* maximum length, and this is also used when it is shorter */ - /* than the requested digits as it greatly reduces the >0.5 ulp */ - /* cases at little cost (because Ln doubles digits each */ - /* iteration so a few extra digits rarely causes an extra */ - /* iteration) */ - aset.digits=MAXI(lhs->digits, set->digits)+6+4; - } /* non-integer rhs */ - - else { /* rhs is in-range integer */ - if (n==0) { /* x**0 = 1 */ - /* (0**0 was handled above) */ - uprv_decNumberZero(res); /* result=1 */ - *res->lsu=1; /* .. 
*/ - break;} - /* rhs is a non-zero integer */ - if (n<0) n=-n; /* use abs(n) */ - - aset=*set; /* clone the context */ - aset.round=DEC_ROUND_HALF_EVEN; /* internally use balanced */ - /* calculate the working DIGITS */ - aset.digits=reqdigits+(rhs->digits+rhs->exponent)+2; - #if DECSUBSET - if (!set->extended) aset.digits--; /* use classic precision */ - #endif - /* it's an error if this is more than can be handled */ - if (aset.digits>DECNUMMAXP) {status|=DEC_Invalid_operation; break;} - } /* integer path */ - - /* aset.digits is the count of digits for the accumulator needed */ - /* if accumulator is too long for local storage, then allocate */ - needbytes=sizeof(decNumber)+(D2U(aset.digits)-1)*sizeof(Unit); - /* [needbytes also used below if 1/lhs needed] */ - if (needbytes>sizeof(dacbuff)) { - allocdac=(decNumber *)malloc(needbytes); - if (allocdac==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - dac=allocdac; /* use the allocated space */ - } - /* here, aset is set up and accumulator is ready for use */ - - if (!useint) { /* non-integral rhs */ - /* x ** y; special-case x=1 here as it will otherwise always */ - /* reduce to integer 1; decLnOp has a fastpath which detects */ - /* the case of x=1 */ - decLnOp(dac, lhs, &aset, &status); /* dac=ln(lhs) */ - /* [no error possible, as lhs 0 already handled] */ - if (ISZERO(dac)) { /* x==1, 1.0, etc. */ - /* need to return fully-padded 1.0000 etc., but rhsint->1 */ - *dac->lsu=1; /* was 0, make int 1 */ - if (!rhsint) { /* add padding */ - Int shift=set->digits-1; - dac->digits=decShiftToMost(dac->lsu, 1, shift); - dac->exponent=-shift; /* make 1.0000... 
*/ - status|=DEC_Inexact|DEC_Rounded; /* deemed inexact */ - } - } - else { - decMultiplyOp(dac, dac, rhs, &aset, &status); /* dac=dac*rhs */ - decExpOp(dac, dac, &aset, &status); /* dac=exp(dac) */ - } - /* and drop through for final rounding */ - } /* non-integer rhs */ - - else { /* carry on with integer */ - uprv_decNumberZero(dac); /* acc=1 */ - *dac->lsu=1; /* .. */ - - /* if a negative power the constant 1 is needed, and if not subset */ - /* invert the lhs now rather than inverting the result later */ - if (decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */ - decNumber *inv=invbuff; /* asssume use fixed buffer */ - uprv_decNumberCopy(&dnOne, dac); /* dnOne=1; [needed now or later] */ - #if DECSUBSET - if (set->extended) { /* need to calculate 1/lhs */ - #endif - /* divide lhs into 1, putting result in dac [dac=1/dac] */ - decDivideOp(dac, &dnOne, lhs, &aset, DIVIDE, &status); - /* now locate or allocate space for the inverted lhs */ - if (needbytes>sizeof(invbuff)) { - allocinv=(decNumber *)malloc(needbytes); - if (allocinv==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - inv=allocinv; /* use the allocated space */ - } - /* [inv now points to big-enough buffer or allocated storage] */ - uprv_decNumberCopy(inv, dac); /* copy the 1/lhs */ - uprv_decNumberCopy(dac, &dnOne); /* restore acc=1 */ - lhs=inv; /* .. and go forward with new lhs */ - #if DECSUBSET - } - #endif - } - - /* Raise-to-the-power loop... */ - seenbit=0; /* set once a 1-bit is encountered */ - for (i=1;;i++){ /* for each bit [top bit ignored] */ - /* abandon if had overflow or terminal underflow */ - if (status & (DEC_Overflow|DEC_Underflow)) { /* interesting? 
*/ - if (status&DEC_Overflow || ISZERO(dac)) break; - } - /* [the following two lines revealed an optimizer bug in a C++ */ - /* compiler, with symptom: 5**3 -> 25, when n=n+n was used] */ - n=n<<1; /* move next bit to testable position */ - if (n<0) { /* top bit is set */ - seenbit=1; /* OK, significant bit seen */ - decMultiplyOp(dac, dac, lhs, &aset, &status); /* dac=dac*x */ - } - if (i==31) break; /* that was the last bit */ - if (!seenbit) continue; /* no need to square 1 */ - decMultiplyOp(dac, dac, dac, &aset, &status); /* dac=dac*dac [square] */ - } /*i*/ /* 32 bits */ - - /* complete internal overflow or underflow processing */ - if (status & (DEC_Overflow|DEC_Underflow)) { - #if DECSUBSET - /* If subset, and power was negative, reverse the kind of -erflow */ - /* [1/x not yet done] */ - if (!set->extended && decNumberIsNegative(rhs)) { - if (status & DEC_Overflow) - status^=DEC_Overflow | DEC_Underflow | DEC_Subnormal; - else { /* trickier -- Underflow may or may not be set */ - status&=~(DEC_Underflow | DEC_Subnormal); /* [one or both] */ - status|=DEC_Overflow; - } - } - #endif - dac->bits=(dac->bits & ~DECNEG) | bits; /* force correct sign */ - /* round subnormals [to set.digits rather than aset.digits] */ - /* or set overflow result similarly as required */ - decFinalize(dac, set, &residue, &status); - uprv_decNumberCopy(res, dac); /* copy to result (is now OK length) */ - break; - } - - #if DECSUBSET - if (!set->extended && /* subset math */ - decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */ - /* so divide result into 1 [dac=1/dac] */ - decDivideOp(dac, &dnOne, dac, &aset, DIVIDE, &status); - } - #endif - } /* rhs integer path */ - - /* reduce result to the requested length and copy to result */ - decCopyFit(res, dac, set, &residue, &status); - decFinish(res, set, &residue, &status); /* final cleanup */ - #if DECSUBSET - if (!set->extended) decTrim(res, set, 0, 1, &dropped); /* trailing zeros */ - #endif - } while(0); /* end protected 
*/ - - if (allocdac!=NULL) free(allocdac); /* drop any storage used */ - if (allocinv!=NULL) free(allocinv); /* .. */ - #if DECSUBSET - if (alloclhs!=NULL) free(alloclhs); /* .. */ - if (allocrhs!=NULL) free(allocrhs); /* .. */ - #endif - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberPower */ - -/* ------------------------------------------------------------------ */ -/* decNumberQuantize -- force exponent to requested value */ -/* */ -/* This computes C = op(A, B), where op adjusts the coefficient */ -/* of C (by rounding or shifting) such that the exponent (-scale) */ -/* of C has exponent of B. The numerical value of C will equal A, */ -/* except for the effects of any rounding that occurred. */ -/* */ -/* res is C, the result. C may be A or B */ -/* lhs is A, the number to adjust */ -/* rhs is B, the number with exponent to match */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Unless there is an error or the result is infinite, the exponent */ -/* after the operation is guaranteed to be equal to that of B. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberQuantize(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decQuantizeOp(res, lhs, rhs, set, 1, &status); - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberQuantize */ - -/* ------------------------------------------------------------------ */ -/* decNumberReduce -- remove trailing zeros */ -/* */ -/* This computes C = 0 + A, and normalizes the result */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. 
*/ -/* ------------------------------------------------------------------ */ -/* Previously known as Normalize */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberNormalize(decNumber *res, const decNumber *rhs, - decContext *set) { - return uprv_decNumberReduce(res, rhs, set); - } /* decNumberNormalize */ - -U_CAPI decNumber * U_EXPORT2 uprv_decNumberReduce(decNumber *res, const decNumber *rhs, - decContext *set) { - #if DECSUBSET - decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ - #endif - uInt status=0; /* as usual */ - Int residue=0; /* as usual */ - Int dropped; /* work */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operand and set lostDigits status, as needed */ - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, &status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - /* Infinities copy through; NaNs need usual treatment */ - if (decNumberIsNaN(rhs)) { - decNaNs(res, rhs, NULL, set, &status); - break; - } - - /* reduce result to the requested length and copy to result */ - decCopyFit(res, rhs, set, &residue, &status); /* copy & round */ - decFinish(res, set, &residue, &status); /* cleanup/set flags */ - decTrim(res, set, 1, 0, &dropped); /* normalize in place */ - /* [may clamp] */ - } while(0); /* end protected */ - - #if DECSUBSET - if (allocrhs !=NULL) free(allocrhs); /* .. */ - #endif - if (status!=0) decStatus(res, status, set);/* then report status */ - return res; - } /* decNumberReduce */ - -/* ------------------------------------------------------------------ */ -/* decNumberRescale -- force exponent to requested value */ -/* */ -/* This computes C = op(A, B), where op adjusts the coefficient */ -/* of C (by rounding or shifting) such that the exponent (-scale) */ -/* of C has the value B. 
The numerical value of C will equal A, */ -/* except for the effects of any rounding that occurred. */ -/* */ -/* res is C, the result. C may be A or B */ -/* lhs is A, the number to adjust */ -/* rhs is B, the requested exponent */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Unless there is an error or the result is infinite, the exponent */ -/* after the operation is guaranteed to be equal to B. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberRescale(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decQuantizeOp(res, lhs, rhs, set, 0, &status); - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberRescale */ - -/* ------------------------------------------------------------------ */ -/* decNumberRemainder -- divide and return remainder */ -/* */ -/* This computes C = A % B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X%X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberRemainder(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decDivideOp(res, lhs, rhs, set, REMAINDER, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberRemainder */ - -/* ------------------------------------------------------------------ */ -/* decNumberRemainderNear -- divide and return remainder from nearest */ -/* */ -/* This computes C = A % B, where % is the IEEE remainder operator */ -/* */ -/* res is C, the result. 
C may be A and/or B (e.g., X=X%X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberRemainderNear(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - decDivideOp(res, lhs, rhs, set, REMNEAR, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberRemainderNear */ - -/* ------------------------------------------------------------------ */ -/* decNumberRotate -- rotate the coefficient of a Number left/right */ -/* */ -/* This computes C = A rot B (in base ten and rotating set->digits */ -/* digits). */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=XrotX) */ -/* lhs is A */ -/* rhs is B, the number of digits to rotate (-ve to right) */ -/* set is the context */ -/* */ -/* The digits of the coefficient of A are rotated to the left (if B */ -/* is positive) or to the right (if B is negative) without adjusting */ -/* the exponent or the sign of A. If lhs->digits is less than */ -/* set->digits the coefficient is padded with zeros on the left */ -/* before the rotate. Any leading zeros in the result are removed */ -/* as usual. */ -/* */ -/* B must be an integer (q=0) and in the range -set->digits through */ -/* +set->digits. */ -/* C must have space for set->digits digits. */ -/* NaNs are propagated as usual. Infinities are unaffected (but */ -/* B must be valid). No status is set unless B is invalid or an */ -/* operand is an sNaN. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberRotate(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - Int rotate; /* rhs as an Int */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - /* NaNs propagate as normal */ - if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) - decNaNs(res, lhs, rhs, set, &status); - /* rhs must be an integer */ - else if (decNumberIsInfinite(rhs) || rhs->exponent!=0) - status=DEC_Invalid_operation; - else { /* both numeric, rhs is an integer */ - rotate=decGetInt(rhs); /* [cannot fail] */ - if (rotate==BADINT /* something bad .. */ - || rotate==BIGODD || rotate==BIGEVEN /* .. very big .. */ - || abs(rotate)>set->digits) /* .. or out of range */ - status=DEC_Invalid_operation; - else { /* rhs is OK */ - uprv_decNumberCopy(res, lhs); - /* convert -ve rotate to equivalent positive rotation */ - if (rotate<0) rotate=set->digits+rotate; - if (rotate!=0 && rotate!=set->digits /* zero or full rotation */ - && !decNumberIsInfinite(res)) { /* lhs was infinite */ - /* left-rotate to do; 0 < rotate < set->digits */ - uInt units, shift; /* work */ - uInt msudigits; /* digits in result msu */ - Unit *msu=res->lsu+D2U(res->digits)-1; /* current msu */ - Unit *msumax=res->lsu+D2U(set->digits)-1; /* rotation msu */ - for (msu++; msu<=msumax; msu++) *msu=0; /* ensure high units=0 */ - res->digits=set->digits; /* now full-length */ - msudigits=MSUDIGITS(res->digits); /* actual digits in msu */ - - /* rotation here is done in-place, in three steps */ - /* 1. shift all to least up to one unit to unit-align final */ - /* lsd [any digits shifted out are rotated to the left, */ - /* abutted to the original msd (which may require split)] */ - /* */ - /* [if there are no whole units left to rotate, the */ - /* rotation is now complete] */ - /* */ - /* 2. 
shift to least, from below the split point only, so that */ - /* the final msd is in the right place in its Unit [any */ - /* digits shifted out will fit exactly in the current msu, */ - /* left aligned, no split required] */ - /* */ - /* 3. rotate all the units by reversing left part, right */ - /* part, and then whole */ - /* */ - /* example: rotate right 8 digits (2 units + 2), DECDPUN=3. */ - /* */ - /* start: 00a bcd efg hij klm npq */ - /* */ - /* 1a 000 0ab cde fgh|ijk lmn [pq saved] */ - /* 1b 00p qab cde fgh|ijk lmn */ - /* */ - /* 2a 00p qab cde fgh|00i jkl [mn saved] */ - /* 2b mnp qab cde fgh|00i jkl */ - /* */ - /* 3a fgh cde qab mnp|00i jkl */ - /* 3b fgh cde qab mnp|jkl 00i */ - /* 3c 00i jkl mnp qab cde fgh */ - - /* Step 1: amount to shift is the partial right-rotate count */ - rotate=set->digits-rotate; /* make it right-rotate */ - units=rotate/DECDPUN; /* whole units to rotate */ - shift=rotate%DECDPUN; /* left-over digits count */ - if (shift>0) { /* not an exact number of units */ - uInt save=res->lsu[0]%powers[shift]; /* save low digit(s) */ - decShiftToLeast(res->lsu, D2U(res->digits), shift); - if (shift>msudigits) { /* msumax-1 needs >0 digits */ - uInt rem=save%powers[shift-msudigits];/* split save */ - *msumax=(Unit)(save/powers[shift-msudigits]); /* and insert */ - *(msumax-1)=*(msumax-1) - +(Unit)(rem*powers[DECDPUN-(shift-msudigits)]); /* .. */ - } - else { /* all fits in msumax */ - *msumax=*msumax+(Unit)(save*powers[msudigits-shift]); /* [maybe *1] */ - } - } /* digits shift needed */ - - /* If whole units to rotate... 
*/ - if (units>0) { /* some to do */ - /* Step 2: the units to touch are the whole ones in rotate, */ - /* if any, and the shift is DECDPUN-msudigits (which may be */ - /* 0, again) */ - shift=DECDPUN-msudigits; - if (shift>0) { /* not an exact number of units */ - uInt save=res->lsu[0]%powers[shift]; /* save low digit(s) */ - decShiftToLeast(res->lsu, units, shift); - *msumax=*msumax+(Unit)(save*powers[msudigits]); - } /* partial shift needed */ - - /* Step 3: rotate the units array using triple reverse */ - /* (reversing is easy and fast) */ - decReverse(res->lsu+units, msumax); /* left part */ - decReverse(res->lsu, res->lsu+units-1); /* right part */ - decReverse(res->lsu, msumax); /* whole */ - } /* whole units to rotate */ - /* the rotation may have left an undetermined number of zeros */ - /* on the left, so true length needs to be calculated */ - res->digits=decGetDigits(res->lsu, msumax-res->lsu+1); - } /* rotate needed */ - } /* rhs OK */ - } /* numerics */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberRotate */ - -/* ------------------------------------------------------------------ */ -/* decNumberSameQuantum -- test for equal exponents */ -/* */ -/* res is the result number, which will contain either 0 or 1 */ -/* lhs is a number to test */ -/* rhs is the second (usually a pattern) */ -/* */ -/* No errors are possible and no context is needed. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberSameQuantum(decNumber *res, const decNumber *lhs, - const decNumber *rhs) { - Unit ret=0; /* return value */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, DECUNCONT)) return res; - #endif - - if (SPECIALARGS) { - if (decNumberIsNaN(lhs) && decNumberIsNaN(rhs)) ret=1; - else if (decNumberIsInfinite(lhs) && decNumberIsInfinite(rhs)) ret=1; - /* [anything else with a special gives 0] */ - } - else if (lhs->exponent==rhs->exponent) ret=1; - - uprv_decNumberZero(res); /* OK to overwrite an operand now */ - *res->lsu=ret; - return res; - } /* decNumberSameQuantum */ - -/* ------------------------------------------------------------------ */ -/* decNumberScaleB -- multiply by a power of 10 */ -/* */ -/* This computes C = A x 10**B where B is an integer (q=0) with */ -/* maximum magnitude 2*(emax+digits) */ -/* */ -/* res is C, the result. C may be A or B */ -/* lhs is A, the number to adjust */ -/* rhs is B, the requested power of ten to use */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* The result may underflow or overflow. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberScaleB(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - Int reqexp; /* requested exponent change [B] */ - uInt status=0; /* accumulator */ - Int residue; /* work */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - /* Handle special values except lhs infinite */ - if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) - decNaNs(res, lhs, rhs, set, &status); - /* rhs must be an integer */ - else if (decNumberIsInfinite(rhs) || rhs->exponent!=0) - status=DEC_Invalid_operation; - else { - /* lhs is a number; rhs is a finite with q==0 */ - reqexp=decGetInt(rhs); /* [cannot fail] */ - if (reqexp==BADINT /* something bad .. */ - || reqexp==BIGODD || reqexp==BIGEVEN /* .. very big .. */ - || abs(reqexp)>(2*(set->digits+set->emax))) /* .. or out of range */ - status=DEC_Invalid_operation; - else { /* rhs is OK */ - uprv_decNumberCopy(res, lhs); /* all done if infinite lhs */ - if (!decNumberIsInfinite(res)) { /* prepare to scale */ - res->exponent+=reqexp; /* adjust the exponent */ - residue=0; - decFinalize(res, set, &residue, &status); /* .. and check */ - } /* finite LHS */ - } /* rhs OK */ - } /* rhs finite */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberScaleB */ - -/* ------------------------------------------------------------------ */ -/* decNumberShift -- shift the coefficient of a Number left or right */ -/* */ -/* This computes C = A << B or C = A >> -B (in base ten). */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X<digits through */ -/* +set->digits. */ -/* C must have space for set->digits digits. */ -/* NaNs are propagated as usual. Infinities are unaffected (but */ -/* B must be valid). No status is set unless B is invalid or an */ -/* operand is an sNaN. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberShift(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - Int shift; /* rhs as an Int */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - /* NaNs propagate as normal */ - if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) - decNaNs(res, lhs, rhs, set, &status); - /* rhs must be an integer */ - else if (decNumberIsInfinite(rhs) || rhs->exponent!=0) - status=DEC_Invalid_operation; - else { /* both numeric, rhs is an integer */ - shift=decGetInt(rhs); /* [cannot fail] */ - if (shift==BADINT /* something bad .. */ - || shift==BIGODD || shift==BIGEVEN /* .. very big .. */ - || abs(shift)>set->digits) /* .. or out of range */ - status=DEC_Invalid_operation; - else { /* rhs is OK */ - uprv_decNumberCopy(res, lhs); - if (shift!=0 && !decNumberIsInfinite(res)) { /* something to do */ - if (shift>0) { /* to left */ - if (shift==set->digits) { /* removing all */ - *res->lsu=0; /* so place 0 */ - res->digits=1; /* .. */ - } - else { /* */ - /* first remove leading digits if necessary */ - if (res->digits+shift>set->digits) { - decDecap(res, res->digits+shift-set->digits); - /* that updated res->digits; may have gone to 1 (for a */ - /* single digit or for zero */ - } - if (res->digits>1 || *res->lsu) /* if non-zero.. */ - res->digits=decShiftToMost(res->lsu, res->digits, shift); - } /* partial left */ - } /* left */ - else { /* to right */ - if (-shift>=res->digits) { /* discarding all */ - *res->lsu=0; /* so place 0 */ - res->digits=1; /* .. 
*/ - } - else { - decShiftToLeast(res->lsu, D2U(res->digits), -shift); - res->digits-=(-shift); - } - } /* to right */ - } /* non-0 non-Inf shift */ - } /* rhs OK */ - } /* numerics */ - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberShift */ - -/* ------------------------------------------------------------------ */ -/* decNumberSquareRoot -- square root operator */ -/* */ -/* This computes C = squareroot(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context; note that rounding mode has no effect */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -/* This uses the following varying-precision algorithm in: */ -/* */ -/* Properly Rounded Variable Precision Square Root, T. E. Hull and */ -/* A. Abrham, ACM Transactions on Mathematical Software, Vol 11 #3, */ -/* pp229-237, ACM, September 1985. */ -/* */ -/* The square-root is calculated using Newton's method, after which */ -/* a check is made to ensure the result is correctly rounded. */ -/* */ -/* % [Reformatted original Numerical Turing source code follows.] */ -/* function sqrt(x : real) : real */ -/* % sqrt(x) returns the properly rounded approximation to the square */ -/* % root of x, in the precision of the calling environment, or it */ -/* % fails if x < 0. 
*/ -/* % t e hull and a abrham, august, 1984 */ -/* if x <= 0 then */ -/* if x < 0 then */ -/* assert false */ -/* else */ -/* result 0 */ -/* end if */ -/* end if */ -/* var f := setexp(x, 0) % fraction part of x [0.1 <= x < 1] */ -/* var e := getexp(x) % exponent part of x */ -/* var approx : real */ -/* if e mod 2 = 0 then */ -/* approx := .259 + .819 * f % approx to root of f */ -/* else */ -/* f := f/l0 % adjustments */ -/* e := e + 1 % for odd */ -/* approx := .0819 + 2.59 * f % exponent */ -/* end if */ -/* */ -/* var p:= 3 */ -/* const maxp := currentprecision + 2 */ -/* loop */ -/* p := min(2*p - 2, maxp) % p = 4,6,10, . . . , maxp */ -/* precision p */ -/* approx := .5 * (approx + f/approx) */ -/* exit when p = maxp */ -/* end loop */ -/* */ -/* % approx is now within 1 ulp of the properly rounded square root */ -/* % of f; to ensure proper rounding, compare squares of (approx - */ -/* % l/2 ulp) and (approx + l/2 ulp) with f. */ -/* p := currentprecision */ -/* begin */ -/* precision p + 2 */ -/* const approxsubhalf := approx - setexp(.5, -p) */ -/* if mulru(approxsubhalf, approxsubhalf) > f then */ -/* approx := approx - setexp(.l, -p + 1) */ -/* else */ -/* const approxaddhalf := approx + setexp(.5, -p) */ -/* if mulrd(approxaddhalf, approxaddhalf) < f then */ -/* approx := approx + setexp(.l, -p + 1) */ -/* end if */ -/* end if */ -/* end */ -/* result setexp(approx, e div 2) % fix exponent */ -/* end sqrt */ -/* ------------------------------------------------------------------ */ -#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif -U_CAPI decNumber * U_EXPORT2 uprv_decNumberSquareRoot(decNumber *res, const decNumber *rhs, - decContext *set) { - decContext workset, approxset; /* work contexts */ - decNumber dzero; /* used for constant zero */ - Int maxp; /* largest working precision */ - Int workp; /* working precision */ - Int residue=0; /* rounding residue */ - 
uInt status=0, ignore=0; /* status accumulators */ - uInt rstatus; /* .. */ - Int exp; /* working exponent */ - Int ideal; /* ideal (preferred) exponent */ - Int needbytes; /* work */ - Int dropped; /* .. */ - - #if DECSUBSET - decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ - #endif - /* buffer for f [needs +1 in case DECBUFFER 0] */ - decNumber buff[D2N(DECBUFFER+1)]; - /* buffer for a [needs +2 to match likely maxp] */ - decNumber bufa[D2N(DECBUFFER+2)]; - /* buffer for temporary, b [must be same size as a] */ - decNumber bufb[D2N(DECBUFFER+2)]; - decNumber *allocbuff=NULL; /* -> allocated buff, iff allocated */ - decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ - decNumber *allocbufb=NULL; /* -> allocated bufb, iff allocated */ - decNumber *f=buff; /* reduced fraction */ - decNumber *a=bufa; /* approximation to result */ - decNumber *b=bufb; /* intermediate result */ - /* buffer for temporary variable, up to 3 digits */ - decNumber buft[D2N(3)]; - decNumber *t=buft; /* up-to-3-digit constant or work */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operand and set lostDigits status, as needed */ - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, &status); - if (allocrhs==NULL) break; - /* [Note: 'f' allocation below could reuse this buffer if */ - /* used, but as this is rare they are kept separate for clarity.] 
*/ - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - /* handle infinities and NaNs */ - if (SPECIALARG) { - if (decNumberIsInfinite(rhs)) { /* an infinity */ - if (decNumberIsNegative(rhs)) status|=DEC_Invalid_operation; - else uprv_decNumberCopy(res, rhs); /* +Infinity */ - } - else decNaNs(res, rhs, NULL, set, &status); /* a NaN */ - break; - } - - /* calculate the ideal (preferred) exponent [floor(exp/2)] */ - /* [It would be nicer to write: ideal=rhs->exponent>>1, but this */ - /* generates a compiler warning. Generated code is the same.] */ - ideal=(rhs->exponent&~1)/2; /* target */ - - /* handle zeros */ - if (ISZERO(rhs)) { - uprv_decNumberCopy(res, rhs); /* could be 0 or -0 */ - res->exponent=ideal; /* use the ideal [safe] */ - /* use decFinish to clamp any out-of-range exponent, etc. */ - decFinish(res, set, &residue, &status); - break; - } - - /* any other -x is an oops */ - if (decNumberIsNegative(rhs)) { - status|=DEC_Invalid_operation; - break; - } - - /* space is needed for three working variables */ - /* f -- the same precision as the RHS, reduced to 0.01->0.99... 
*/ - /* a -- Hull's approximation -- precision, when assigned, is */ - /* currentprecision+1 or the input argument precision, */ - /* whichever is larger (+2 for use as temporary) */ - /* b -- intermediate temporary result (same size as a) */ - /* if any is too long for local storage, then allocate */ - workp=MAXI(set->digits+1, rhs->digits); /* actual rounding precision */ - workp=MAXI(workp, 7); /* at least 7 for low cases */ - maxp=workp+2; /* largest working precision */ - - needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); - if (needbytes>(Int)sizeof(buff)) { - allocbuff=(decNumber *)malloc(needbytes); - if (allocbuff==NULL) { /* hopeless -- abandon */ - status|=DEC_Insufficient_storage; - break;} - f=allocbuff; /* use the allocated space */ - } - /* a and b both need to be able to hold a maxp-length number */ - needbytes=sizeof(decNumber)+(D2U(maxp)-1)*sizeof(Unit); - if (needbytes>(Int)sizeof(bufa)) { /* [same applies to b] */ - allocbufa=(decNumber *)malloc(needbytes); - allocbufb=(decNumber *)malloc(needbytes); - if (allocbufa==NULL || allocbufb==NULL) { /* hopeless */ - status|=DEC_Insufficient_storage; - break;} - a=allocbufa; /* use the allocated spaces */ - b=allocbufb; /* .. */ - } - - /* copy rhs -> f, save exponent, and reduce so 0.1 <= f < 1 */ - uprv_decNumberCopy(f, rhs); - exp=f->exponent+f->digits; /* adjusted to Hull rules */ - f->exponent=-(f->digits); /* to range */ - - /* set up working context */ - uprv_decContextDefault(&workset, DEC_INIT_DECIMAL64); - workset.emax=DEC_MAX_EMAX; - workset.emin=DEC_MIN_EMIN; - - /* [Until further notice, no error is possible and status bits */ - /* (Rounded, etc.) should be ignored, not accumulated.] 
*/ - - /* Calculate initial approximation, and allow for odd exponent */ - workset.digits=workp; /* p for initial calculation */ - t->bits=0; t->digits=3; - a->bits=0; a->digits=3; - if ((exp & 1)==0) { /* even exponent */ - /* Set t=0.259, a=0.819 */ - t->exponent=-3; - a->exponent=-3; - #if DECDPUN>=3 - t->lsu[0]=259; - a->lsu[0]=819; - #elif DECDPUN==2 - t->lsu[0]=59; t->lsu[1]=2; - a->lsu[0]=19; a->lsu[1]=8; - #else - t->lsu[0]=9; t->lsu[1]=5; t->lsu[2]=2; - a->lsu[0]=9; a->lsu[1]=1; a->lsu[2]=8; - #endif - } - else { /* odd exponent */ - /* Set t=0.0819, a=2.59 */ - f->exponent--; /* f=f/10 */ - exp++; /* e=e+1 */ - t->exponent=-4; - a->exponent=-2; - #if DECDPUN>=3 - t->lsu[0]=819; - a->lsu[0]=259; - #elif DECDPUN==2 - t->lsu[0]=19; t->lsu[1]=8; - a->lsu[0]=59; a->lsu[1]=2; - #else - t->lsu[0]=9; t->lsu[1]=1; t->lsu[2]=8; - a->lsu[0]=9; a->lsu[1]=5; a->lsu[2]=2; - #endif - } - - decMultiplyOp(a, a, f, &workset, &ignore); /* a=a*f */ - decAddOp(a, a, t, &workset, 0, &ignore); /* ..+t */ - /* [a is now the initial approximation for sqrt(f), calculated with */ - /* currentprecision, which is also a's precision.] */ - - /* the main calculation loop */ - uprv_decNumberZero(&dzero); /* make 0 */ - uprv_decNumberZero(t); /* set t = 0.5 */ - t->lsu[0]=5; /* .. */ - t->exponent=-1; /* .. */ - workset.digits=3; /* initial p */ - for (; workset.digitsexponent+=exp/2; /* set correct exponent */ - rstatus=0; /* clear status */ - residue=0; /* .. 
and accumulator */ - decCopyFit(a, a, &approxset, &residue, &rstatus); /* reduce (if needed) */ - decFinish(a, &approxset, &residue, &rstatus); /* clean and finalize */ - - /* Overflow was possible if the input exponent was out-of-range, */ - /* in which case quit */ - if (rstatus&DEC_Overflow) { - status=rstatus; /* use the status as-is */ - uprv_decNumberCopy(res, a); /* copy to result */ - break; - } - - /* Preserve status except Inexact/Rounded */ - status|=(rstatus & ~(DEC_Rounded|DEC_Inexact)); - - /* Carry out the Hull correction */ - a->exponent-=exp/2; /* back to 0.1->1 */ - - /* a is now at final precision and within 1 ulp of the properly */ - /* rounded square root of f; to ensure proper rounding, compare */ - /* squares of (a - l/2 ulp) and (a + l/2 ulp) with f. */ - /* Here workset.digits=maxp and t=0.5, and a->digits determines */ - /* the ulp */ - workset.digits--; /* maxp-1 is OK now */ - t->exponent=-a->digits-1; /* make 0.5 ulp */ - decAddOp(b, a, t, &workset, DECNEG, &ignore); /* b = a - 0.5 ulp */ - workset.round=DEC_ROUND_UP; - decMultiplyOp(b, b, b, &workset, &ignore); /* b = mulru(b, b) */ - decCompareOp(b, f, b, &workset, COMPARE, &ignore); /* b ? f, reversed */ - if (decNumberIsNegative(b)) { /* f < b [i.e., b > f] */ - /* this is the more common adjustment, though both are rare */ - t->exponent++; /* make 1.0 ulp */ - t->lsu[0]=1; /* .. */ - decAddOp(a, a, t, &workset, DECNEG, &ignore); /* a = a - 1 ulp */ - /* assign to approx [round to length] */ - approxset.emin-=exp/2; /* adjust to match a */ - approxset.emax-=exp/2; - decAddOp(a, &dzero, a, &approxset, 0, &ignore); - } - else { - decAddOp(b, a, t, &workset, 0, &ignore); /* b = a + 0.5 ulp */ - workset.round=DEC_ROUND_DOWN; - decMultiplyOp(b, b, b, &workset, &ignore); /* b = mulrd(b, b) */ - decCompareOp(b, b, f, &workset, COMPARE, &ignore); /* b ? f */ - if (decNumberIsNegative(b)) { /* b < f */ - t->exponent++; /* make 1.0 ulp */ - t->lsu[0]=1; /* .. 
*/ - decAddOp(a, a, t, &workset, 0, &ignore); /* a = a + 1 ulp */ - /* assign to approx [round to length] */ - approxset.emin-=exp/2; /* adjust to match a */ - approxset.emax-=exp/2; - decAddOp(a, &dzero, a, &approxset, 0, &ignore); - } - } - /* [no errors are possible in the above, and rounding/inexact during */ - /* estimation are irrelevant, so status was not accumulated] */ - - /* Here, 0.1 <= a < 1 (still), so adjust back */ - a->exponent+=exp/2; /* set correct exponent */ - - /* count droppable zeros [after any subnormal rounding] by */ - /* trimming a copy */ - uprv_decNumberCopy(b, a); - decTrim(b, set, 1, 1, &dropped); /* [drops trailing zeros] */ - - /* Set Inexact and Rounded. The answer can only be exact if */ - /* it is short enough so that squaring it could fit in workp */ - /* digits, so this is the only (relatively rare) condition that */ - /* a careful check is needed */ - if (b->digits*2-1 > workp) { /* cannot fit */ - status|=DEC_Inexact|DEC_Rounded; - } - else { /* could be exact/unrounded */ - uInt mstatus=0; /* local status */ - decMultiplyOp(b, b, b, &workset, &mstatus); /* try the multiply */ - if (mstatus&DEC_Overflow) { /* result just won't fit */ - status|=DEC_Inexact|DEC_Rounded; - } - else { /* plausible */ - decCompareOp(t, b, rhs, &workset, COMPARE, &mstatus); /* b ? rhs */ - if (!ISZERO(t)) status|=DEC_Inexact|DEC_Rounded; /* not equal */ - else { /* is Exact */ - /* here, dropped is the count of trailing zeros in 'a' */ - /* use closest exponent to ideal... 
*/ - Int todrop=ideal-a->exponent; /* most that can be dropped */ - if (todrop<0) status|=DEC_Rounded; /* ideally would add 0s */ - else { /* unrounded */ - /* there are some to drop, but emax may not allow all */ - Int maxexp=set->emax-set->digits+1; - Int maxdrop=maxexp-a->exponent; - if (todrop>maxdrop && set->clamp) { /* apply clamping */ - todrop=maxdrop; - status|=DEC_Clamped; - } - if (dropped0) { /* have some to drop */ - decShiftToLeast(a->lsu, D2U(a->digits), todrop); - a->exponent+=todrop; /* maintain numerical value */ - a->digits-=todrop; /* new length */ - } - } - } - } - } - - /* double-check Underflow, as perhaps the result could not have */ - /* been subnormal (initial argument too big), or it is now Exact */ - if (status&DEC_Underflow) { - Int ae=rhs->exponent+rhs->digits-1; /* adjusted exponent */ - /* check if truly subnormal */ - #if DECEXTFLAG /* DEC_Subnormal too */ - if (ae>=set->emin*2) status&=~(DEC_Subnormal|DEC_Underflow); - #else - if (ae>=set->emin*2) status&=~DEC_Underflow; - #endif - /* check if truly inexact */ - if (!(status&DEC_Inexact)) status&=~DEC_Underflow; - } - - uprv_decNumberCopy(res, a); /* a is now the result */ - } while(0); /* end protected */ - - if (allocbuff!=NULL) free(allocbuff); /* drop any storage used */ - if (allocbufa!=NULL) free(allocbufa); /* .. */ - if (allocbufb!=NULL) free(allocbufb); /* .. */ - #if DECSUBSET - if (allocrhs !=NULL) free(allocrhs); /* .. */ - #endif - if (status!=0) decStatus(res, status, set);/* then report status */ - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberSquareRoot */ -#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 -#pragma GCC diagnostic pop -#endif - -/* ------------------------------------------------------------------ */ -/* decNumberSubtract -- subtract two Numbers */ -/* */ -/* This computes C = A - B */ -/* */ -/* res is C, the result. 
C may be A and/or B (e.g., X=X-X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* */ -/* C must have space for set->digits digits. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberSubtract(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - uInt status=0; /* accumulator */ - - decAddOp(res, lhs, rhs, set, DECNEG, &status); - if (status!=0) decStatus(res, status, set); - #if DECCHECK - decCheckInexact(res, set); - #endif - return res; - } /* decNumberSubtract */ - -/* ------------------------------------------------------------------ */ -/* decNumberToIntegralExact -- round-to-integral-value with InExact */ -/* decNumberToIntegralValue -- round-to-integral-value */ -/* */ -/* res is the result */ -/* rhs is input number */ -/* set is the context */ -/* */ -/* res must have space for any value of rhs. */ -/* */ -/* This implements the IEEE special operators and therefore treats */ -/* special values as valid. For finite numbers it returns */ -/* rescale(rhs, 0) if rhs->exponent is <0. */ -/* Otherwise the result is rhs (so no error is possible, except for */ -/* sNaN). */ -/* */ -/* The context is used for rounding mode and status after sNaN, but */ -/* the digits setting is ignored. The Exact version will signal */ -/* Inexact if the result differs numerically from rhs; the other */ -/* never signals Inexact. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberToIntegralExact(decNumber *res, const decNumber *rhs, - decContext *set) { - decNumber dn; - decContext workset; /* working context */ - uInt status=0; /* accumulator */ - - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - /* handle infinities and NaNs */ - if (SPECIALARG) { - if (decNumberIsInfinite(rhs)) uprv_decNumberCopy(res, rhs); /* an Infinity */ - else decNaNs(res, rhs, NULL, set, &status); /* a NaN */ - } - else { /* finite */ - /* have a finite number; no error possible (res must be big enough) */ - if (rhs->exponent>=0) return uprv_decNumberCopy(res, rhs); - /* that was easy, but if negative exponent there is work to do... */ - workset=*set; /* clone rounding, etc. */ - workset.digits=rhs->digits; /* no length rounding */ - workset.traps=0; /* no traps */ - uprv_decNumberZero(&dn); /* make a number with exponent 0 */ - uprv_decNumberQuantize(res, rhs, &dn, &workset); - status|=workset.status; - } - if (status!=0) decStatus(res, status, set); - return res; - } /* decNumberToIntegralExact */ - -U_CAPI decNumber * U_EXPORT2 uprv_decNumberToIntegralValue(decNumber *res, const decNumber *rhs, - decContext *set) { - decContext workset=*set; /* working context */ - workset.traps=0; /* no traps */ - uprv_decNumberToIntegralExact(res, rhs, &workset); - /* this never affects set, except for sNaNs; NaN will have been set */ - /* or propagated already, so no need to call decStatus */ - set->status|=workset.status&DEC_Invalid_operation; - return res; - } /* decNumberToIntegralValue */ - -/* ------------------------------------------------------------------ */ -/* decNumberXor -- XOR two Numbers, digitwise */ -/* */ -/* This computes C = A ^ B */ -/* */ -/* res is C, the result. 
C may be A and/or B (e.g., X=X^X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context (used for result length and error report) */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Logical function restrictions apply (see above); a NaN is */ -/* returned with Invalid_operation if a restriction is violated. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberXor(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - const Unit *ua, *ub; /* -> operands */ - const Unit *msua, *msub; /* -> operand msus */ - Unit *uc, *msuc; /* -> result and its msu */ - Int msudigs; /* digits in res msu */ - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) - || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - /* operands are valid */ - ua=lhs->lsu; /* bottom-up */ - ub=rhs->lsu; /* .. */ - uc=res->lsu; /* .. 
*/ - msua=ua+D2U(lhs->digits)-1; /* -> msu of lhs */ - msub=ub+D2U(rhs->digits)-1; /* -> msu of rhs */ - msuc=uc+D2U(set->digits)-1; /* -> msu of result */ - msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ - for (; uc<=msuc; ua++, ub++, uc++) { /* Unit loop */ - Unit a, b; /* extract units */ - if (ua>msua) a=0; - else a=*ua; - if (ub>msub) b=0; - else b=*ub; - *uc=0; /* can now write back */ - if (a|b) { /* maybe 1 bits to examine */ - Int i, j; - /* This loop could be unrolled and/or use BIN2BCD tables */ - for (i=0; i1) { - decStatus(res, DEC_Invalid_operation, set); - return res; - } - if (uc==msuc && i==msudigs-1) break; /* just did final digit */ - } /* each digit */ - } /* non-zero */ - } /* each unit */ - /* [here uc-1 is the msu of the result] */ - res->digits=decGetDigits(res->lsu, uc-res->lsu); - res->exponent=0; /* integer */ - res->bits=0; /* sign=0 */ - return res; /* [no status to set] */ - } /* decNumberXor */ - - -/* ================================================================== */ -/* Utility routines */ -/* ================================================================== */ - -/* ------------------------------------------------------------------ */ -/* decNumberClass -- return the decClass of a decNumber */ -/* dn -- the decNumber to test */ -/* set -- the context to use for Emin */ -/* returns the decClass enum */ -/* ------------------------------------------------------------------ */ -enum decClass uprv_decNumberClass(const decNumber *dn, decContext *set) { - if (decNumberIsSpecial(dn)) { - if (decNumberIsQNaN(dn)) return DEC_CLASS_QNAN; - if (decNumberIsSNaN(dn)) return DEC_CLASS_SNAN; - /* must be an infinity */ - if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_INF; - return DEC_CLASS_POS_INF; - } - /* is finite */ - if (uprv_decNumberIsNormal(dn, set)) { /* most common */ - if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_NORMAL; - return DEC_CLASS_POS_NORMAL; - } - /* is subnormal or zero */ - if 
(decNumberIsZero(dn)) { /* most common */ - if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_ZERO; - return DEC_CLASS_POS_ZERO; - } - if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_SUBNORMAL; - return DEC_CLASS_POS_SUBNORMAL; - } /* decNumberClass */ - -/* ------------------------------------------------------------------ */ -/* decNumberClassToString -- convert decClass to a string */ -/* */ -/* eclass is a valid decClass */ -/* returns a constant string describing the class (max 13+1 chars) */ -/* ------------------------------------------------------------------ */ -const char *uprv_decNumberClassToString(enum decClass eclass) { - if (eclass==DEC_CLASS_POS_NORMAL) return DEC_ClassString_PN; - if (eclass==DEC_CLASS_NEG_NORMAL) return DEC_ClassString_NN; - if (eclass==DEC_CLASS_POS_ZERO) return DEC_ClassString_PZ; - if (eclass==DEC_CLASS_NEG_ZERO) return DEC_ClassString_NZ; - if (eclass==DEC_CLASS_POS_SUBNORMAL) return DEC_ClassString_PS; - if (eclass==DEC_CLASS_NEG_SUBNORMAL) return DEC_ClassString_NS; - if (eclass==DEC_CLASS_POS_INF) return DEC_ClassString_PI; - if (eclass==DEC_CLASS_NEG_INF) return DEC_ClassString_NI; - if (eclass==DEC_CLASS_QNAN) return DEC_ClassString_QN; - if (eclass==DEC_CLASS_SNAN) return DEC_ClassString_SN; - return DEC_ClassString_UN; /* Unknown */ - } /* decNumberClassToString */ - -/* ------------------------------------------------------------------ */ -/* decNumberCopy -- copy a number */ -/* */ -/* dest is the target decNumber */ -/* src is the source decNumber */ -/* returns dest */ -/* */ -/* (dest==src is allowed and is a no-op) */ -/* All fields are updated as required. This is a utility operation, */ -/* so special values are unchanged and no error is possible. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopy(decNumber *dest, const decNumber *src) { - - #if DECCHECK - if (src==NULL) return uprv_decNumberZero(dest); - #endif - - if (dest==src) return dest; /* no copy required */ - - /* Use explicit assignments here as structure assignment could copy */ - /* more than just the lsu (for small DECDPUN). This would not affect */ - /* the value of the results, but could disturb test harness spill */ - /* checking. */ - dest->bits=src->bits; - dest->exponent=src->exponent; - dest->digits=src->digits; - dest->lsu[0]=src->lsu[0]; - if (src->digits>DECDPUN) { /* more Units to come */ - const Unit *smsup, *s; /* work */ - Unit *d; /* .. */ - /* memcpy for the remaining Units would be safe as they cannot */ - /* overlap. However, this explicit loop is faster in short cases. */ - d=dest->lsu+1; /* -> first destination */ - smsup=src->lsu+D2U(src->digits); /* -> source msu+1 */ - for (s=src->lsu+1; sdigits digits. */ -/* No exception or error can occur; this is a quiet bitwise operation.*/ -/* See also decNumberAbs for a checking version of this. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopyAbs(decNumber *res, const decNumber *rhs) { - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; - #endif - uprv_decNumberCopy(res, rhs); - res->bits&=~DECNEG; /* turn off sign */ - return res; - } /* decNumberCopyAbs */ - -/* ------------------------------------------------------------------ */ -/* decNumberCopyNegate -- quiet negate value operator */ -/* */ -/* This sets C = negate(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* */ -/* C must have space for set->digits digits. */ -/* No exception or error can occur; this is a quiet bitwise operation.*/ -/* See also decNumberMinus for a checking version of this. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopyNegate(decNumber *res, const decNumber *rhs) { - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; - #endif - uprv_decNumberCopy(res, rhs); - res->bits^=DECNEG; /* invert the sign */ - return res; - } /* decNumberCopyNegate */ - -/* ------------------------------------------------------------------ */ -/* decNumberCopySign -- quiet copy and set sign operator */ -/* */ -/* This sets C = A with the sign of B */ -/* */ -/* res is C, the result. C may be A */ -/* lhs is A */ -/* rhs is B */ -/* */ -/* C must have space for set->digits digits. */ -/* No exception or error can occur; this is a quiet bitwise operation.*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopySign(decNumber *res, const decNumber *lhs, - const decNumber *rhs) { - uByte sign; /* rhs sign */ - #if DECCHECK - if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; - #endif - sign=rhs->bits & DECNEG; /* save sign bit */ - uprv_decNumberCopy(res, lhs); - res->bits&=~DECNEG; /* clear the sign */ - res->bits|=sign; /* set from rhs */ - return res; - } /* decNumberCopySign */ - -/* ------------------------------------------------------------------ */ -/* decNumberGetBCD -- get the coefficient in BCD8 */ -/* dn is the source decNumber */ -/* bcd is the uInt array that will receive dn->digits BCD bytes, */ -/* most-significant at offset 0 */ -/* returns bcd */ -/* */ -/* bcd must have at least dn->digits bytes. No error is possible; if */ -/* dn is a NaN or Infinite, digits must be 1 and the coefficient 0. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI uByte * U_EXPORT2 uprv_decNumberGetBCD(const decNumber *dn, uByte *bcd) { - uByte *ub=bcd+dn->digits-1; /* -> lsd */ - const Unit *up=dn->lsu; /* Unit pointer, -> lsu */ - - #if DECDPUN==1 /* trivial simple copy */ - for (; ub>=bcd; ub--, up++) *ub=*up; - #else /* chopping needed */ - uInt u=*up; /* work */ - uInt cut=DECDPUN; /* downcounter through unit */ - for (; ub>=bcd; ub--) { - *ub=(uByte)(u%10); /* [*6554 trick inhibits, here] */ - u=u/10; - cut--; - if (cut>0) continue; /* more in this unit */ - up++; - u=*up; - cut=DECDPUN; - } - #endif - return bcd; - } /* decNumberGetBCD */ - -/* ------------------------------------------------------------------ */ -/* decNumberSetBCD -- set (replace) the coefficient from BCD8 */ -/* dn is the target decNumber */ -/* bcd is the uInt array that will source n BCD bytes, most- */ -/* significant at offset 0 */ -/* n is the number of digits in the source BCD array (bcd) */ -/* returns dn */ -/* */ -/* dn must have space for at least n digits. No error is possible; */ -/* if dn is a NaN, or Infinite, or is to become a zero, n must be 1 */ -/* and bcd[0] zero. 
*/ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberSetBCD(decNumber *dn, const uByte *bcd, uInt n) { - Unit *up=dn->lsu+D2U(dn->digits)-1; /* -> msu [target pointer] */ - const uByte *ub=bcd; /* -> source msd */ - - #if DECDPUN==1 /* trivial simple copy */ - for (; ub=dn->lsu; up--) { /* each Unit from msu */ - *up=0; /* will take <=DECDPUN digits */ - for (; cut>0; ub++, cut--) *up=X10(*up)+*ub; - cut=DECDPUN; /* next Unit has all digits */ - } - #endif - dn->digits=n; /* set digit count */ - return dn; - } /* decNumberSetBCD */ - -/* ------------------------------------------------------------------ */ -/* decNumberIsNormal -- test normality of a decNumber */ -/* dn is the decNumber to test */ -/* set is the context to use for Emin */ -/* returns 1 if |dn| is finite and >=Nmin, 0 otherwise */ -/* ------------------------------------------------------------------ */ -Int uprv_decNumberIsNormal(const decNumber *dn, decContext *set) { - Int ae; /* adjusted exponent */ - #if DECCHECK - if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; - #endif - - if (decNumberIsSpecial(dn)) return 0; /* not finite */ - if (decNumberIsZero(dn)) return 0; /* not non-zero */ - - ae=dn->exponent+dn->digits-1; /* adjusted exponent */ - if (aeemin) return 0; /* is subnormal */ - return 1; - } /* decNumberIsNormal */ - -/* ------------------------------------------------------------------ */ -/* decNumberIsSubnormal -- test subnormality of a decNumber */ -/* dn is the decNumber to test */ -/* set is the context to use for Emin */ -/* returns 1 if |dn| is finite, non-zero, and exponent+dn->digits-1; /* adjusted exponent */ - if (aeemin) return 1; /* is subnormal */ - return 0; - } /* decNumberIsSubnormal */ - -/* ------------------------------------------------------------------ */ -/* decNumberTrim -- remove insignificant zeros */ -/* */ -/* dn is the number to trim */ -/* returns dn */ -/* */ -/* All fields 
are updated as required. This is a utility operation, */ -/* so special values are unchanged and no error is possible. The */ -/* zeros are removed unconditionally. */ -/* ------------------------------------------------------------------ */ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberTrim(decNumber *dn) { - Int dropped; /* work */ - decContext set; /* .. */ - #if DECCHECK - if (decCheckOperands(DECUNRESU, DECUNUSED, dn, DECUNCONT)) return dn; - #endif - uprv_decContextDefault(&set, DEC_INIT_BASE); /* clamp=0 */ - return decTrim(dn, &set, 0, 1, &dropped); - } /* decNumberTrim */ - -/* ------------------------------------------------------------------ */ -/* decNumberVersion -- return the name and version of this module */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -const char * uprv_decNumberVersion(void) { - return DECVERSION; - } /* decNumberVersion */ - -/* ------------------------------------------------------------------ */ -/* decNumberZero -- set a number to 0 */ -/* */ -/* dn is the number to set, with space for one digit */ -/* returns dn */ -/* */ -/* No error is possible. */ -/* ------------------------------------------------------------------ */ -/* Memset is not used as it is much slower in some environments. 
*/ -U_CAPI decNumber * U_EXPORT2 uprv_decNumberZero(decNumber *dn) { - - #if DECCHECK - if (decCheckOperands(dn, DECUNUSED, DECUNUSED, DECUNCONT)) return dn; - #endif - - dn->bits=0; - dn->exponent=0; - dn->digits=1; - dn->lsu[0]=0; - return dn; - } /* decNumberZero */ - -/* ================================================================== */ -/* Local routines */ -/* ================================================================== */ - -/* ------------------------------------------------------------------ */ -/* decToString -- lay out a number into a string */ -/* */ -/* dn is the number to lay out */ -/* string is where to lay out the number */ -/* eng is 1 if Engineering, 0 if Scientific */ -/* */ -/* string must be at least dn->digits+14 characters long */ -/* No error is possible. */ -/* */ -/* Note that this routine can generate a -0 or 0.000. These are */ -/* never generated in subset to-number or arithmetic, but can occur */ -/* in non-subset arithmetic (e.g., -1*0 or 1.234-1.234). */ -/* ------------------------------------------------------------------ */ -/* If DECCHECK is enabled the string "?" is returned if a number is */ -/* invalid. */ -static void decToString(const decNumber *dn, char *string, Flag eng) { - Int exp=dn->exponent; /* local copy */ - Int e; /* E-part value */ - Int pre; /* digits before the '.' 
*/ - Int cut; /* for counting digits in a Unit */ - char *c=string; /* work [output pointer] */ - const Unit *up=dn->lsu+D2U(dn->digits)-1; /* -> msu [input pointer] */ - uInt u, pow; /* work */ - - #if DECCHECK - if (decCheckOperands(DECUNRESU, dn, DECUNUSED, DECUNCONT)) { - strcpy(string, "?"); - return;} - #endif - - if (decNumberIsNegative(dn)) { /* Negatives get a minus */ - *c='-'; - c++; - } - if (dn->bits&DECSPECIAL) { /* Is a special value */ - if (decNumberIsInfinite(dn)) { - strcpy(c, "Inf"); - strcpy(c+3, "inity"); - return;} - /* a NaN */ - if (dn->bits&DECSNAN) { /* signalling NaN */ - *c='s'; - c++; - } - strcpy(c, "NaN"); - c+=3; /* step past */ - /* if not a clean non-zero coefficient, that's all there is in a */ - /* NaN string */ - if (exp!=0 || (*dn->lsu==0 && dn->digits==1)) return; - /* [drop through to add integer] */ - } - - /* calculate how many digits in msu, and hence first cut */ - cut=MSUDIGITS(dn->digits); /* [faster than remainder] */ - cut--; /* power of ten for digit */ - - if (exp==0) { /* simple integer [common fastpath] */ - for (;up>=dn->lsu; up--) { /* each Unit from msu */ - u=*up; /* contains DECDPUN digits to lay out */ - for (; cut>=0; c++, cut--) TODIGIT(u, cut, c, pow); - cut=DECDPUN-1; /* next Unit has all digits */ - } - *c='\0'; /* terminate the string */ - return;} - - /* non-0 exponent -- assume plain form */ - pre=dn->digits+exp; /* digits before '.' */ - e=0; /* no E */ - if ((exp>0) || (pre<-5)) { /* need exponential form */ - e=exp+dn->digits-1; /* calculate E value */ - pre=1; /* assume one digit before '.' 
*/ - if (eng && (e!=0)) { /* engineering: may need to adjust */ - Int adj; /* adjustment */ - /* The C remainder operator is undefined for negative numbers, so */ - /* a positive remainder calculation must be used here */ - if (e<0) { - adj=(-e)%3; - if (adj!=0) adj=3-adj; - } - else { /* e>0 */ - adj=e%3; - } - e=e-adj; - /* if dealing with zero still produce an exponent which is a */ - /* multiple of three, as expected, but there will only be the */ - /* one zero before the E, still. Otherwise note the padding. */ - if (!ISZERO(dn)) pre+=adj; - else { /* is zero */ - if (adj!=0) { /* 0.00Esnn needed */ - e=e+3; - pre=-(2-adj); - } - } /* zero */ - } /* eng */ - } /* need exponent */ - - /* lay out the digits of the coefficient, adding 0s and . as needed */ - u=*up; - if (pre>0) { /* xxx.xxx or xx00 (engineering) form */ - Int n=pre; - for (; pre>0; pre--, c++, cut--) { - if (cut<0) { /* need new Unit */ - if (up==dn->lsu) break; /* out of input digits (pre>digits) */ - up--; - cut=DECDPUN-1; - u=*up; - } - TODIGIT(u, cut, c, pow); - } - if (ndigits) { /* more to come, after '.' */ - *c='.'; c++; - for (;; c++, cut--) { - if (cut<0) { /* need new Unit */ - if (up==dn->lsu) break; /* out of input digits */ - up--; - cut=DECDPUN-1; - u=*up; - } - TODIGIT(u, cut, c, pow); - } - } - else for (; pre>0; pre--, c++) *c='0'; /* 0 padding (for engineering) needed */ - } - else { /* 0.xxx or 0.000xxx form */ - *c='0'; c++; - *c='.'; c++; - for (; pre<0; pre++, c++) *c='0'; /* add any 0's after '.' */ - for (; ; c++, cut--) { - if (cut<0) { /* need new Unit */ - if (up==dn->lsu) break; /* out of input digits */ - up--; - cut=DECDPUN-1; - u=*up; - } - TODIGIT(u, cut, c, pow); - } - } - - /* Finally add the E-part, if needed. 
It will never be 0, has a - base maximum and minimum of +999999999 through -999999999, but - could range down to -1999999998 for anormal numbers */ - if (e!=0) { - Flag had=0; /* 1=had non-zero */ - *c='E'; c++; - *c='+'; c++; /* assume positive */ - u=e; /* .. */ - if (e<0) { - *(c-1)='-'; /* oops, need - */ - u=-e; /* uInt, please */ - } - /* lay out the exponent [_itoa or equivalent is not ANSI C] */ - for (cut=9; cut>=0; cut--) { - TODIGIT(u, cut, c, pow); - if (*c=='0' && !had) continue; /* skip leading zeros */ - had=1; /* had non-0 */ - c++; /* step for next */ - } /* cut */ - } - *c='\0'; /* terminate the string (all paths) */ - return; - } /* decToString */ - -/* ------------------------------------------------------------------ */ -/* decAddOp -- add/subtract operation */ -/* */ -/* This computes C = A + B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X+X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* negate is DECNEG if rhs should be negated, or 0 otherwise */ -/* status accumulates status for the caller */ -/* */ -/* C must have space for set->digits digits. */ -/* Inexact in status must be 0 for correct Exact zero sign in result */ -/* ------------------------------------------------------------------ */ -/* If possible, the coefficient is calculated directly into C. */ -/* However, if: */ -/* -- a digits+1 calculation is needed because the numbers are */ -/* unaligned and span more than set->digits digits */ -/* -- a carry to digits+1 digits looks possible */ -/* -- C is the same as A or B, and the result would destructively */ -/* overlap the A or B coefficient */ -/* then the result must be calculated into a temporary buffer. In */ -/* this case a local (stack) buffer is used if possible, and only if */ -/* too long for that does malloc become the final resort. */ -/* */ -/* Misalignment is handled as follows: */ -/* Apad: (AExp>BExp) Swap operands and proceed as for BExp>AExp. 
*/ -/* BPad: Apply the padding by a combination of shifting (whole */ -/* units) and multiplication (part units). */ -/* */ -/* Addition, especially x=x+1, is speed-critical. */ -/* The static buffer is larger than might be expected to allow for */ -/* calls from higher-level funtions (notable exp). */ -/* ------------------------------------------------------------------ */ -static decNumber * decAddOp(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set, - uByte negate, uInt *status) { - #if DECSUBSET - decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ - decNumber *allocrhs=NULL; /* .., rhs */ - #endif - Int rhsshift; /* working shift (in Units) */ - Int maxdigits; /* longest logical length */ - Int mult; /* multiplier */ - Int residue; /* rounding accumulator */ - uByte bits; /* result bits */ - Flag diffsign; /* non-0 if arguments have different sign */ - Unit *acc; /* accumulator for result */ - Unit accbuff[SD2U(DECBUFFER*2+20)]; /* local buffer [*2+20 reduces many */ - /* allocations when called from */ - /* other operations, notable exp] */ - Unit *allocacc=NULL; /* -> allocated acc buffer, iff allocated */ - Int reqdigits=set->digits; /* local copy; requested DIGITS */ - Int padding; /* work */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operands and set lostDigits status, as needed */ - if (lhs->digits>reqdigits) { - alloclhs=decRoundOperand(lhs, set, status); - if (alloclhs==NULL) break; - lhs=alloclhs; - } - if (rhs->digits>reqdigits) { - allocrhs=decRoundOperand(rhs, set, status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - /* note whether signs differ [used all paths] */ - diffsign=(Flag)((lhs->bits^rhs->bits^negate)&DECNEG); - - /* handle infinities and NaNs */ - if (SPECIALARGS) { /* a special bit set 
*/ - if (SPECIALARGS & (DECSNAN | DECNAN)) /* a NaN */ - decNaNs(res, lhs, rhs, set, status); - else { /* one or two infinities */ - if (decNumberIsInfinite(lhs)) { /* LHS is infinity */ - /* two infinities with different signs is invalid */ - if (decNumberIsInfinite(rhs) && diffsign) { - *status|=DEC_Invalid_operation; - break; - } - bits=lhs->bits & DECNEG; /* get sign from LHS */ - } - else bits=(rhs->bits^negate) & DECNEG;/* RHS must be Infinity */ - bits|=DECINF; - uprv_decNumberZero(res); - res->bits=bits; /* set +/- infinity */ - } /* an infinity */ - break; - } - - /* Quick exit for add 0s; return the non-0, modified as need be */ - if (ISZERO(lhs)) { - Int adjust; /* work */ - Int lexp=lhs->exponent; /* save in case LHS==RES */ - bits=lhs->bits; /* .. */ - residue=0; /* clear accumulator */ - decCopyFit(res, rhs, set, &residue, status); /* copy (as needed) */ - res->bits^=negate; /* flip if rhs was negated */ - #if DECSUBSET - if (set->extended) { /* exponents on zeros count */ - #endif - /* exponent will be the lower of the two */ - adjust=lexp-res->exponent; /* adjustment needed [if -ve] */ - if (ISZERO(res)) { /* both 0: special IEEE 754 rules */ - if (adjust<0) res->exponent=lexp; /* set exponent */ - /* 0-0 gives +0 unless rounding to -infinity, and -0-0 gives -0 */ - if (diffsign) { - if (set->round!=DEC_ROUND_FLOOR) res->bits=0; - else res->bits=DECNEG; /* preserve 0 sign */ - } - } - else { /* non-0 res */ - if (adjust<0) { /* 0-padding needed */ - if ((res->digits-adjust)>set->digits) { - adjust=res->digits-set->digits; /* to fit exactly */ - *status|=DEC_Rounded; /* [but exact] */ - } - res->digits=decShiftToMost(res->lsu, res->digits, -adjust); - res->exponent+=adjust; /* set the exponent. 
*/ - } - } /* non-0 res */ - #if DECSUBSET - } /* extended */ - #endif - decFinish(res, set, &residue, status); /* clean and finalize */ - break;} - - if (ISZERO(rhs)) { /* [lhs is non-zero] */ - Int adjust; /* work */ - Int rexp=rhs->exponent; /* save in case RHS==RES */ - bits=rhs->bits; /* be clean */ - residue=0; /* clear accumulator */ - decCopyFit(res, lhs, set, &residue, status); /* copy (as needed) */ - #if DECSUBSET - if (set->extended) { /* exponents on zeros count */ - #endif - /* exponent will be the lower of the two */ - /* [0-0 case handled above] */ - adjust=rexp-res->exponent; /* adjustment needed [if -ve] */ - if (adjust<0) { /* 0-padding needed */ - if ((res->digits-adjust)>set->digits) { - adjust=res->digits-set->digits; /* to fit exactly */ - *status|=DEC_Rounded; /* [but exact] */ - } - res->digits=decShiftToMost(res->lsu, res->digits, -adjust); - res->exponent+=adjust; /* set the exponent. */ - } - #if DECSUBSET - } /* extended */ - #endif - decFinish(res, set, &residue, status); /* clean and finalize */ - break;} - - /* [NB: both fastpath and mainpath code below assume these cases */ - /* (notably 0-0) have already been handled] */ - - /* calculate the padding needed to align the operands */ - padding=rhs->exponent-lhs->exponent; - - /* Fastpath cases where the numbers are aligned and normal, the RHS */ - /* is all in one unit, no operand rounding is needed, and no carry, */ - /* lengthening, or borrow is needed */ - if (padding==0 - && rhs->digits<=DECDPUN - && rhs->exponent>=set->emin /* [some normals drop through] */ - && rhs->exponent<=set->emax-set->digits+1 /* [could clamp] */ - && rhs->digits<=reqdigits - && lhs->digits<=reqdigits) { - Int partial=*lhs->lsu; - if (!diffsign) { /* adding */ - partial+=*rhs->lsu; - if ((partial<=DECDPUNMAX) /* result fits in unit */ - && (lhs->digits>=DECDPUN || /* .. and no digits-count change */ - partial<(Int)powers[lhs->digits])) { /* .. 
*/ - if (res!=lhs) uprv_decNumberCopy(res, lhs); /* not in place */ - *res->lsu=(Unit)partial; /* [copy could have overwritten RHS] */ - break; - } - /* else drop out for careful add */ - } - else { /* signs differ */ - partial-=*rhs->lsu; - if (partial>0) { /* no borrow needed, and non-0 result */ - if (res!=lhs) uprv_decNumberCopy(res, lhs); /* not in place */ - *res->lsu=(Unit)partial; - /* this could have reduced digits [but result>0] */ - res->digits=decGetDigits(res->lsu, D2U(res->digits)); - break; - } - /* else drop out for careful subtract */ - } - } - - /* Now align (pad) the lhs or rhs so they can be added or */ - /* subtracted, as necessary. If one number is much larger than */ - /* the other (that is, if in plain form there is a least one */ - /* digit between the lowest digit of one and the highest of the */ - /* other) padding with up to DIGITS-1 trailing zeros may be */ - /* needed; then apply rounding (as exotic rounding modes may be */ - /* affected by the residue). */ - rhsshift=0; /* rhs shift to left (padding) in Units */ - bits=lhs->bits; /* assume sign is that of LHS */ - mult=1; /* likely multiplier */ - - /* [if padding==0 the operands are aligned; no padding is needed] */ - if (padding!=0) { - /* some padding needed; always pad the RHS, as any required */ - /* padding can then be effected by a simple combination of */ - /* shifts and a multiply */ - Flag swapped=0; - if (padding<0) { /* LHS needs the padding */ - const decNumber *t; - padding=-padding; /* will be +ve */ - bits=(uByte)(rhs->bits^negate); /* assumed sign is now that of RHS */ - t=lhs; lhs=rhs; rhs=t; - swapped=1; - } - - /* If, after pad, rhs would be longer than lhs by digits+1 or */ - /* more then lhs cannot affect the answer, except as a residue, */ - /* so only need to pad up to a length of DIGITS+1. */ - if (rhs->digits+padding > lhs->digits+reqdigits+1) { - /* The RHS is sufficient */ - /* for residue use the relative sign indication... 
*/ - Int shift=reqdigits-rhs->digits; /* left shift needed */ - residue=1; /* residue for rounding */ - if (diffsign) residue=-residue; /* signs differ */ - /* copy, shortening if necessary */ - decCopyFit(res, rhs, set, &residue, status); - /* if it was already shorter, then need to pad with zeros */ - if (shift>0) { - res->digits=decShiftToMost(res->lsu, res->digits, shift); - res->exponent-=shift; /* adjust the exponent. */ - } - /* flip the result sign if unswapped and rhs was negated */ - if (!swapped) res->bits^=negate; - decFinish(res, set, &residue, status); /* done */ - break;} - - /* LHS digits may affect result */ - rhsshift=D2U(padding+1)-1; /* this much by Unit shift .. */ - mult=powers[padding-(rhsshift*DECDPUN)]; /* .. this by multiplication */ - } /* padding needed */ - - if (diffsign) mult=-mult; /* signs differ */ - - /* determine the longer operand */ - maxdigits=rhs->digits+padding; /* virtual length of RHS */ - if (lhs->digits>maxdigits) maxdigits=lhs->digits; - - /* Decide on the result buffer to use; if possible place directly */ - /* into result. */ - acc=res->lsu; /* assume add direct to result */ - /* If destructive overlap, or the number is too long, or a carry or */ - /* borrow to DIGITS+1 might be possible, a buffer must be used. 
*/ - /* [Might be worth more sophisticated tests when maxdigits==reqdigits] */ - if ((maxdigits>=reqdigits) /* is, or could be, too large */ - || (res==rhs && rhsshift>0)) { /* destructive overlap */ - /* buffer needed, choose it; units for maxdigits digits will be */ - /* needed, +1 Unit for carry or borrow */ - Int need=D2U(maxdigits)+1; - acc=accbuff; /* assume use local buffer */ - if (need*sizeof(Unit)>sizeof(accbuff)) { - /* printf("malloc add %ld %ld\n", need, sizeof(accbuff)); */ - allocacc=(Unit *)malloc(need*sizeof(Unit)); - if (allocacc==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - acc=allocacc; - } - } - - res->bits=(uByte)(bits&DECNEG); /* it's now safe to overwrite.. */ - res->exponent=lhs->exponent; /* .. operands (even if aliased) */ - - #if DECTRACE - decDumpAr('A', lhs->lsu, D2U(lhs->digits)); - decDumpAr('B', rhs->lsu, D2U(rhs->digits)); - printf(" :h: %ld %ld\n", rhsshift, mult); - #endif - - /* add [A+B*m] or subtract [A+B*(-m)] */ - U_ASSERT(rhs->digits > 0); - U_ASSERT(lhs->digits > 0); - res->digits=decUnitAddSub(lhs->lsu, D2U(lhs->digits), - rhs->lsu, D2U(rhs->digits), - rhsshift, acc, mult) - *DECDPUN; /* [units -> digits] */ - if (res->digits<0) { /* borrowed... */ - res->digits=-res->digits; - res->bits^=DECNEG; /* flip the sign */ - } - #if DECTRACE - decDumpAr('+', acc, D2U(res->digits)); - #endif - - /* If a buffer was used the result must be copied back, possibly */ - /* shortening. (If no buffer was used then the result must have */ - /* fit, so can't need rounding and residue must be 0.) */ - residue=0; /* clear accumulator */ - if (acc!=res->lsu) { - #if DECSUBSET - if (set->extended) { /* round from first significant digit */ - #endif - /* remove leading zeros that were added due to rounding up to */ - /* integral Units -- before the test for rounding. 
*/ - if (res->digits>reqdigits) - res->digits=decGetDigits(acc, D2U(res->digits)); - decSetCoeff(res, set, acc, res->digits, &residue, status); - #if DECSUBSET - } - else { /* subset arithmetic rounds from original significant digit */ - /* May have an underestimate. This only occurs when both */ - /* numbers fit in DECDPUN digits and are padding with a */ - /* negative multiple (-10, -100...) and the top digit(s) become */ - /* 0. (This only matters when using X3.274 rules where the */ - /* leading zero could be included in the rounding.) */ - if (res->digitsdigits))=0; /* ensure leading 0 is there */ - res->digits=maxdigits; - } - else { - /* remove leading zeros that added due to rounding up to */ - /* integral Units (but only those in excess of the original */ - /* maxdigits length, unless extended) before test for rounding. */ - if (res->digits>reqdigits) { - res->digits=decGetDigits(acc, D2U(res->digits)); - if (res->digitsdigits=maxdigits; - } - } - decSetCoeff(res, set, acc, res->digits, &residue, status); - /* Now apply rounding if needed before removing leading zeros. */ - /* This is safe because subnormals are not a possibility */ - if (residue!=0) { - decApplyRound(res, set, residue, status); - residue=0; /* did what needed to be done */ - } - } /* subset */ - #endif - } /* used buffer */ - - /* strip leading zeros [these were left on in case of subset subtract] */ - res->digits=decGetDigits(res->lsu, D2U(res->digits)); - - /* apply checks and rounding */ - decFinish(res, set, &residue, status); - - /* "When the sum of two operands with opposite signs is exactly */ - /* zero, the sign of that sum shall be '+' in all rounding modes */ - /* except round toward -Infinity, in which mode that sign shall be */ - /* '-'." [Subset zeros also never have '-', set by decFinish.] 
*/ - if (ISZERO(res) && diffsign - #if DECSUBSET - && set->extended - #endif - && (*status&DEC_Inexact)==0) { - if (set->round==DEC_ROUND_FLOOR) res->bits|=DECNEG; /* sign - */ - else res->bits&=~DECNEG; /* sign + */ - } - } while(0); /* end protected */ - - if (allocacc!=NULL) free(allocacc); /* drop any storage used */ - #if DECSUBSET - if (allocrhs!=NULL) free(allocrhs); /* .. */ - if (alloclhs!=NULL) free(alloclhs); /* .. */ - #endif - return res; - } /* decAddOp */ - -/* ------------------------------------------------------------------ */ -/* decDivideOp -- division operation */ -/* */ -/* This routine performs the calculations for all four division */ -/* operators (divide, divideInteger, remainder, remainderNear). */ -/* */ -/* C=A op B */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X/X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* op is DIVIDE, DIVIDEINT, REMAINDER, or REMNEAR respectively. */ -/* status is the usual accumulator */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* ------------------------------------------------------------------ */ -/* The underlying algorithm of this routine is the same as in the */ -/* 1981 S/370 implementation, that is, non-restoring long division */ -/* with bi-unit (rather than bi-digit) estimation for each unit */ -/* multiplier. In this pseudocode overview, complications for the */ -/* Remainder operators and division residues for exact rounding are */ -/* omitted for clarity. 
*/ -/* */ -/* Prepare operands and handle special values */ -/* Test for x/0 and then 0/x */ -/* Exp =Exp1 - Exp2 */ -/* Exp =Exp +len(var1) -len(var2) */ -/* Sign=Sign1 * Sign2 */ -/* Pad accumulator (Var1) to double-length with 0's (pad1) */ -/* Pad Var2 to same length as Var1 */ -/* msu2pair/plus=1st 2 or 1 units of var2, +1 to allow for round */ -/* have=0 */ -/* Do until (have=digits+1 OR residue=0) */ -/* if exp<0 then if integer divide/residue then leave */ -/* this_unit=0 */ -/* Do forever */ -/* compare numbers */ -/* if <0 then leave inner_loop */ -/* if =0 then (* quick exit without subtract *) do */ -/* this_unit=this_unit+1; output this_unit */ -/* leave outer_loop; end */ -/* Compare lengths of numbers (mantissae): */ -/* If same then tops2=msu2pair -- {units 1&2 of var2} */ -/* else tops2=msu2plus -- {0, unit 1 of var2} */ -/* tops1=first_unit_of_Var1*10**DECDPUN +second_unit_of_var1 */ -/* mult=tops1/tops2 -- Good and safe guess at divisor */ -/* if mult=0 then mult=1 */ -/* this_unit=this_unit+mult */ -/* subtract */ -/* end inner_loop */ -/* if have\=0 | this_unit\=0 then do */ -/* output this_unit */ -/* have=have+1; end */ -/* var2=var2/10 */ -/* exp=exp-1 */ -/* end outer_loop */ -/* exp=exp+1 -- set the proper exponent */ -/* if have=0 then generate answer=0 */ -/* Return (Result is defined by Var1) */ -/* */ -/* ------------------------------------------------------------------ */ -/* Two working buffers are needed during the division; one (digits+ */ -/* 1) to accumulate the result, and the other (up to 2*digits+1) for */ -/* long subtractions. These are acc and var1 respectively. */ -/* var1 is a copy of the lhs coefficient, var2 is the rhs coefficient.*/ -/* The static buffers may be larger than might be expected to allow */ -/* for calls from higher-level funtions (notable exp). 
*/ -/* ------------------------------------------------------------------ */ -static decNumber * decDivideOp(decNumber *res, - const decNumber *lhs, const decNumber *rhs, - decContext *set, Flag op, uInt *status) { - #if DECSUBSET - decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ - decNumber *allocrhs=NULL; /* .., rhs */ - #endif - Unit accbuff[SD2U(DECBUFFER+DECDPUN+10)]; /* local buffer */ - Unit *acc=accbuff; /* -> accumulator array for result */ - Unit *allocacc=NULL; /* -> allocated buffer, iff allocated */ - Unit *accnext; /* -> where next digit will go */ - Int acclength; /* length of acc needed [Units] */ - Int accunits; /* count of units accumulated */ - Int accdigits; /* count of digits accumulated */ - - Unit varbuff[SD2U(DECBUFFER*2+DECDPUN)]; /* buffer for var1 */ - Unit *var1=varbuff; /* -> var1 array for long subtraction */ - Unit *varalloc=NULL; /* -> allocated buffer, iff used */ - Unit *msu1; /* -> msu of var1 */ - - const Unit *var2; /* -> var2 array */ - const Unit *msu2; /* -> msu of var2 */ - Int msu2plus; /* msu2 plus one [does not vary] */ - eInt msu2pair; /* msu2 pair plus one [does not vary] */ - - Int var1units, var2units; /* actual lengths */ - Int var2ulen; /* logical length (units) */ - Int var1initpad=0; /* var1 initial padding (digits) */ - Int maxdigits; /* longest LHS or required acc length */ - Int mult; /* multiplier for subtraction */ - Unit thisunit; /* current unit being accumulated */ - Int residue; /* for rounding */ - Int reqdigits=set->digits; /* requested DIGITS */ - Int exponent; /* working exponent */ - Int maxexponent=0; /* DIVIDE maximum exponent if unrounded */ - uByte bits; /* working sign */ - Unit *target; /* work */ - const Unit *source; /* .. */ - uInt const *pow; /* .. */ - Int shift, cut; /* .. 
*/ - #if DECSUBSET - Int dropped; /* work */ - #endif - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operands and set lostDigits status, as needed */ - if (lhs->digits>reqdigits) { - alloclhs=decRoundOperand(lhs, set, status); - if (alloclhs==NULL) break; - lhs=alloclhs; - } - if (rhs->digits>reqdigits) { - allocrhs=decRoundOperand(rhs, set, status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - bits=(lhs->bits^rhs->bits)&DECNEG; /* assumed sign for divisions */ - - /* handle infinities and NaNs */ - if (SPECIALARGS) { /* a special bit set */ - if (SPECIALARGS & (DECSNAN | DECNAN)) { /* one or two NaNs */ - decNaNs(res, lhs, rhs, set, status); - break; - } - /* one or two infinities */ - if (decNumberIsInfinite(lhs)) { /* LHS (dividend) is infinite */ - if (decNumberIsInfinite(rhs) || /* two infinities are invalid .. */ - op & (REMAINDER | REMNEAR)) { /* as is remainder of infinity */ - *status|=DEC_Invalid_operation; - break; - } - /* [Note that infinity/0 raises no exceptions] */ - uprv_decNumberZero(res); - res->bits=bits|DECINF; /* set +/- infinity */ - break; - } - else { /* RHS (divisor) is infinite */ - residue=0; - if (op&(REMAINDER|REMNEAR)) { - /* result is [finished clone of] lhs */ - decCopyFit(res, lhs, set, &residue, status); - } - else { /* a division */ - uprv_decNumberZero(res); - res->bits=bits; /* set +/- zero */ - /* for DIVIDEINT the exponent is always 0. 
For DIVIDE, result */ - /* is a 0 with infinitely negative exponent, clamped to minimum */ - if (op&DIVIDE) { - res->exponent=set->emin-set->digits+1; - *status|=DEC_Clamped; - } - } - decFinish(res, set, &residue, status); - break; - } - } - - /* handle 0 rhs (x/0) */ - if (ISZERO(rhs)) { /* x/0 is always exceptional */ - if (ISZERO(lhs)) { - uprv_decNumberZero(res); /* [after lhs test] */ - *status|=DEC_Division_undefined;/* 0/0 will become NaN */ - } - else { - uprv_decNumberZero(res); - if (op&(REMAINDER|REMNEAR)) *status|=DEC_Invalid_operation; - else { - *status|=DEC_Division_by_zero; /* x/0 */ - res->bits=bits|DECINF; /* .. is +/- Infinity */ - } - } - break;} - - /* handle 0 lhs (0/x) */ - if (ISZERO(lhs)) { /* 0/x [x!=0] */ - #if DECSUBSET - if (!set->extended) uprv_decNumberZero(res); - else { - #endif - if (op&DIVIDE) { - residue=0; - exponent=lhs->exponent-rhs->exponent; /* ideal exponent */ - uprv_decNumberCopy(res, lhs); /* [zeros always fit] */ - res->bits=bits; /* sign as computed */ - res->exponent=exponent; /* exponent, too */ - decFinalize(res, set, &residue, status); /* check exponent */ - } - else if (op&DIVIDEINT) { - uprv_decNumberZero(res); /* integer 0 */ - res->bits=bits; /* sign as computed */ - } - else { /* a remainder */ - exponent=rhs->exponent; /* [save in case overwrite] */ - uprv_decNumberCopy(res, lhs); /* [zeros always fit] */ - if (exponentexponent) res->exponent=exponent; /* use lower */ - } - #if DECSUBSET - } - #endif - break;} - - /* Precalculate exponent. This starts off adjusted (and hence fits */ - /* in 31 bits) and becomes the usual unadjusted exponent as the */ - /* division proceeds. The order of evaluation is important, here, */ - /* to avoid wrap. 
*/ - exponent=(lhs->exponent+lhs->digits)-(rhs->exponent+rhs->digits); - - /* If the working exponent is -ve, then some quick exits are */ - /* possible because the quotient is known to be <1 */ - /* [for REMNEAR, it needs to be < -1, as -0.5 could need work] */ - if (exponent<0 && !(op==DIVIDE)) { - if (op&DIVIDEINT) { - uprv_decNumberZero(res); /* integer part is 0 */ - #if DECSUBSET - if (set->extended) - #endif - res->bits=bits; /* set +/- zero */ - break;} - /* fastpath remainders so long as the lhs has the smaller */ - /* (or equal) exponent */ - if (lhs->exponent<=rhs->exponent) { - if (op&REMAINDER || exponent<-1) { - /* It is REMAINDER or safe REMNEAR; result is [finished */ - /* clone of] lhs (r = x - 0*y) */ - residue=0; - decCopyFit(res, lhs, set, &residue, status); - decFinish(res, set, &residue, status); - break; - } - /* [unsafe REMNEAR drops through] */ - } - } /* fastpaths */ - - /* Long (slow) division is needed; roll up the sleeves... */ - - /* The accumulator will hold the quotient of the division. */ - /* If it needs to be too long for stack storage, then allocate. */ - acclength=D2U(reqdigits+DECDPUN); /* in Units */ - if (acclength*sizeof(Unit)>sizeof(accbuff)) { - /* printf("malloc dvacc %ld units\n", acclength); */ - allocacc=(Unit *)malloc(acclength*sizeof(Unit)); - if (allocacc==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - acc=allocacc; /* use the allocated space */ - } - - /* var1 is the padded LHS ready for subtractions. */ - /* If it needs to be too long for stack storage, then allocate. 
*/ - /* The maximum units needed for var1 (long subtraction) is: */ - /* Enough for */ - /* (rhs->digits+reqdigits-1) -- to allow full slide to right */ - /* or (lhs->digits) -- to allow for long lhs */ - /* whichever is larger */ - /* +1 -- for rounding of slide to right */ - /* +1 -- for leading 0s */ - /* +1 -- for pre-adjust if a remainder or DIVIDEINT */ - /* [Note: unused units do not participate in decUnitAddSub data] */ - maxdigits=rhs->digits+reqdigits-1; - if (lhs->digits>maxdigits) maxdigits=lhs->digits; - var1units=D2U(maxdigits)+2; - /* allocate a guard unit above msu1 for REMAINDERNEAR */ - if (!(op&DIVIDE)) var1units++; - if ((var1units+1)*sizeof(Unit)>sizeof(varbuff)) { - /* printf("malloc dvvar %ld units\n", var1units+1); */ - varalloc=(Unit *)malloc((var1units+1)*sizeof(Unit)); - if (varalloc==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - var1=varalloc; /* use the allocated space */ - } - - /* Extend the lhs and rhs to full long subtraction length. The lhs */ - /* is truly extended into the var1 buffer, with 0 padding, so a */ - /* subtract in place is always possible. The rhs (var2) has */ - /* virtual padding (implemented by decUnitAddSub). */ - /* One guard unit was allocated above msu1 for rem=rem+rem in */ - /* REMAINDERNEAR. */ - msu1=var1+var1units-1; /* msu of var1 */ - source=lhs->lsu+D2U(lhs->digits)-1; /* msu of input array */ - for (target=msu1; source>=lhs->lsu; source--, target--) *target=*source; - for (; target>=var1; target--) *target=0; - - /* rhs (var2) is left-aligned with var1 at the start */ - var2ulen=var1units; /* rhs logical length (units) */ - var2units=D2U(rhs->digits); /* rhs actual length (units) */ - var2=rhs->lsu; /* -> rhs array */ - msu2=var2+var2units-1; /* -> msu of var2 [never changes] */ - /* now set up the variables which will be used for estimating the */ - /* multiplication factor. 
If these variables are not exact, add */ - /* 1 to make sure that the multiplier is never overestimated. */ - msu2plus=*msu2; /* it's value .. */ - if (var2units>1) msu2plus++; /* .. +1 if any more */ - msu2pair=(eInt)*msu2*(DECDPUNMAX+1);/* top two pair .. */ - if (var2units>1) { /* .. [else treat 2nd as 0] */ - msu2pair+=*(msu2-1); /* .. */ - if (var2units>2) msu2pair++; /* .. +1 if any more */ - } - - /* The calculation is working in units, which may have leading zeros, */ - /* but the exponent was calculated on the assumption that they are */ - /* both left-aligned. Adjust the exponent to compensate: add the */ - /* number of leading zeros in var1 msu and subtract those in var2 msu. */ - /* [This is actually done by counting the digits and negating, as */ - /* lead1=DECDPUN-digits1, and similarly for lead2.] */ - for (pow=&powers[1]; *msu1>=*pow; pow++) exponent--; - for (pow=&powers[1]; *msu2>=*pow; pow++) exponent++; - - /* Now, if doing an integer divide or remainder, ensure that */ - /* the result will be Unit-aligned. To do this, shift the var1 */ - /* accumulator towards least if need be. (It's much easier to */ - /* do this now than to reassemble the residue afterwards, if */ - /* doing a remainder.) Also ensure the exponent is not negative. */ - if (!(op&DIVIDE)) { - Unit *u; /* work */ - /* save the initial 'false' padding of var1, in digits */ - var1initpad=(var1units-D2U(lhs->digits))*DECDPUN; - /* Determine the shift to do. */ - if (exponent<0) cut=-exponent; - else cut=DECDPUN-exponent%DECDPUN; - decShiftToLeast(var1, var1units, cut); - exponent+=cut; /* maintain numerical value */ - var1initpad-=cut; /* .. 
and reduce padding */ - /* clean any most-significant units which were just emptied */ - for (u=msu1; cut>=DECDPUN; cut-=DECDPUN, u--) *u=0; - } /* align */ - else { /* is DIVIDE */ - maxexponent=lhs->exponent-rhs->exponent; /* save */ - /* optimization: if the first iteration will just produce 0, */ - /* preadjust to skip it [valid for DIVIDE only] */ - if (*msu1<*msu2) { - var2ulen--; /* shift down */ - exponent-=DECDPUN; /* update the exponent */ - } - } - - /* ---- start the long-division loops ------------------------------ */ - accunits=0; /* no units accumulated yet */ - accdigits=0; /* .. or digits */ - accnext=acc+acclength-1; /* -> msu of acc [NB: allows digits+1] */ - for (;;) { /* outer forever loop */ - thisunit=0; /* current unit assumed 0 */ - /* find the next unit */ - for (;;) { /* inner forever loop */ - /* strip leading zero units [from either pre-adjust or from */ - /* subtract last time around]. Leave at least one unit. */ - for (; *msu1==0 && msu1>var1; msu1--) var1units--; - - if (var1units msu */ - for (pv1=msu1; ; pv1--, pv2--) { - /* v1=*pv1 -- always OK */ - v2=0; /* assume in padding */ - if (pv2>=var2) v2=*pv2; /* in range */ - if (*pv1!=v2) break; /* no longer the same */ - if (pv1==var1) break; /* done; leave pv1 as is */ - } - /* here when all inspected or a difference seen */ - if (*pv1v2. Prepare for real subtraction; the lengths are equal */ - /* Estimate the multiplier (there's always a msu1-1)... */ - /* Bring in two units of var2 to provide a good estimate. */ - mult=(Int)(((eInt)*msu1*(DECDPUNMAX+1)+*(msu1-1))/msu2pair); - } /* lengths the same */ - else { /* var1units > var2ulen, so subtraction is safe */ - /* The var2 msu is one unit towards the lsu of the var1 msu, */ - /* so only one unit for var2 can be used. 
*/ - mult=(Int)(((eInt)*msu1*(DECDPUNMAX+1)+*(msu1-1))/msu2plus); - } - if (mult==0) mult=1; /* must always be at least 1 */ - /* subtraction needed; var1 is > var2 */ - thisunit=(Unit)(thisunit+mult); /* accumulate */ - /* subtract var1-var2, into var1; only the overlap needs */ - /* processing, as this is an in-place calculation */ - shift=var2ulen-var2units; - #if DECTRACE - decDumpAr('1', &var1[shift], var1units-shift); - decDumpAr('2', var2, var2units); - printf("m=%ld\n", -mult); - #endif - decUnitAddSub(&var1[shift], var1units-shift, - var2, var2units, 0, - &var1[shift], -mult); - #if DECTRACE - decDumpAr('#', &var1[shift], var1units-shift); - #endif - /* var1 now probably has leading zeros; these are removed at the */ - /* top of the inner loop. */ - } /* inner loop */ - - /* The next unit has been calculated in full; unless it's a */ - /* leading zero, add to acc */ - if (accunits!=0 || thisunit!=0) { /* is first or non-zero */ - *accnext=thisunit; /* store in accumulator */ - /* account exactly for the new digits */ - if (accunits==0) { - accdigits++; /* at least one */ - for (pow=&powers[1]; thisunit>=*pow; pow++) accdigits++; - } - else accdigits+=DECDPUN; - accunits++; /* update count */ - accnext--; /* ready for next */ - if (accdigits>reqdigits) break; /* have enough digits */ - } - - /* if the residue is zero, the operation is done (unless divide */ - /* or divideInteger and still not enough digits yet) */ - if (*var1==0 && var1units==1) { /* residue is 0 */ - if (op&(REMAINDER|REMNEAR)) break; - if ((op&DIVIDE) && (exponent<=maxexponent)) break; - /* [drop through if divideInteger] */ - } - /* also done enough if calculating remainder or integer */ - /* divide and just did the last ('units') unit */ - if (exponent==0 && !(op&DIVIDE)) break; - - /* to get here, var1 is less than var2, so divide var2 by the per- */ - /* Unit power of ten and go for the next digit */ - var2ulen--; /* shift down */ - exponent-=DECDPUN; /* update the exponent */ - } /* 
outer loop */ - - /* ---- division is complete --------------------------------------- */ - /* here: acc has at least reqdigits+1 of good results (or fewer */ - /* if early stop), starting at accnext+1 (its lsu) */ - /* var1 has any residue at the stopping point */ - /* accunits is the number of digits collected in acc */ - if (accunits==0) { /* acc is 0 */ - accunits=1; /* show have a unit .. */ - accdigits=1; /* .. */ - *accnext=0; /* .. whose value is 0 */ - } - else accnext++; /* back to last placed */ - /* accnext now -> lowest unit of result */ - - residue=0; /* assume no residue */ - if (op&DIVIDE) { - /* record the presence of any residue, for rounding */ - if (*var1!=0 || var1units>1) residue=1; - else { /* no residue */ - /* Had an exact division; clean up spurious trailing 0s. */ - /* There will be at most DECDPUN-1, from the final multiply, */ - /* and then only if the result is non-0 (and even) and the */ - /* exponent is 'loose'. */ - #if DECDPUN>1 - Unit lsu=*accnext; - if (!(lsu&0x01) && (lsu!=0)) { - /* count the trailing zeros */ - Int drop=0; - for (;; drop++) { /* [will terminate because lsu!=0] */ - if (exponent>=maxexponent) break; /* don't chop real 0s */ - #if DECDPUN<=4 - if ((lsu-QUOT10(lsu, drop+1) - *powers[drop+1])!=0) break; /* found non-0 digit */ - #else - if (lsu%powers[drop+1]!=0) break; /* found non-0 digit */ - #endif - exponent++; - } - if (drop>0) { - accunits=decShiftToLeast(accnext, accunits, drop); - accdigits=decGetDigits(accnext, accunits); - accunits=D2U(accdigits); - /* [exponent was adjusted in the loop] */ - } - } /* neither odd nor 0 */ - #endif - } /* exact divide */ - } /* divide */ - else /* op!=DIVIDE */ { - /* check for coefficient overflow */ - if (accdigits+exponent>reqdigits) { - *status|=DEC_Division_impossible; - break; - } - if (op & (REMAINDER|REMNEAR)) { - /* [Here, the exponent will be 0, because var1 was adjusted */ - /* appropriately.] 
*/ - Int postshift; /* work */ - Flag wasodd=0; /* integer was odd */ - Unit *quotlsu; /* for save */ - Int quotdigits; /* .. */ - - bits=lhs->bits; /* remainder sign is always as lhs */ - - /* Fastpath when residue is truly 0 is worthwhile [and */ - /* simplifies the code below] */ - if (*var1==0 && var1units==1) { /* residue is 0 */ - Int exp=lhs->exponent; /* save min(exponents) */ - if (rhs->exponentexponent; - uprv_decNumberZero(res); /* 0 coefficient */ - #if DECSUBSET - if (set->extended) - #endif - res->exponent=exp; /* .. with proper exponent */ - res->bits=(uByte)(bits&DECNEG); /* [cleaned] */ - decFinish(res, set, &residue, status); /* might clamp */ - break; - } - /* note if the quotient was odd */ - if (*accnext & 0x01) wasodd=1; /* acc is odd */ - quotlsu=accnext; /* save in case need to reinspect */ - quotdigits=accdigits; /* .. */ - - /* treat the residue, in var1, as the value to return, via acc */ - /* calculate the unused zero digits. This is the smaller of: */ - /* var1 initial padding (saved above) */ - /* var2 residual padding, which happens to be given by: */ - postshift=var1initpad+exponent-lhs->exponent+rhs->exponent; - /* [the 'exponent' term accounts for the shifts during divide] */ - if (var1initpadexponent; /* exponent is smaller of lhs & rhs */ - if (rhs->exponentexponent; - - /* Now correct the result if doing remainderNear; if it */ - /* (looking just at coefficients) is > rhs/2, or == rhs/2 and */ - /* the integer was odd then the result should be rem-rhs. */ - if (op&REMNEAR) { - Int compare, tarunits; /* work */ - Unit *up; /* .. 
*/ - /* calculate remainder*2 into the var1 buffer (which has */ - /* 'headroom' of an extra unit and hence enough space) */ - /* [a dedicated 'double' loop would be faster, here] */ - tarunits=decUnitAddSub(accnext, accunits, accnext, accunits, - 0, accnext, 1); - /* decDumpAr('r', accnext, tarunits); */ - - /* Here, accnext (var1) holds tarunits Units with twice the */ - /* remainder's coefficient, which must now be compared to the */ - /* RHS. The remainder's exponent may be smaller than the RHS's. */ - compare=decUnitCompare(accnext, tarunits, rhs->lsu, D2U(rhs->digits), - rhs->exponent-exponent); - if (compare==BADINT) { /* deep trouble */ - *status|=DEC_Insufficient_storage; - break;} - - /* now restore the remainder by dividing by two; the lsu */ - /* is known to be even. */ - for (up=accnext; up0 || (compare==0 && wasodd)) { /* adjustment needed */ - Int exp, expunits, exprem; /* work */ - /* This is effectively causing round-up of the quotient, */ - /* so if it was the rare case where it was full and all */ - /* nines, it would overflow and hence division-impossible */ - /* should be raised */ - Flag allnines=0; /* 1 if quotient all nines */ - if (quotdigits==reqdigits) { /* could be borderline */ - for (up=quotlsu; ; up++) { - if (quotdigits>DECDPUN) { - if (*up!=DECDPUNMAX) break;/* non-nines */ - } - else { /* this is the last Unit */ - if (*up==powers[quotdigits]-1) allnines=1; - break; - } - quotdigits-=DECDPUN; /* checked those digits */ - } /* up */ - } /* borderline check */ - if (allnines) { - *status|=DEC_Division_impossible; - break;} - - /* rem-rhs is needed; the sign will invert. Again, var1 */ - /* can safely be used for the working Units array. */ - exp=rhs->exponent-exponent; /* RHS padding needed */ - /* Calculate units and remainder from exponent. 
*/ - expunits=exp/DECDPUN; - exprem=exp%DECDPUN; - /* subtract [A+B*(-m)]; the result will always be negative */ - accunits=-decUnitAddSub(accnext, accunits, - rhs->lsu, D2U(rhs->digits), - expunits, accnext, -(Int)powers[exprem]); - accdigits=decGetDigits(accnext, accunits); /* count digits exactly */ - accunits=D2U(accdigits); /* and recalculate the units for copy */ - /* [exponent is as for original remainder] */ - bits^=DECNEG; /* flip the sign */ - } - } /* REMNEAR */ - } /* REMAINDER or REMNEAR */ - } /* not DIVIDE */ - - /* Set exponent and bits */ - res->exponent=exponent; - res->bits=(uByte)(bits&DECNEG); /* [cleaned] */ - - /* Now the coefficient. */ - decSetCoeff(res, set, accnext, accdigits, &residue, status); - - decFinish(res, set, &residue, status); /* final cleanup */ - - #if DECSUBSET - /* If a divide then strip trailing zeros if subset [after round] */ - if (!set->extended && (op==DIVIDE)) decTrim(res, set, 0, 1, &dropped); - #endif - } while(0); /* end protected */ - - if (varalloc!=NULL) free(varalloc); /* drop any storage used */ - if (allocacc!=NULL) free(allocacc); /* .. */ - #if DECSUBSET - if (allocrhs!=NULL) free(allocrhs); /* .. */ - if (alloclhs!=NULL) free(alloclhs); /* .. */ - #endif - return res; - } /* decDivideOp */ - -/* ------------------------------------------------------------------ */ -/* decMultiplyOp -- multiplication operation */ -/* */ -/* This routine performs the multiplication C=A x B. */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X*X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* status is the usual accumulator */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* ------------------------------------------------------------------ */ -/* 'Classic' multiplication is used rather than Karatsuba, as the */ -/* latter would give only a minor improvement for the short numbers */ -/* expected to be handled most (and uses much more memory). 
*/ -/* */ -/* There are two major paths here: the general-purpose ('old code') */ -/* path which handles all DECDPUN values, and a fastpath version */ -/* which is used if 64-bit ints are available, DECDPUN<=4, and more */ -/* than two calls to decUnitAddSub would be made. */ -/* */ -/* The fastpath version lumps units together into 8-digit or 9-digit */ -/* chunks, and also uses a lazy carry strategy to minimise expensive */ -/* 64-bit divisions. The chunks are then broken apart again into */ -/* units for continuing processing. Despite this overhead, the */ -/* fastpath can speed up some 16-digit operations by 10x (and much */ -/* more for higher-precision calculations). */ -/* */ -/* A buffer always has to be used for the accumulator; in the */ -/* fastpath, buffers are also always needed for the chunked copies of */ -/* of the operand coefficients. */ -/* Static buffers are larger than needed just for multiply, to allow */ -/* for calls from other operations (notably exp). */ -/* ------------------------------------------------------------------ */ -#define FASTMUL (DECUSE64 && DECDPUN<5) -static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set, - uInt *status) { - Int accunits; /* Units of accumulator in use */ - Int exponent; /* work */ - Int residue=0; /* rounding residue */ - uByte bits; /* result sign */ - Unit *acc; /* -> accumulator Unit array */ - Int needbytes; /* size calculator */ - void *allocacc=NULL; /* -> allocated accumulator, iff allocated */ - Unit accbuff[SD2U(DECBUFFER*4+1)]; /* buffer (+1 for DECBUFFER==0, */ - /* *4 for calls from other operations) */ - const Unit *mer, *mermsup; /* work */ - Int madlength; /* Units in multiplicand */ - Int shift; /* Units to shift multiplicand by */ - - #if FASTMUL - /* if DECDPUN is 1 or 3 work in base 10**9, otherwise */ - /* (DECDPUN is 2 or 4) then work in base 10**8 */ - #if DECDPUN & 1 /* odd */ - #define FASTBASE 1000000000 /* base */ - #define 
FASTDIGS 9 /* digits in base */ - #define FASTLAZY 18 /* carry resolution point [1->18] */ - #else - #define FASTBASE 100000000 - #define FASTDIGS 8 - #define FASTLAZY 1844 /* carry resolution point [1->1844] */ - #endif - /* three buffers are used, two for chunked copies of the operands */ - /* (base 10**8 or base 10**9) and one base 2**64 accumulator with */ - /* lazy carry evaluation */ - uInt zlhibuff[(DECBUFFER*2+1)/8+1]; /* buffer (+1 for DECBUFFER==0) */ - uInt *zlhi=zlhibuff; /* -> lhs array */ - uInt *alloclhi=NULL; /* -> allocated buffer, iff allocated */ - uInt zrhibuff[(DECBUFFER*2+1)/8+1]; /* buffer (+1 for DECBUFFER==0) */ - uInt *zrhi=zrhibuff; /* -> rhs array */ - uInt *allocrhi=NULL; /* -> allocated buffer, iff allocated */ - uLong zaccbuff[(DECBUFFER*2+1)/4+2]; /* buffer (+1 for DECBUFFER==0) */ - /* [allocacc is shared for both paths, as only one will run] */ - uLong *zacc=zaccbuff; /* -> accumulator array for exact result */ - #if DECDPUN==1 - Int zoff; /* accumulator offset */ - #endif - uInt *lip, *rip; /* item pointers */ - uInt *lmsi, *rmsi; /* most significant items */ - Int ilhs, irhs, iacc; /* item counts in the arrays */ - Int lazy; /* lazy carry counter */ - uLong lcarry; /* uLong carry */ - uInt carry; /* carry (NB not uLong) */ - Int count; /* work */ - const Unit *cup; /* .. */ - Unit *up; /* .. */ - uLong *lp; /* .. */ - Int p; /* .. 
*/ - #endif - - #if DECSUBSET - decNumber *alloclhs=NULL; /* -> allocated buffer, iff allocated */ - decNumber *allocrhs=NULL; /* -> allocated buffer, iff allocated */ - #endif - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - /* precalculate result sign */ - bits=(uByte)((lhs->bits^rhs->bits)&DECNEG); - - /* handle infinities and NaNs */ - if (SPECIALARGS) { /* a special bit set */ - if (SPECIALARGS & (DECSNAN | DECNAN)) { /* one or two NaNs */ - decNaNs(res, lhs, rhs, set, status); - return res;} - /* one or two infinities; Infinity * 0 is invalid */ - if (((lhs->bits & DECINF)==0 && ISZERO(lhs)) - ||((rhs->bits & DECINF)==0 && ISZERO(rhs))) { - *status|=DEC_Invalid_operation; - return res;} - uprv_decNumberZero(res); - res->bits=bits|DECINF; /* infinity */ - return res;} - - /* For best speed, as in DMSRCN [the original Rexx numerics */ - /* module], use the shorter number as the multiplier (rhs) and */ - /* the longer as the multiplicand (lhs) to minimise the number of */ - /* adds (partial products) */ - if (lhs->digitsdigits) { /* swap... */ - const decNumber *hold=lhs; - lhs=rhs; - rhs=hold; - } - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operands and set lostDigits status, as needed */ - if (lhs->digits>set->digits) { - alloclhs=decRoundOperand(lhs, set, status); - if (alloclhs==NULL) break; - lhs=alloclhs; - } - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - #if FASTMUL /* fastpath can be used */ - /* use the fast path if there are enough digits in the shorter */ - /* operand to make the setup and takedown worthwhile */ - #define NEEDTWO (DECDPUN*2) /* within two decUnitAddSub calls */ - if (rhs->digits>NEEDTWO) { /* use fastpath... 
*/ - /* calculate the number of elements in each array */ - ilhs=(lhs->digits+FASTDIGS-1)/FASTDIGS; /* [ceiling] */ - irhs=(rhs->digits+FASTDIGS-1)/FASTDIGS; /* .. */ - iacc=ilhs+irhs; - - /* allocate buffers if required, as usual */ - needbytes=ilhs*sizeof(uInt); - if (needbytes>(Int)sizeof(zlhibuff)) { - alloclhi=(uInt *)malloc(needbytes); - zlhi=alloclhi;} - needbytes=irhs*sizeof(uInt); - if (needbytes>(Int)sizeof(zrhibuff)) { - allocrhi=(uInt *)malloc(needbytes); - zrhi=allocrhi;} - - /* Allocating the accumulator space needs a special case when */ - /* DECDPUN=1 because when converting the accumulator to Units */ - /* after the multiplication each 8-byte item becomes 9 1-byte */ - /* units. Therefore iacc extra bytes are needed at the front */ - /* (rounded up to a multiple of 8 bytes), and the uLong */ - /* accumulator starts offset the appropriate number of units */ - /* to the right to avoid overwrite during the unchunking. */ - - /* Make sure no signed int overflow below. This is always true */ - /* if the given numbers have less digits than DEC_MAX_DIGITS. 
*/ - U_ASSERT(iacc <= INT32_MAX/sizeof(uLong)); - needbytes=iacc*sizeof(uLong); - #if DECDPUN==1 - zoff=(iacc+7)/8; /* items to offset by */ - needbytes+=zoff*8; - #endif - if (needbytes>(Int)sizeof(zaccbuff)) { - allocacc=(uLong *)malloc(needbytes); - zacc=(uLong *)allocacc;} - if (zlhi==NULL||zrhi==NULL||zacc==NULL) { - *status|=DEC_Insufficient_storage; - break;} - - acc=(Unit *)zacc; /* -> target Unit array */ - #if DECDPUN==1 - zacc+=zoff; /* start uLong accumulator to right */ - #endif - - /* assemble the chunked copies of the left and right sides */ - for (count=lhs->digits, cup=lhs->lsu, lip=zlhi; count>0; lip++) - for (p=0, *lip=0; p0; - p+=DECDPUN, cup++, count-=DECDPUN) - *lip+=*cup*powers[p]; - lmsi=lip-1; /* save -> msi */ - for (count=rhs->digits, cup=rhs->lsu, rip=zrhi; count>0; rip++) - for (p=0, *rip=0; p0; - p+=DECDPUN, cup++, count-=DECDPUN) - *rip+=*cup*powers[p]; - rmsi=rip-1; /* save -> msi */ - - /* zero the accumulator */ - for (lp=zacc; lp0 && rip!=rmsi) continue; - lazy=FASTLAZY; /* reset delay count */ - /* spin up the accumulator resolving overflows */ - for (lp=zacc; lp assume buffer for accumulator */ - needbytes=(D2U(lhs->digits)+D2U(rhs->digits))*sizeof(Unit); - if (needbytes>(Int)sizeof(accbuff)) { - allocacc=(Unit *)malloc(needbytes); - if (allocacc==NULL) {*status|=DEC_Insufficient_storage; break;} - acc=(Unit *)allocacc; /* use the allocated space */ - } - - /* Now the main long multiplication loop */ - /* Unlike the equivalent in the IBM Java implementation, there */ - /* is no advantage in calculating from msu to lsu. So, do it */ - /* by the book, as it were. */ - /* Each iteration calculates ACC=ACC+MULTAND*MULT */ - accunits=1; /* accumulator starts at '0' */ - *acc=0; /* .. 
(lsu=0) */ - shift=0; /* no multiplicand shift at first */ - madlength=D2U(lhs->digits); /* this won't change */ - mermsup=rhs->lsu+D2U(rhs->digits); /* -> msu+1 of multiplier */ - - for (mer=rhs->lsu; merlsu, madlength, 0, - &acc[shift], *mer) - + shift; - else { /* extend acc with a 0; it will be used shortly */ - *(acc+accunits)=0; /* [this avoids length of <=0 later] */ - accunits++; - } - /* multiply multiplicand by 10**DECDPUN for next Unit to left */ - shift++; /* add this for 'logical length' */ - } /* n */ - #if FASTMUL - } /* unchunked units */ - #endif - /* common end-path */ - #if DECTRACE - decDumpAr('*', acc, accunits); /* Show exact result */ - #endif - - /* acc now contains the exact result of the multiplication, */ - /* possibly with a leading zero unit; build the decNumber from */ - /* it, noting if any residue */ - res->bits=bits; /* set sign */ - res->digits=decGetDigits(acc, accunits); /* count digits exactly */ - - /* There can be a 31-bit wrap in calculating the exponent. */ - /* This can only happen if both input exponents are negative and */ - /* both their magnitudes are large. If there was a wrap, set a */ - /* safe very negative exponent, from which decFinalize() will */ - /* raise a hard underflow shortly. */ - exponent=lhs->exponent+rhs->exponent; /* calculate exponent */ - if (lhs->exponent<0 && rhs->exponent<0 && exponent>0) - exponent=-2*DECNUMMAXE; /* force underflow */ - res->exponent=exponent; /* OK to overwrite now */ - - - /* Set the coefficient. If any rounding, residue records */ - decSetCoeff(res, set, acc, res->digits, &residue, status); - decFinish(res, set, &residue, status); /* final cleanup */ - } while(0); /* end protected */ - - if (allocacc!=NULL) free(allocacc); /* drop any storage used */ - #if DECSUBSET - if (allocrhs!=NULL) free(allocrhs); /* .. */ - if (alloclhs!=NULL) free(alloclhs); /* .. */ - #endif - #if FASTMUL - if (allocrhi!=NULL) free(allocrhi); /* .. */ - if (alloclhi!=NULL) free(alloclhi); /* .. 
*/ - #endif - return res; - } /* decMultiplyOp */ - -/* ------------------------------------------------------------------ */ -/* decExpOp -- effect exponentiation */ -/* */ -/* This computes C = exp(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context; note that rounding mode has no effect */ -/* */ -/* C must have space for set->digits digits. status is updated but */ -/* not set. */ -/* */ -/* Restrictions: */ -/* */ -/* digits, emax, and -emin in the context must be less than */ -/* 2*DEC_MAX_MATH (1999998), and the rhs must be within these */ -/* bounds or a zero. This is an internal routine, so these */ -/* restrictions are contractual and not enforced. */ -/* */ -/* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will */ -/* almost always be correctly rounded, but may be up to 1 ulp in */ -/* error in rare cases. */ -/* */ -/* Finite results will always be full precision and Inexact, except */ -/* when A is a zero or -Infinity (giving 1 or 0 respectively). */ -/* ------------------------------------------------------------------ */ -/* This approach used here is similar to the algorithm described in */ -/* */ -/* Variable Precision Exponential Function, T. E. Hull and */ -/* A. Abrham, ACM Transactions on Mathematical Software, Vol 12 #2, */ -/* pp79-91, ACM, June 1986. */ -/* */ -/* with the main difference being that the iterations in the series */ -/* evaluation are terminated dynamically (which does not require the */ -/* extra variable-precision variables which are expensive in this */ -/* context). */ -/* */ -/* The error analysis in Hull & Abrham's paper applies except for the */ -/* round-off error accumulation during the series evaluation. This */ -/* code does not precalculate the number of iterations and so cannot */ -/* use Horner's scheme. 
Instead, the accumulation is done at double- */ -/* precision, which ensures that the additions of the terms are exact */ -/* and do not accumulate round-off (and any round-off errors in the */ -/* terms themselves move 'to the right' faster than they can */ -/* accumulate). This code also extends the calculation by allowing, */ -/* in the spirit of other decNumber operators, the input to be more */ -/* precise than the result (the precision used is based on the more */ -/* precise of the input or requested result). */ -/* */ -/* Implementation notes: */ -/* */ -/* 1. This is separated out as decExpOp so it can be called from */ -/* other Mathematical functions (notably Ln) with a wider range */ -/* than normal. In particular, it can handle the slightly wider */ -/* (double) range needed by Ln (which has to be able to calculate */ -/* exp(-x) where x can be the tiniest number (Ntiny). */ -/* */ -/* 2. Normalizing x to be <=0.1 (instead of <=1) reduces loop */ -/* iterations by appoximately a third with additional (although */ -/* diminishing) returns as the range is reduced to even smaller */ -/* fractions. However, h (the power of 10 used to correct the */ -/* result at the end, see below) must be kept <=8 as otherwise */ -/* the final result cannot be computed. Hence the leverage is a */ -/* sliding value (8-h), where potentially the range is reduced */ -/* more for smaller values. */ -/* */ -/* The leverage that can be applied in this way is severely */ -/* limited by the cost of the raise-to-the power at the end, */ -/* which dominates when the number of iterations is small (less */ -/* than ten) or when rhs is short. As an example, the adjustment */ -/* x**10,000,000 needs 31 multiplications, all but one full-width. */ -/* */ -/* 3. The restrictions (especially precision) could be raised with */ -/* care, but the full decNumber range seems very hard within the */ -/* 32-bit limits. */ -/* */ -/* 4. 
The working precisions for the static buffers are twice the */ -/* obvious size to allow for calls from decNumberPower. */ -/* ------------------------------------------------------------------ */ -decNumber * decExpOp(decNumber *res, const decNumber *rhs, - decContext *set, uInt *status) { - uInt ignore=0; /* working status */ - Int h; /* adjusted exponent for 0.xxxx */ - Int p; /* working precision */ - Int residue; /* rounding residue */ - uInt needbytes; /* for space calculations */ - const decNumber *x=rhs; /* (may point to safe copy later) */ - decContext aset, tset, dset; /* working contexts */ - Int comp; /* work */ - - /* the argument is often copied to normalize it, so (unusually) it */ - /* is treated like other buffers, using DECBUFFER, +1 in case */ - /* DECBUFFER is 0 */ - decNumber bufr[D2N(DECBUFFER*2+1)]; - decNumber *allocrhs=NULL; /* non-NULL if rhs buffer allocated */ - - /* the working precision will be no more than set->digits+8+1 */ - /* so for on-stack buffers DECBUFFER+9 is used, +1 in case DECBUFFER */ - /* is 0 (and twice that for the accumulator) */ - - /* buffer for t, term (working precision plus) */ - decNumber buft[D2N(DECBUFFER*2+9+1)]; - decNumber *allocbuft=NULL; /* -> allocated buft, iff allocated */ - decNumber *t=buft; /* term */ - /* buffer for a, accumulator (working precision * 2), at least 9 */ - decNumber bufa[D2N(DECBUFFER*4+18+1)]; - decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ - decNumber *a=bufa; /* accumulator */ - /* decNumber for the divisor term; this needs at most 9 digits */ - /* and so can be fixed size [16 so can use standard context] */ - decNumber bufd[D2N(16)]; - decNumber *d=bufd; /* divisor */ - decNumber numone; /* constant 1 */ - - #if DECCHECK - Int iterations=0; /* for later sanity check */ - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - if (SPECIALARG) { /* handle infinities and NaNs */ - if 
(decNumberIsInfinite(rhs)) { /* an infinity */ - if (decNumberIsNegative(rhs)) /* -Infinity -> +0 */ - uprv_decNumberZero(res); - else uprv_decNumberCopy(res, rhs); /* +Infinity -> self */ - } - else decNaNs(res, rhs, NULL, set, status); /* a NaN */ - break;} - - if (ISZERO(rhs)) { /* zeros -> exact 1 */ - uprv_decNumberZero(res); /* make clean 1 */ - *res->lsu=1; /* .. */ - break;} /* [no status to set] */ - - /* e**x when 0 < x < 0.66 is < 1+3x/2, hence can fast-path */ - /* positive and negative tiny cases which will result in inexact */ - /* 1. This also allows the later add-accumulate to always be */ - /* exact (because its length will never be more than twice the */ - /* working precision). */ - /* The comparator (tiny) needs just one digit, so use the */ - /* decNumber d for it (reused as the divisor, etc., below); its */ - /* exponent is such that if x is positive it will have */ - /* set->digits-1 zeros between the decimal point and the digit, */ - /* which is 4, and if x is negative one more zero there as the */ - /* more precise result will be of the form 0.9999999 rather than */ - /* 1.0000001. Hence, tiny will be 0.0000004 if digits=7 and x>0 */ - /* or 0.00000004 if digits=7 and x<0. If RHS not larger than */ - /* this then the result will be 1.000000 */ - uprv_decNumberZero(d); /* clean */ - *d->lsu=4; /* set 4 .. */ - d->exponent=-set->digits; /* * 10**(-d) */ - if (decNumberIsNegative(rhs)) d->exponent--; /* negative case */ - comp=decCompare(d, rhs, 1); /* signless compare */ - if (comp==BADINT) { - *status|=DEC_Insufficient_storage; - break;} - if (comp>=0) { /* rhs < d */ - Int shift=set->digits-1; - uprv_decNumberZero(res); /* set 1 */ - *res->lsu=1; /* .. */ - res->digits=decShiftToMost(res->lsu, 1, shift); - res->exponent=-shift; /* make 1.0000... */ - *status|=DEC_Inexact | DEC_Rounded; /* .. 
inexactly */ - break;} /* tiny */ - - /* set up the context to be used for calculating a, as this is */ - /* used on both paths below */ - uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); - /* accumulator bounds are as requested (could underflow) */ - aset.emax=set->emax; /* usual bounds */ - aset.emin=set->emin; /* .. */ - aset.clamp=0; /* and no concrete format */ - - /* calculate the adjusted (Hull & Abrham) exponent (where the */ - /* decimal point is just to the left of the coefficient msd) */ - h=rhs->exponent+rhs->digits; - /* if h>8 then 10**h cannot be calculated safely; however, when */ - /* h=8 then exp(|rhs|) will be at least exp(1E+7) which is at */ - /* least 6.59E+4342944, so (due to the restriction on Emax/Emin) */ - /* overflow (or underflow to 0) is guaranteed -- so this case can */ - /* be handled by simply forcing the appropriate excess */ - if (h>8) { /* overflow/underflow */ - /* set up here so Power call below will over or underflow to */ - /* zero; set accumulator to either 2 or 0.02 */ - /* [stack buffer for a is always big enough for this] */ - uprv_decNumberZero(a); - *a->lsu=2; /* not 1 but < exp(1) */ - if (decNumberIsNegative(rhs)) a->exponent=-2; /* make 0.02 */ - h=8; /* clamp so 10**h computable */ - p=9; /* set a working precision */ - } - else { /* h<=8 */ - Int maxlever=(rhs->digits>8?1:0); - /* [could/should increase this for precisions >40 or so, too] */ - - /* if h is 8, cannot normalize to a lower upper limit because */ - /* the final result will not be computable (see notes above), */ - /* but leverage can be applied whenever h is less than 8. */ - /* Apply as much as possible, up to a MAXLEVER digits, which */ - /* sets the tradeoff against the cost of the later a**(10**h). */ - /* As h is increased, the working precision below also */ - /* increases to compensate for the "constant digits at the */ - /* front" effect. 
*/ - Int lever=MINI(8-h, maxlever); /* leverage attainable */ - Int use=-rhs->digits-lever; /* exponent to use for RHS */ - h+=lever; /* apply leverage selected */ - if (h<0) { /* clamp */ - use+=h; /* [may end up subnormal] */ - h=0; - } - /* Take a copy of RHS if it needs normalization (true whenever x>=1) */ - if (rhs->exponent!=use) { - decNumber *newrhs=bufr; /* assume will fit on stack */ - needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); - if (needbytes>sizeof(bufr)) { /* need malloc space */ - allocrhs=(decNumber *)malloc(needbytes); - if (allocrhs==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - newrhs=allocrhs; /* use the allocated space */ - } - uprv_decNumberCopy(newrhs, rhs); /* copy to safe space */ - newrhs->exponent=use; /* normalize; now <1 */ - x=newrhs; /* ready for use */ - /* decNumberShow(x); */ - } - - /* Now use the usual power series to evaluate exp(x). The */ - /* series starts as 1 + x + x^2/2 ... so prime ready for the */ - /* third term by setting the term variable t=x, the accumulator */ - /* a=1, and the divisor d=2. */ - - /* First determine the working precision. From Hull & Abrham */ - /* this is set->digits+h+2. However, if x is 'over-precise' we */ - /* need to allow for all its digits to potentially participate */ - /* (consider an x where all the excess digits are 9s) so in */ - /* this case use x->digits+h+2 */ - p=MAXI(x->digits, set->digits)+h+2; /* [h<=8] */ - - /* a and t are variable precision, and depend on p, so space */ - /* must be allocated for them if necessary */ - - /* the accumulator needs to be able to hold 2p digits so that */ - /* the additions on the second and subsequent iterations are */ - /* sufficiently exact. 
*/ - needbytes=sizeof(decNumber)+(D2U(p*2)-1)*sizeof(Unit); - if (needbytes>sizeof(bufa)) { /* need malloc space */ - allocbufa=(decNumber *)malloc(needbytes); - if (allocbufa==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - a=allocbufa; /* use the allocated space */ - } - /* the term needs to be able to hold p digits (which is */ - /* guaranteed to be larger than x->digits, so the initial copy */ - /* is safe); it may also be used for the raise-to-power */ - /* calculation below, which needs an extra two digits */ - needbytes=sizeof(decNumber)+(D2U(p+2)-1)*sizeof(Unit); - if (needbytes>sizeof(buft)) { /* need malloc space */ - allocbuft=(decNumber *)malloc(needbytes); - if (allocbuft==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - t=allocbuft; /* use the allocated space */ - } - - uprv_decNumberCopy(t, x); /* term=x */ - uprv_decNumberZero(a); *a->lsu=1; /* accumulator=1 */ - uprv_decNumberZero(d); *d->lsu=2; /* divisor=2 */ - uprv_decNumberZero(&numone); *numone.lsu=1; /* constant 1 for increment */ - - /* set up the contexts for calculating a, t, and d */ - uprv_decContextDefault(&tset, DEC_INIT_DECIMAL64); - dset=tset; - /* accumulator bounds are set above, set precision now */ - aset.digits=p*2; /* double */ - /* term bounds avoid any underflow or overflow */ - tset.digits=p; - tset.emin=DEC_MIN_EMIN; /* [emax is plenty] */ - /* [dset.digits=16, etc., are sufficient] */ - - /* finally ready to roll */ - for (;;) { - #if DECCHECK - iterations++; - #endif - /* only the status from the accumulation is interesting */ - /* [but it should remain unchanged after first add] */ - decAddOp(a, a, t, &aset, 0, status); /* a=a+t */ - decMultiplyOp(t, t, x, &tset, &ignore); /* t=t*x */ - decDivideOp(t, t, d, &tset, DIVIDE, &ignore); /* t=t/d */ - /* the iteration ends when the term cannot affect the result, */ - /* if rounded to p digits, which is when its value is smaller */ - /* than the accumulator by 
p+1 digits. There must also be */ - /* full precision in a. */ - if (((a->digits+a->exponent)>=(t->digits+t->exponent+p+1)) - && (a->digits>=p)) break; - decAddOp(d, d, &numone, &dset, 0, &ignore); /* d=d+1 */ - } /* iterate */ - - #if DECCHECK - /* just a sanity check; comment out test to show always */ - if (iterations>p+3) - printf("Exp iterations=%ld, status=%08lx, p=%ld, d=%ld\n", - (LI)iterations, (LI)*status, (LI)p, (LI)x->digits); - #endif - } /* h<=8 */ - - /* apply postconditioning: a=a**(10**h) -- this is calculated */ - /* at a slightly higher precision than Hull & Abrham suggest */ - if (h>0) { - Int seenbit=0; /* set once a 1-bit is seen */ - Int i; /* counter */ - Int n=powers[h]; /* always positive */ - aset.digits=p+2; /* sufficient precision */ - /* avoid the overhead and many extra digits of decNumberPower */ - /* as all that is needed is the short 'multipliers' loop; here */ - /* accumulate the answer into t */ - uprv_decNumberZero(t); *t->lsu=1; /* acc=1 */ - for (i=1;;i++){ /* for each bit [top bit ignored] */ - /* abandon if have had overflow or terminal underflow */ - if (*status & (DEC_Overflow|DEC_Underflow)) { /* interesting? */ - if (*status&DEC_Overflow || ISZERO(t)) break;} - n=n<<1; /* move next bit to testable position */ - if (n<0) { /* top bit is set */ - seenbit=1; /* OK, have a significant bit */ - decMultiplyOp(t, t, a, &aset, status); /* acc=acc*x */ - } - if (i==31) break; /* that was the last bit */ - if (!seenbit) continue; /* no need to square 1 */ - decMultiplyOp(t, t, t, &aset, status); /* acc=acc*acc [square] */ - } /*i*/ /* 32 bits */ - /* decNumberShow(t); */ - a=t; /* and carry on using t instead of a */ - } - - /* Copy and round the result to res */ - residue=1; /* indicate dirt to right .. */ - if (ISZERO(a)) residue=0; /* .. 
unless underflowed to 0 */ - aset.digits=set->digits; /* [use default rounding] */ - decCopyFit(res, a, &aset, &residue, status); /* copy & shorten */ - decFinish(res, set, &residue, status); /* cleanup/set flags */ - } while(0); /* end protected */ - - if (allocrhs !=NULL) free(allocrhs); /* drop any storage used */ - if (allocbufa!=NULL) free(allocbufa); /* .. */ - if (allocbuft!=NULL) free(allocbuft); /* .. */ - /* [status is handled by caller] */ - return res; - } /* decExpOp */ - -/* ------------------------------------------------------------------ */ -/* Initial-estimate natural logarithm table */ -/* */ -/* LNnn -- 90-entry 16-bit table for values from .10 through .99. */ -/* The result is a 4-digit encode of the coefficient (c=the */ -/* top 14 bits encoding 0-9999) and a 2-digit encode of the */ -/* exponent (e=the bottom 2 bits encoding 0-3) */ -/* */ -/* The resulting value is given by: */ -/* */ -/* v = -c * 10**(-e-3) */ -/* */ -/* where e and c are extracted from entry k = LNnn[x-10] */ -/* where x is truncated (NB) into the range 10 through 99, */ -/* and then c = k>>2 and e = k&3. 
*/ -/* ------------------------------------------------------------------ */ -static const uShort LNnn[90]={9016, 8652, 8316, 8008, 7724, 7456, 7208, - 6972, 6748, 6540, 6340, 6148, 5968, 5792, 5628, 5464, 5312, - 5164, 5020, 4884, 4748, 4620, 4496, 4376, 4256, 4144, 4032, - 39233, 38181, 37157, 36157, 35181, 34229, 33297, 32389, 31501, 30629, - 29777, 28945, 28129, 27329, 26545, 25777, 25021, 24281, 23553, 22837, - 22137, 21445, 20769, 20101, 19445, 18801, 18165, 17541, 16925, 16321, - 15721, 15133, 14553, 13985, 13421, 12865, 12317, 11777, 11241, 10717, - 10197, 9685, 9177, 8677, 8185, 7697, 7213, 6737, 6269, 5801, - 5341, 4889, 4437, 39930, 35534, 31186, 26886, 22630, 18418, 14254, - 10130, 6046, 20055}; - -/* ------------------------------------------------------------------ */ -/* decLnOp -- effect natural logarithm */ -/* */ -/* This computes C = ln(A) */ -/* */ -/* res is C, the result. C may be A */ -/* rhs is A */ -/* set is the context; note that rounding mode has no effect */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Notable cases: */ -/* A<0 -> Invalid */ -/* A=0 -> -Infinity (Exact) */ -/* A=+Infinity -> +Infinity (Exact) */ -/* A=1 exactly -> 0 (Exact) */ -/* */ -/* Restrictions (as for Exp): */ -/* */ -/* digits, emax, and -emin in the context must be less than */ -/* DEC_MAX_MATH+11 (1000010), and the rhs must be within these */ -/* bounds or a zero. This is an internal routine, so these */ -/* restrictions are contractual and not enforced. */ -/* */ -/* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will */ -/* almost always be correctly rounded, but may be up to 1 ulp in */ -/* error in rare cases. */ -/* ------------------------------------------------------------------ */ -/* The result is calculated using Newton's method, with each */ -/* iteration calculating a' = a + x * exp(-a) - 1. See, for example, */ -/* Epperson 1989. */ -/* */ -/* The iteration ends when the adjustment x*exp(-a)-1 is tiny enough. 
*/ -/* This has to be calculated at the sum of the precision of x and the */ -/* working precision. */ -/* */ -/* Implementation notes: */ -/* */ -/* 1. This is separated out as decLnOp so it can be called from */ -/* other Mathematical functions (e.g., Log 10) with a wider range */ -/* than normal. In particular, it can handle the slightly wider */ -/* (+9+2) range needed by a power function. */ -/* */ -/* 2. The speed of this function is about 10x slower than exp, as */ -/* it typically needs 4-6 iterations for short numbers, and the */ -/* extra precision needed adds a squaring effect, twice. */ -/* */ -/* 3. Fastpaths are included for ln(10) and ln(2), up to length 40, */ -/* as these are common requests. ln(10) is used by log10(x). */ -/* */ -/* 4. An iteration might be saved by widening the LNnn table, and */ -/* would certainly save at least one if it were made ten times */ -/* bigger, too (for truncated fractions 0.100 through 0.999). */ -/* However, for most practical evaluations, at least four or five */ -/* iterations will be neede -- so this would only speed up by */ -/* 20-25% and that probably does not justify increasing the table */ -/* size. */ -/* */ -/* 5. The static buffers are larger than might be expected to allow */ -/* for calls from decNumberPower. 
*/ -/* ------------------------------------------------------------------ */ -#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif -decNumber * decLnOp(decNumber *res, const decNumber *rhs, - decContext *set, uInt *status) { - uInt ignore=0; /* working status accumulator */ - uInt needbytes; /* for space calculations */ - Int residue; /* rounding residue */ - Int r; /* rhs=f*10**r [see below] */ - Int p; /* working precision */ - Int pp; /* precision for iteration */ - Int t; /* work */ - - /* buffers for a (accumulator, typically precision+2) and b */ - /* (adjustment calculator, same size) */ - decNumber bufa[D2N(DECBUFFER+12)]; - decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ - decNumber *a=bufa; /* accumulator/work */ - decNumber bufb[D2N(DECBUFFER*2+2)]; - decNumber *allocbufb=NULL; /* -> allocated bufa, iff allocated */ - decNumber *b=bufb; /* adjustment/work */ - - decNumber numone; /* constant 1 */ - decNumber cmp; /* work */ - decContext aset, bset; /* working contexts */ - - #if DECCHECK - Int iterations=0; /* for later sanity check */ - if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - if (SPECIALARG) { /* handle infinities and NaNs */ - if (decNumberIsInfinite(rhs)) { /* an infinity */ - if (decNumberIsNegative(rhs)) /* -Infinity -> error */ - *status|=DEC_Invalid_operation; - else uprv_decNumberCopy(res, rhs); /* +Infinity -> self */ - } - else decNaNs(res, rhs, NULL, set, status); /* a NaN */ - break;} - - if (ISZERO(rhs)) { /* +/- zeros -> -Infinity */ - uprv_decNumberZero(res); /* make clean */ - res->bits=DECINF|DECNEG; /* set - infinity */ - break;} /* [no status to set] */ - - /* Non-zero negatives are bad... 
*/ - if (decNumberIsNegative(rhs)) { /* -x -> error */ - *status|=DEC_Invalid_operation; - break;} - - /* Here, rhs is positive, finite, and in range */ - - /* lookaside fastpath code for ln(2) and ln(10) at common lengths */ - if (rhs->exponent==0 && set->digits<=40) { - #if DECDPUN==1 - if (rhs->lsu[0]==0 && rhs->lsu[1]==1 && rhs->digits==2) { /* ln(10) */ - #else - if (rhs->lsu[0]==10 && rhs->digits==2) { /* ln(10) */ - #endif - aset=*set; aset.round=DEC_ROUND_HALF_EVEN; - #define LN10 "2.302585092994045684017991454684364207601" - uprv_decNumberFromString(res, LN10, &aset); - *status|=(DEC_Inexact | DEC_Rounded); /* is inexact */ - break;} - if (rhs->lsu[0]==2 && rhs->digits==1) { /* ln(2) */ - aset=*set; aset.round=DEC_ROUND_HALF_EVEN; - #define LN2 "0.6931471805599453094172321214581765680755" - uprv_decNumberFromString(res, LN2, &aset); - *status|=(DEC_Inexact | DEC_Rounded); - break;} - } /* integer and short */ - - /* Determine the working precision. This is normally the */ - /* requested precision + 2, with a minimum of 9. However, if */ - /* the rhs is 'over-precise' then allow for all its digits to */ - /* potentially participate (consider an rhs where all the excess */ - /* digits are 9s) so in this case use rhs->digits+2. */ - p=MAXI(rhs->digits, MAXI(set->digits, 7))+2; - - /* Allocate space for the accumulator and the high-precision */ - /* adjustment calculator, if necessary. The accumulator must */ - /* be able to hold p digits, and the adjustment up to */ - /* rhs->digits+p digits. They are also made big enough for 16 */ - /* digits so that they can be used for calculating the initial */ - /* estimate. 
*/ - needbytes=sizeof(decNumber)+(D2U(MAXI(p,16))-1)*sizeof(Unit); - if (needbytes>sizeof(bufa)) { /* need malloc space */ - allocbufa=(decNumber *)malloc(needbytes); - if (allocbufa==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - a=allocbufa; /* use the allocated space */ - } - pp=p+rhs->digits; - needbytes=sizeof(decNumber)+(D2U(MAXI(pp,16))-1)*sizeof(Unit); - if (needbytes>sizeof(bufb)) { /* need malloc space */ - allocbufb=(decNumber *)malloc(needbytes); - if (allocbufb==NULL) { /* hopeless -- abandon */ - *status|=DEC_Insufficient_storage; - break;} - b=allocbufb; /* use the allocated space */ - } - - /* Prepare an initial estimate in acc. Calculate this by */ - /* considering the coefficient of x to be a normalized fraction, */ - /* f, with the decimal point at far left and multiplied by */ - /* 10**r. Then, rhs=f*10**r and 0.1<=f<1, and */ - /* ln(x) = ln(f) + ln(10)*r */ - /* Get the initial estimate for ln(f) from a small lookup */ - /* table (see above) indexed by the first two digits of f, */ - /* truncated. */ - - uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); /* 16-digit extended */ - r=rhs->exponent+rhs->digits; /* 'normalised' exponent */ - uprv_decNumberFromInt32(a, r); /* a=r */ - uprv_decNumberFromInt32(b, 2302585); /* b=ln(10) (2.302585) */ - b->exponent=-6; /* .. 
*/ - decMultiplyOp(a, a, b, &aset, &ignore); /* a=a*b */ - /* now get top two digits of rhs into b by simple truncate and */ - /* force to integer */ - residue=0; /* (no residue) */ - aset.digits=2; aset.round=DEC_ROUND_DOWN; - decCopyFit(b, rhs, &aset, &residue, &ignore); /* copy & shorten */ - b->exponent=0; /* make integer */ - t=decGetInt(b); /* [cannot fail] */ - if (t<10) t=X10(t); /* adjust single-digit b */ - t=LNnn[t-10]; /* look up ln(b) */ - uprv_decNumberFromInt32(b, t>>2); /* b=ln(b) coefficient */ - b->exponent=-(t&3)-3; /* set exponent */ - b->bits=DECNEG; /* ln(0.10)->ln(0.99) always -ve */ - aset.digits=16; aset.round=DEC_ROUND_HALF_EVEN; /* restore */ - decAddOp(a, a, b, &aset, 0, &ignore); /* acc=a+b */ - /* the initial estimate is now in a, with up to 4 digits correct. */ - /* When rhs is at or near Nmax the estimate will be low, so we */ - /* will approach it from below, avoiding overflow when calling exp. */ - - uprv_decNumberZero(&numone); *numone.lsu=1; /* constant 1 for adjustment */ - - /* accumulator bounds are as requested (could underflow, but */ - /* cannot overflow) */ - aset.emax=set->emax; - aset.emin=set->emin; - aset.clamp=0; /* no concrete format */ - /* set up a context to be used for the multiply and subtract */ - bset=aset; - bset.emax=DEC_MAX_MATH*2; /* use double bounds for the */ - bset.emin=-DEC_MAX_MATH*2; /* adjustment calculation */ - /* [see decExpOp call below] */ - /* for each iteration double the number of digits to calculate, */ - /* up to a maximum of p */ - pp=9; /* initial precision */ - /* [initially 9 as then the sequence starts 7+2, 16+2, and */ - /* 34+2, which is ideal for standard-sized numbers] */ - aset.digits=pp; /* working context */ - bset.digits=pp+rhs->digits; /* wider context */ - for (;;) { /* iterate */ - #if DECCHECK - iterations++; - if (iterations>24) break; /* consider 9 * 2**24 */ - #endif - /* calculate the adjustment (exp(-a)*x-1) into b. 
This is a */ - /* catastrophic subtraction but it really is the difference */ - /* from 1 that is of interest. */ - /* Use the internal entry point to Exp as it allows the double */ - /* range for calculating exp(-a) when a is the tiniest subnormal. */ - a->bits^=DECNEG; /* make -a */ - decExpOp(b, a, &bset, &ignore); /* b=exp(-a) */ - a->bits^=DECNEG; /* restore sign of a */ - /* now multiply by rhs and subtract 1, at the wider precision */ - decMultiplyOp(b, b, rhs, &bset, &ignore); /* b=b*rhs */ - decAddOp(b, b, &numone, &bset, DECNEG, &ignore); /* b=b-1 */ - - /* the iteration ends when the adjustment cannot affect the */ - /* result by >=0.5 ulp (at the requested digits), which */ - /* is when its value is smaller than the accumulator by */ - /* set->digits+1 digits (or it is zero) -- this is a looser */ - /* requirement than for Exp because all that happens to the */ - /* accumulator after this is the final rounding (but note that */ - /* there must also be full precision in a, or a=0). */ - - if (decNumberIsZero(b) || - (a->digits+a->exponent)>=(b->digits+b->exponent+set->digits+1)) { - if (a->digits==p) break; - if (decNumberIsZero(a)) { - decCompareOp(&cmp, rhs, &numone, &aset, COMPARE, &ignore); /* rhs=1 ? */ - if (cmp.lsu[0]==0) a->exponent=0; /* yes, exact 0 */ - else *status|=(DEC_Inexact | DEC_Rounded); /* no, inexact */ - break; - } - /* force padding if adjustment has gone to 0 before full length */ - if (decNumberIsZero(b)) b->exponent=a->exponent-p; - } - - /* not done yet ... 
*/ - decAddOp(a, a, b, &aset, 0, &ignore); /* a=a+b for next estimate */ - if (pp==p) continue; /* precision is at maximum */ - /* lengthen the next calculation */ - pp=pp*2; /* double precision */ - if (pp>p) pp=p; /* clamp to maximum */ - aset.digits=pp; /* working context */ - bset.digits=pp+rhs->digits; /* wider context */ - } /* Newton's iteration */ - - #if DECCHECK - /* just a sanity check; remove the test to show always */ - if (iterations>24) - printf("Ln iterations=%ld, status=%08lx, p=%ld, d=%ld\n", - (LI)iterations, (LI)*status, (LI)p, (LI)rhs->digits); - #endif - - /* Copy and round the result to res */ - residue=1; /* indicate dirt to right */ - if (ISZERO(a)) residue=0; /* .. unless underflowed to 0 */ - aset.digits=set->digits; /* [use default rounding] */ - decCopyFit(res, a, &aset, &residue, status); /* copy & shorten */ - decFinish(res, set, &residue, status); /* cleanup/set flags */ - } while(0); /* end protected */ - - if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ - if (allocbufb!=NULL) free(allocbufb); /* .. */ - /* [status is handled by caller] */ - return res; - } /* decLnOp */ -#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 -#pragma GCC diagnostic pop -#endif - -/* ------------------------------------------------------------------ */ -/* decQuantizeOp -- force exponent to requested value */ -/* */ -/* This computes C = op(A, B), where op adjusts the coefficient */ -/* of C (by rounding or shifting) such that the exponent (-scale) */ -/* of C has the value B or matches the exponent of B. */ -/* The numerical value of C will equal A, except for the effects of */ -/* any rounding that occurred. */ -/* */ -/* res is C, the result. 
C may be A or B */ -/* lhs is A, the number to adjust */ -/* rhs is B, the requested exponent */ -/* set is the context */ -/* quant is 1 for quantize or 0 for rescale */ -/* status is the status accumulator (this can be called without */ -/* risk of control loss) */ -/* */ -/* C must have space for set->digits digits. */ -/* */ -/* Unless there is an error or the result is infinite, the exponent */ -/* after the operation is guaranteed to be that requested. */ -/* ------------------------------------------------------------------ */ -static decNumber * decQuantizeOp(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set, - Flag quant, uInt *status) { - #if DECSUBSET - decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ - decNumber *allocrhs=NULL; /* .., rhs */ - #endif - const decNumber *inrhs=rhs; /* save original rhs */ - Int reqdigits=set->digits; /* requested DIGITS */ - Int reqexp; /* requested exponent [-scale] */ - Int residue=0; /* rounding residue */ - Int etiny=set->emin-(reqdigits-1); - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operands and set lostDigits status, as needed */ - if (lhs->digits>reqdigits) { - alloclhs=decRoundOperand(lhs, set, status); - if (alloclhs==NULL) break; - lhs=alloclhs; - } - if (rhs->digits>reqdigits) { /* [this only checks lostDigits] */ - allocrhs=decRoundOperand(rhs, set, status); - if (allocrhs==NULL) break; - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - /* Handle special values */ - if (SPECIALARGS) { - /* NaNs get usual processing */ - if (SPECIALARGS & (DECSNAN | DECNAN)) - decNaNs(res, lhs, rhs, set, status); - /* one infinity but not both is bad */ - else if ((lhs->bits ^ rhs->bits) & DECINF) - *status|=DEC_Invalid_operation; - /* both infinity: return lhs */ - else uprv_decNumberCopy(res, lhs); /* [nop if 
in place] */ - break; - } - - /* set requested exponent */ - if (quant) reqexp=inrhs->exponent; /* quantize -- match exponents */ - else { /* rescale -- use value of rhs */ - /* Original rhs must be an integer that fits and is in range, */ - /* which could be from -1999999997 to +999999999, thanks to */ - /* subnormals */ - reqexp=decGetInt(inrhs); /* [cannot fail] */ - } - - #if DECSUBSET - if (!set->extended) etiny=set->emin; /* no subnormals */ - #endif - - if (reqexp==BADINT /* bad (rescale only) or .. */ - || reqexp==BIGODD || reqexp==BIGEVEN /* very big (ditto) or .. */ - || (reqexpset->emax)) { /* > emax */ - *status|=DEC_Invalid_operation; - break;} - - /* the RHS has been processed, so it can be overwritten now if necessary */ - if (ISZERO(lhs)) { /* zero coefficient unchanged */ - uprv_decNumberCopy(res, lhs); /* [nop if in place] */ - res->exponent=reqexp; /* .. just set exponent */ - #if DECSUBSET - if (!set->extended) res->bits=0; /* subset specification; no -0 */ - #endif - } - else { /* non-zero lhs */ - Int adjust=reqexp-lhs->exponent; /* digit adjustment needed */ - /* if adjusted coefficient will definitely not fit, give up now */ - if ((lhs->digits-adjust)>reqdigits) { - *status|=DEC_Invalid_operation; - break; - } - - if (adjust>0) { /* increasing exponent */ - /* this will decrease the length of the coefficient by adjust */ - /* digits, and must round as it does so */ - decContext workset; /* work */ - workset=*set; /* clone rounding, etc. */ - workset.digits=lhs->digits-adjust; /* set requested length */ - /* [note that the latter can be <1, here] */ - decCopyFit(res, lhs, &workset, &residue, status); /* fit to result */ - decApplyRound(res, &workset, residue, status); /* .. and round */ - residue=0; /* [used] */ - /* If just rounded a 999s case, exponent will be off by one; */ - /* adjust back (after checking space), if so. 
*/ - if (res->exponent>reqexp) { - /* re-check needed, e.g., for quantize(0.9999, 0.001) under */ - /* set->digits==3 */ - if (res->digits==reqdigits) { /* cannot shift by 1 */ - *status&=~(DEC_Inexact | DEC_Rounded); /* [clean these] */ - *status|=DEC_Invalid_operation; - break; - } - res->digits=decShiftToMost(res->lsu, res->digits, 1); /* shift */ - res->exponent--; /* (re)adjust the exponent. */ - } - #if DECSUBSET - if (ISZERO(res) && !set->extended) res->bits=0; /* subset; no -0 */ - #endif - } /* increase */ - else /* adjust<=0 */ { /* decreasing or = exponent */ - /* this will increase the length of the coefficient by -adjust */ - /* digits, by adding zero or more trailing zeros; this is */ - /* already checked for fit, above */ - uprv_decNumberCopy(res, lhs); /* [it will fit] */ - /* if padding needed (adjust<0), add it now... */ - if (adjust<0) { - res->digits=decShiftToMost(res->lsu, res->digits, -adjust); - res->exponent+=adjust; /* adjust the exponent */ - } - } /* decrease */ - } /* non-zero */ - - /* Check for overflow [do not use Finalize in this case, as an */ - /* overflow here is a "don't fit" situation] */ - if (res->exponent>set->emax-res->digits+1) { /* too big */ - *status|=DEC_Invalid_operation; - break; - } - else { - decFinalize(res, set, &residue, status); /* set subnormal flags */ - *status&=~DEC_Underflow; /* suppress Underflow [as per 754] */ - } - } while(0); /* end protected */ - - #if DECSUBSET - if (allocrhs!=NULL) free(allocrhs); /* drop any storage used */ - if (alloclhs!=NULL) free(alloclhs); /* .. */ - #endif - return res; - } /* decQuantizeOp */ - -/* ------------------------------------------------------------------ */ -/* decCompareOp -- compare, min, or max two Numbers */ -/* */ -/* This computes C = A ? 
B and carries out one of four operations: */ -/* COMPARE -- returns the signum (as a number) giving the */ -/* result of a comparison unless one or both */ -/* operands is a NaN (in which case a NaN results) */ -/* COMPSIG -- as COMPARE except that a quiet NaN raises */ -/* Invalid operation. */ -/* COMPMAX -- returns the larger of the operands, using the */ -/* 754 maxnum operation */ -/* COMPMAXMAG -- ditto, comparing absolute values */ -/* COMPMIN -- the 754 minnum operation */ -/* COMPMINMAG -- ditto, comparing absolute values */ -/* COMTOTAL -- returns the signum (as a number) giving the */ -/* result of a comparison using 754 total ordering */ -/* */ -/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ -/* lhs is A */ -/* rhs is B */ -/* set is the context */ -/* op is the operation flag */ -/* status is the usual accumulator */ -/* */ -/* C must have space for one digit for COMPARE or set->digits for */ -/* COMPMAX, COMPMIN, COMPMAXMAG, or COMPMINMAG. */ -/* ------------------------------------------------------------------ */ -/* The emphasis here is on speed for common cases, and avoiding */ -/* coefficient comparison if possible. 
*/ -/* ------------------------------------------------------------------ */ -static decNumber * decCompareOp(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set, - Flag op, uInt *status) { - #if DECSUBSET - decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ - decNumber *allocrhs=NULL; /* .., rhs */ - #endif - Int result=0; /* default result value */ - uByte merged; /* work */ - - #if DECCHECK - if (decCheckOperands(res, lhs, rhs, set)) return res; - #endif - - do { /* protect allocated storage */ - #if DECSUBSET - if (!set->extended) { - /* reduce operands and set lostDigits status, as needed */ - if (lhs->digits>set->digits) { - alloclhs=decRoundOperand(lhs, set, status); - if (alloclhs==NULL) {result=BADINT; break;} - lhs=alloclhs; - } - if (rhs->digits>set->digits) { - allocrhs=decRoundOperand(rhs, set, status); - if (allocrhs==NULL) {result=BADINT; break;} - rhs=allocrhs; - } - } - #endif - /* [following code does not require input rounding] */ - - /* If total ordering then handle differing signs 'up front' */ - if (op==COMPTOTAL) { /* total ordering */ - if (decNumberIsNegative(lhs) && !decNumberIsNegative(rhs)) { - result=-1; - break; - } - if (!decNumberIsNegative(lhs) && decNumberIsNegative(rhs)) { - result=+1; - break; - } - } - - /* handle NaNs specially; let infinities drop through */ - /* This assumes sNaN (even just one) leads to NaN. 
*/ - merged=(lhs->bits | rhs->bits) & (DECSNAN | DECNAN); - if (merged) { /* a NaN bit set */ - if (op==COMPARE); /* result will be NaN */ - else if (op==COMPSIG) /* treat qNaN as sNaN */ - *status|=DEC_Invalid_operation | DEC_sNaN; - else if (op==COMPTOTAL) { /* total ordering, always finite */ - /* signs are known to be the same; compute the ordering here */ - /* as if the signs are both positive, then invert for negatives */ - if (!decNumberIsNaN(lhs)) result=-1; - else if (!decNumberIsNaN(rhs)) result=+1; - /* here if both NaNs */ - else if (decNumberIsSNaN(lhs) && decNumberIsQNaN(rhs)) result=-1; - else if (decNumberIsQNaN(lhs) && decNumberIsSNaN(rhs)) result=+1; - else { /* both NaN or both sNaN */ - /* now it just depends on the payload */ - result=decUnitCompare(lhs->lsu, D2U(lhs->digits), - rhs->lsu, D2U(rhs->digits), 0); - /* [Error not possible, as these are 'aligned'] */ - } /* both same NaNs */ - if (decNumberIsNegative(lhs)) result=-result; - break; - } /* total order */ - - else if (merged & DECSNAN); /* sNaN -> qNaN */ - else { /* here if MIN or MAX and one or two quiet NaNs */ - /* min or max -- 754 rules ignore single NaN */ - if (!decNumberIsNaN(lhs) || !decNumberIsNaN(rhs)) { - /* just one NaN; force choice to be the non-NaN operand */ - op=COMPMAX; - if (lhs->bits & DECNAN) result=-1; /* pick rhs */ - else result=+1; /* pick lhs */ - break; - } - } /* max or min */ - op=COMPNAN; /* use special path */ - decNaNs(res, lhs, rhs, set, status); /* propagate NaN */ - break; - } - /* have numbers */ - if (op==COMPMAXMAG || op==COMPMINMAG) result=decCompare(lhs, rhs, 1); - else result=decCompare(lhs, rhs, 0); /* sign matters */ - } while(0); /* end protected */ - - if (result==BADINT) *status|=DEC_Insufficient_storage; /* rare */ - else { - if (op==COMPARE || op==COMPSIG ||op==COMPTOTAL) { /* returning signum */ - if (op==COMPTOTAL && result==0) { - /* operands are numerically equal or same NaN (and same sign, */ - /* tested first); if identical, leave 
result 0 */ - if (lhs->exponent!=rhs->exponent) { - if (lhs->exponentexponent) result=-1; - else result=+1; - if (decNumberIsNegative(lhs)) result=-result; - } /* lexp!=rexp */ - } /* total-order by exponent */ - uprv_decNumberZero(res); /* [always a valid result] */ - if (result!=0) { /* must be -1 or +1 */ - *res->lsu=1; - if (result<0) res->bits=DECNEG; - } - } - else if (op==COMPNAN); /* special, drop through */ - else { /* MAX or MIN, non-NaN result */ - Int residue=0; /* rounding accumulator */ - /* choose the operand for the result */ - const decNumber *choice; - if (result==0) { /* operands are numerically equal */ - /* choose according to sign then exponent (see 754) */ - uByte slhs=(lhs->bits & DECNEG); - uByte srhs=(rhs->bits & DECNEG); - #if DECSUBSET - if (!set->extended) { /* subset: force left-hand */ - op=COMPMAX; - result=+1; - } - else - #endif - if (slhs!=srhs) { /* signs differ */ - if (slhs) result=-1; /* rhs is max */ - else result=+1; /* lhs is max */ - } - else if (slhs && srhs) { /* both negative */ - if (lhs->exponentexponent) result=+1; - else result=-1; - /* [if equal, use lhs, technically identical] */ - } - else { /* both positive */ - if (lhs->exponent>rhs->exponent) result=+1; - else result=-1; - /* [ditto] */ - } - } /* numerically equal */ - /* here result will be non-0; reverse if looking for MIN */ - if (op==COMPMIN || op==COMPMINMAG) result=-result; - choice=(result>0 ? lhs : rhs); /* choose */ - /* copy chosen to result, rounding if need be */ - decCopyFit(res, choice, set, &residue, status); - decFinish(res, set, &residue, status); - } - } - #if DECSUBSET - if (allocrhs!=NULL) free(allocrhs); /* free any storage used */ - if (alloclhs!=NULL) free(alloclhs); /* .. */ - #endif - return res; - } /* decCompareOp */ - -/* ------------------------------------------------------------------ */ -/* decCompare -- compare two decNumbers by numerical value */ -/* */ -/* This routine compares A ? B without altering them. 
*/ -/* */ -/* Arg1 is A, a decNumber which is not a NaN */ -/* Arg2 is B, a decNumber which is not a NaN */ -/* Arg3 is 1 for a sign-independent compare, 0 otherwise */ -/* */ -/* returns -1, 0, or 1 for AB, or BADINT if failure */ -/* (the only possible failure is an allocation error) */ -/* ------------------------------------------------------------------ */ -static Int decCompare(const decNumber *lhs, const decNumber *rhs, - Flag abs_c) { - Int result; /* result value */ - Int sigr; /* rhs signum */ - Int compare; /* work */ - - result=1; /* assume signum(lhs) */ - if (ISZERO(lhs)) result=0; - if (abs_c) { - if (ISZERO(rhs)) return result; /* LHS wins or both 0 */ - /* RHS is non-zero */ - if (result==0) return -1; /* LHS is 0; RHS wins */ - /* [here, both non-zero, result=1] */ - } - else { /* signs matter */ - if (result && decNumberIsNegative(lhs)) result=-1; - sigr=1; /* compute signum(rhs) */ - if (ISZERO(rhs)) sigr=0; - else if (decNumberIsNegative(rhs)) sigr=-1; - if (result > sigr) return +1; /* L > R, return 1 */ - if (result < sigr) return -1; /* L < R, return -1 */ - if (result==0) return 0; /* both 0 */ - } - - /* signums are the same; both are non-zero */ - if ((lhs->bits | rhs->bits) & DECINF) { /* one or more infinities */ - if (decNumberIsInfinite(rhs)) { - if (decNumberIsInfinite(lhs)) result=0;/* both infinite */ - else result=-result; /* only rhs infinite */ - } - return result; - } - /* must compare the coefficients, allowing for exponents */ - if (lhs->exponent>rhs->exponent) { /* LHS exponent larger */ - /* swap sides, and sign */ - const decNumber *temp=lhs; - lhs=rhs; - rhs=temp; - result=-result; - } - compare=decUnitCompare(lhs->lsu, D2U(lhs->digits), - rhs->lsu, D2U(rhs->digits), - rhs->exponent-lhs->exponent); - if (compare!=BADINT) compare*=result; /* comparison succeeded */ - return compare; - } /* decCompare */ - -/* ------------------------------------------------------------------ */ -/* decUnitCompare -- compare two >=0 
integers in Unit arrays */ -/* */ -/* This routine compares A ? B*10**E where A and B are unit arrays */ -/* A is a plain integer */ -/* B has an exponent of E (which must be non-negative) */ -/* */ -/* Arg1 is A first Unit (lsu) */ -/* Arg2 is A length in Units */ -/* Arg3 is B first Unit (lsu) */ -/* Arg4 is B length in Units */ -/* Arg5 is E (0 if the units are aligned) */ -/* */ -/* returns -1, 0, or 1 for AB, or BADINT if failure */ -/* (the only possible failure is an allocation error, which can */ -/* only occur if E!=0) */ -/* ------------------------------------------------------------------ */ -static Int decUnitCompare(const Unit *a, Int alength, - const Unit *b, Int blength, Int exp) { - Unit *acc; /* accumulator for result */ - Unit accbuff[SD2U(DECBUFFER*2+1)]; /* local buffer */ - Unit *allocacc=NULL; /* -> allocated acc buffer, iff allocated */ - Int accunits, need; /* units in use or needed for acc */ - const Unit *l, *r, *u; /* work */ - Int expunits, exprem, result; /* .. */ - - if (exp==0) { /* aligned; fastpath */ - if (alength>blength) return 1; - if (alength=a; l--, r--) { - if (*l>*r) return 1; - if (*l<*r) return -1; - } - return 0; /* all units match */ - } /* aligned */ - - /* Unaligned. If one is >1 unit longer than the other, padded */ - /* approximately, then can return easily */ - if (alength>blength+(Int)D2U(exp)) return 1; - if (alength+1sizeof(accbuff)) { - allocacc=(Unit *)malloc(need*sizeof(Unit)); - if (allocacc==NULL) return BADINT; /* hopeless -- abandon */ - acc=allocacc; - } - /* Calculate units and remainder from exponent. 
*/ - expunits=exp/DECDPUN; - exprem=exp%DECDPUN; - /* subtract [A+B*(-m)] */ - accunits=decUnitAddSub(a, alength, b, blength, expunits, acc, - -(Int)powers[exprem]); - /* [UnitAddSub result may have leading zeros, even on zero] */ - if (accunits<0) result=-1; /* negative result */ - else { /* non-negative result */ - /* check units of the result before freeing any storage */ - for (u=acc; u=0 integers in Unit arrays */ -/* */ -/* This routine performs the calculation: */ -/* */ -/* C=A+(B*M) */ -/* */ -/* Where M is in the range -DECDPUNMAX through +DECDPUNMAX. */ -/* */ -/* A may be shorter or longer than B. */ -/* */ -/* Leading zeros are not removed after a calculation. The result is */ -/* either the same length as the longer of A and B (adding any */ -/* shift), or one Unit longer than that (if a Unit carry occurred). */ -/* */ -/* A and B content are not altered unless C is also A or B. */ -/* C may be the same array as A or B, but only if no zero padding is */ -/* requested (that is, C may be B only if bshift==0). */ -/* C is filled from the lsu; only those units necessary to complete */ -/* the calculation are referenced. */ -/* */ -/* Arg1 is A first Unit (lsu) */ -/* Arg2 is A length in Units */ -/* Arg3 is B first Unit (lsu) */ -/* Arg4 is B length in Units */ -/* Arg5 is B shift in Units (>=0; pads with 0 units if positive) */ -/* Arg6 is C first Unit (lsu) */ -/* Arg7 is M, the multiplier */ -/* */ -/* returns the count of Units written to C, which will be non-zero */ -/* and negated if the result is negative. That is, the sign of the */ -/* returned Int is the sign of the result (positive for zero) and */ -/* the absolute value of the Int is the count of Units. */ -/* */ -/* It is the caller's responsibility to make sure that C size is */ -/* safe, allowing space if necessary for a one-Unit carry. */ -/* */ -/* This routine is severely performance-critical; *any* change here */ -/* must be measured (timed) to assure no performance degradation. 
*/ -/* In particular, trickery here tends to be counter-productive, as */ -/* increased complexity of code hurts register optimizations on */ -/* register-poor architectures. Avoiding divisions is nearly */ -/* always a Good Idea, however. */ -/* */ -/* Special thanks to Rick McGuire (IBM Cambridge, MA) and Dave Clark */ -/* (IBM Warwick, UK) for some of the ideas used in this routine. */ -/* ------------------------------------------------------------------ */ -static Int decUnitAddSub(const Unit *a, Int alength, - const Unit *b, Int blength, Int bshift, - Unit *c, Int m) { - const Unit *alsu=a; /* A lsu [need to remember it] */ - Unit *clsu=c; /* C ditto */ - Unit *minC; /* low water mark for C */ - Unit *maxC; /* high water mark for C */ - eInt carry=0; /* carry integer (could be Long) */ - Int add; /* work */ - #if DECDPUN<=4 /* myriadal, millenary, etc. */ - Int est; /* estimated quotient */ - #endif - - #if DECTRACE - if (alength<1 || blength<1) - printf("decUnitAddSub: alen blen m %ld %ld [%ld]\n", alength, blength, m); - #endif - - maxC=c+alength; /* A is usually the longer */ - minC=c+blength; /* .. and B the shorter */ - if (bshift!=0) { /* B is shifted; low As copy across */ - minC+=bshift; - /* if in place [common], skip copy unless there's a gap [rare] */ - if (a==c && bshift<=alength) { - c+=bshift; - a+=bshift; - } - else for (; cmaxC) { /* swap */ - Unit *hold=minC; - minC=maxC; - maxC=hold; - } - - /* For speed, do the addition as two loops; the first where both A */ - /* and B contribute, and the second (if necessary) where only one or */ - /* other of the numbers contribute. */ - /* Carry handling is the same (i.e., duplicated) in each case. 
*/ - for (; c=0) { - est=(((ueInt)carry>>11)*53687)>>18; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ - carry=est; /* likely quotient [89%] */ - if (*c>11)*53687)>>18; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); - carry=est-(DECDPUNMAX+1); /* correctly negative */ - if (*c=0) { - est=(((ueInt)carry>>3)*16777)>>21; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ - carry=est; /* likely quotient [99%] */ - if (*c>3)*16777)>>21; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); - carry=est-(DECDPUNMAX+1); /* correctly negative */ - if (*c=0) { - est=QUOT10(carry, DECDPUN); - *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ - carry=est; /* quotient */ - continue; - } - /* negative case */ - carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ - est=QUOT10(carry, DECDPUN); - *c=(Unit)(carry-est*(DECDPUNMAX+1)); - carry=est-(DECDPUNMAX+1); /* correctly negative */ - #else - /* remainder operator is undefined if negative, so must test */ - if ((ueInt)carry<(DECDPUNMAX+1)*2) { /* fastpath carry +1 */ - *c=(Unit)(carry-(DECDPUNMAX+1)); /* [helps additions] */ - carry=1; - continue; - } - if (carry>=0) { - *c=(Unit)(carry%(DECDPUNMAX+1)); - carry=carry/(DECDPUNMAX+1); - continue; - } - /* negative case */ - carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ - *c=(Unit)(carry%(DECDPUNMAX+1)); - carry=carry/(DECDPUNMAX+1)-(DECDPUNMAX+1); - #endif - } /* c */ - - /* now may have one or other to complete */ - /* [pretest to avoid loop setup/shutdown] */ - if (cDECDPUNMAX */ - #if DECDPUN==4 /* use divide-by-multiply */ - if (carry>=0) { - est=(((ueInt)carry>>11)*53687)>>18; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ - carry=est; /* likely quotient [79.7%] */ - if (*c>11)*53687)>>18; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); - carry=est-(DECDPUNMAX+1); /* correctly negative */ - if (*c=0) { - est=(((ueInt)carry>>3)*16777)>>21; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ - carry=est; /* likely quotient [99%] */ - if 
(*c>3)*16777)>>21; - *c=(Unit)(carry-est*(DECDPUNMAX+1)); - carry=est-(DECDPUNMAX+1); /* correctly negative */ - if (*c=0) { - est=QUOT10(carry, DECDPUN); - *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ - carry=est; /* quotient */ - continue; - } - /* negative case */ - carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ - est=QUOT10(carry, DECDPUN); - *c=(Unit)(carry-est*(DECDPUNMAX+1)); - carry=est-(DECDPUNMAX+1); /* correctly negative */ - #else - if ((ueInt)carry<(DECDPUNMAX+1)*2){ /* fastpath carry 1 */ - *c=(Unit)(carry-(DECDPUNMAX+1)); - carry=1; - continue; - } - /* remainder operator is undefined if negative, so must test */ - if (carry>=0) { - *c=(Unit)(carry%(DECDPUNMAX+1)); - carry=carry/(DECDPUNMAX+1); - continue; - } - /* negative case */ - carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ - *c=(Unit)(carry%(DECDPUNMAX+1)); - carry=carry/(DECDPUNMAX+1)-(DECDPUNMAX+1); - #endif - } /* c */ - - /* OK, all A and B processed; might still have carry or borrow */ - /* return number of Units in the result, negated if a borrow */ - if (carry==0) return c-clsu; /* no carry, so no more to do */ - if (carry>0) { /* positive carry */ - *c=(Unit)carry; /* place as new unit */ - c++; /* .. */ - return c-clsu; - } - /* -ve carry: it's a borrow; complement needed */ - add=1; /* temporary carry... */ - for (c=clsu; c current Unit */ - - #if DECCHECK - if (decCheckOperands(dn, DECUNUSED, DECUNUSED, DECUNCONT)) return dn; - #endif - - *dropped=0; /* assume no zeros dropped */ - if ((dn->bits & DECSPECIAL) /* fast exit if special .. */ - || (*dn->lsu & 0x01)) return dn; /* .. or odd */ - if (ISZERO(dn)) { /* .. 
or 0 */ - dn->exponent=0; /* (sign is preserved) */ - return dn; - } - - /* have a finite number which is even */ - exp=dn->exponent; - cut=1; /* digit (1-DECDPUN) in Unit */ - up=dn->lsu; /* -> current Unit */ - for (d=0; ddigits-1; d++) { /* [don't strip the final digit] */ - /* slice by powers */ - #if DECDPUN<=4 - uInt quot=QUOT10(*up, cut); - if ((*up-quot*powers[cut])!=0) break; /* found non-0 digit */ - #else - if (*up%powers[cut]!=0) break; /* found non-0 digit */ - #endif - /* have a trailing 0 */ - if (!all) { /* trimming */ - /* [if exp>0 then all trailing 0s are significant for trim] */ - if (exp<=0) { /* if digit might be significant */ - if (exp==0) break; /* then quit */ - exp++; /* next digit might be significant */ - } - } - cut++; /* next power */ - if (cut>DECDPUN) { /* need new Unit */ - up++; - cut=1; - } - } /* d */ - if (d==0) return dn; /* none to drop */ - - /* may need to limit drop if clamping */ - if (set->clamp && !noclamp) { - Int maxd=set->emax-set->digits+1-dn->exponent; - if (maxd<=0) return dn; /* nothing possible */ - if (d>maxd) d=maxd; - } - - /* effect the drop */ - decShiftToLeast(dn->lsu, D2U(dn->digits), d); - dn->exponent+=d; /* maintain numerical value */ - dn->digits-=d; /* new length */ - *dropped=d; /* report the count */ - return dn; - } /* decTrim */ - -/* ------------------------------------------------------------------ */ -/* decReverse -- reverse a Unit array in place */ -/* */ -/* ulo is the start of the array */ -/* uhi is the end of the array (highest Unit to include) */ -/* */ -/* The units ulo through uhi are reversed in place (if the number */ -/* of units is odd, the middle one is untouched). Note that the */ -/* digit(s) in each unit are unaffected. 
*/ -/* ------------------------------------------------------------------ */ -static void decReverse(Unit *ulo, Unit *uhi) { - Unit temp; - for (; ulo=uar; source--, target--) *target=*source; - } - else { - first=uar+D2U(digits+shift)-1; /* where msu of source will end up */ - for (; source>=uar; source--, target--) { - /* split the source Unit and accumulate remainder for next */ - #if DECDPUN<=4 - uInt quot=QUOT10(*source, cut); - uInt rem=*source-quot*powers[cut]; - next+=quot; - #else - uInt rem=*source%powers[cut]; - next+=*source/powers[cut]; - #endif - if (target<=first) *target=(Unit)next; /* write to target iff valid */ - next=rem*powers[DECDPUN-cut]; /* save remainder for next Unit */ - } - } /* shift-move */ - - /* propagate any partial unit to one below and clear the rest */ - for (; target>=uar; target--) { - *target=(Unit)next; - next=0; - } - return digits+shift; - } /* decShiftToMost */ - -/* ------------------------------------------------------------------ */ -/* decShiftToLeast -- shift digits in array towards least significant */ -/* */ -/* uar is the array */ -/* units is length of the array, in units */ -/* shift is the number of digits to remove from the lsu end; it */ -/* must be zero or positive and <= than units*DECDPUN. */ -/* */ -/* returns the new length of the integer in the array, in units */ -/* */ -/* Removed digits are discarded (lost). Units not required to hold */ -/* the final result are unchanged. 
*/ -/* ------------------------------------------------------------------ */ -static Int decShiftToLeast(Unit *uar, Int units, Int shift) { - Unit *target, *up; /* work */ - Int cut, count; /* work */ - Int quot, rem; /* for division */ - - if (shift==0) return units; /* [fastpath] nothing to do */ - if (shift==units*DECDPUN) { /* [fastpath] little to do */ - *uar=0; /* all digits cleared gives zero */ - return 1; /* leaves just the one */ - } - - target=uar; /* both paths */ - cut=MSUDIGITS(shift); - if (cut==DECDPUN) { /* unit-boundary case; easy */ - up=uar+D2U(shift); - for (; updigits is > set->digits) */ -/* set is the relevant context */ -/* status is the status accumulator */ -/* */ -/* returns an allocated decNumber with the rounded result. */ -/* */ -/* lostDigits and other status may be set by this. */ -/* */ -/* Since the input is an operand, it must not be modified. */ -/* Instead, return an allocated decNumber, rounded as required. */ -/* It is the caller's responsibility to free the allocated storage. */ -/* */ -/* If no storage is available then the result cannot be used, so NULL */ -/* is returned. */ -/* ------------------------------------------------------------------ */ -static decNumber *decRoundOperand(const decNumber *dn, decContext *set, - uInt *status) { - decNumber *res; /* result structure */ - uInt newstatus=0; /* status from round */ - Int residue=0; /* rounding accumulator */ - - /* Allocate storage for the returned decNumber, big enough for the */ - /* length specified by the context */ - res=(decNumber *)malloc(sizeof(decNumber) - +(D2U(set->digits)-1)*sizeof(Unit)); - if (res==NULL) { - *status|=DEC_Insufficient_storage; - return NULL; - } - decCopyFit(res, dn, set, &residue, &newstatus); - decApplyRound(res, set, residue, &newstatus); - - /* If that set Inexact then "lost digits" is raised... 
*/ - if (newstatus & DEC_Inexact) newstatus|=DEC_Lost_digits; - *status|=newstatus; - return res; - } /* decRoundOperand */ -#endif - -/* ------------------------------------------------------------------ */ -/* decCopyFit -- copy a number, truncating the coefficient if needed */ -/* */ -/* dest is the target decNumber */ -/* src is the source decNumber */ -/* set is the context [used for length (digits) and rounding mode] */ -/* residue is the residue accumulator */ -/* status contains the current status to be updated */ -/* */ -/* (dest==src is allowed and will be a no-op if fits) */ -/* All fields are updated as required. */ -/* ------------------------------------------------------------------ */ -static void decCopyFit(decNumber *dest, const decNumber *src, - decContext *set, Int *residue, uInt *status) { - dest->bits=src->bits; - dest->exponent=src->exponent; - decSetCoeff(dest, set, src->lsu, src->digits, residue, status); - } /* decCopyFit */ - -/* ------------------------------------------------------------------ */ -/* decSetCoeff -- set the coefficient of a number */ -/* */ -/* dn is the number whose coefficient array is to be set. */ -/* It must have space for set->digits digits */ -/* set is the context [for size] */ -/* lsu -> lsu of the source coefficient [may be dn->lsu] */ -/* len is digits in the source coefficient [may be dn->digits] */ -/* residue is the residue accumulator. This has values as in */ -/* decApplyRound, and will be unchanged unless the */ -/* target size is less than len. In this case, the */ -/* coefficient is truncated and the residue is updated to */ -/* reflect the previous residue and the dropped digits. */ -/* status is the status accumulator, as usual */ -/* */ -/* The coefficient may already be in the number, or it can be an */ -/* external intermediate array. If it is in the number, lsu must == */ -/* dn->lsu and len must == dn->digits. 
*/ -/* */ -/* Note that the coefficient length (len) may be < set->digits, and */ -/* in this case this merely copies the coefficient (or is a no-op */ -/* if dn->lsu==lsu). */ -/* */ -/* Note also that (only internally, from decQuantizeOp and */ -/* decSetSubnormal) the value of set->digits may be less than one, */ -/* indicating a round to left. This routine handles that case */ -/* correctly; caller ensures space. */ -/* */ -/* dn->digits, dn->lsu (and as required), and dn->exponent are */ -/* updated as necessary. dn->bits (sign) is unchanged. */ -/* */ -/* DEC_Rounded status is set if any digits are discarded. */ -/* DEC_Inexact status is set if any non-zero digits are discarded, or */ -/* incoming residue was non-0 (implies rounded) */ -/* ------------------------------------------------------------------ */ -/* mapping array: maps 0-9 to canonical residues, so that a residue */ -/* can be adjusted in the range [-1, +1] and achieve correct rounding */ -/* 0 1 2 3 4 5 6 7 8 9 */ -static const uByte resmap[10]={0, 3, 3, 3, 3, 5, 7, 7, 7, 7}; -static void decSetCoeff(decNumber *dn, decContext *set, const Unit *lsu, - Int len, Int *residue, uInt *status) { - Int discard; /* number of digits to discard */ - uInt cut; /* cut point in Unit */ - const Unit *up; /* work */ - Unit *target; /* .. */ - Int count; /* .. */ - #if DECDPUN<=4 - uInt temp; /* .. */ - #endif - - discard=len-set->digits; /* digits to discard */ - if (discard<=0) { /* no digits are being discarded */ - if (dn->lsu!=lsu) { /* copy needed */ - /* copy the coefficient array to the result number; no shift needed */ - count=len; /* avoids D2U */ - up=lsu; - for (target=dn->lsu; count>0; target++, up++, count-=DECDPUN) - *target=*up; - dn->digits=len; /* set the new length */ - } - /* dn->exponent and residue are unchanged, record any inexactitude */ - if (*residue!=0) *status|=(DEC_Inexact | DEC_Rounded); - return; - } - - /* some digits must be discarded ... 
*/ - dn->exponent+=discard; /* maintain numerical value */ - *status|=DEC_Rounded; /* accumulate Rounded status */ - if (*residue>1) *residue=1; /* previous residue now to right, so reduce */ - - if (discard>len) { /* everything, +1, is being discarded */ - /* guard digit is 0 */ - /* residue is all the number [NB could be all 0s] */ - if (*residue<=0) { /* not already positive */ - count=len; /* avoids D2U */ - for (up=lsu; count>0; up++, count-=DECDPUN) if (*up!=0) { /* found non-0 */ - *residue=1; - break; /* no need to check any others */ - } - } - if (*residue!=0) *status|=DEC_Inexact; /* record inexactitude */ - *dn->lsu=0; /* coefficient will now be 0 */ - dn->digits=1; /* .. */ - return; - } /* total discard */ - - /* partial discard [most common case] */ - /* here, at least the first (most significant) discarded digit exists */ - - /* spin up the number, noting residue during the spin, until get to */ - /* the Unit with the first discarded digit. When reach it, extract */ - /* it and remember its position */ - count=0; - for (up=lsu;; up++) { - count+=DECDPUN; - if (count>=discard) break; /* full ones all checked */ - if (*up!=0) *residue=1; - } /* up */ - - /* here up -> Unit with first discarded digit */ - cut=discard-(count-DECDPUN)-1; - if (cut==DECDPUN-1) { /* unit-boundary case (fast) */ - Unit half=(Unit)powers[DECDPUN]>>1; - /* set residue directly */ - if (*up>=half) { - if (*up>half) *residue=7; - else *residue+=5; /* add sticky bit */ - } - else { /* digits<=0) { /* special for Quantize/Subnormal :-( */ - *dn->lsu=0; /* .. result is 0 */ - dn->digits=1; /* .. 
*/ - } - else { /* shift to least */ - count=set->digits; /* now digits to end up with */ - dn->digits=count; /* set the new length */ - up++; /* move to next */ - /* on unit boundary, so shift-down copy loop is simple */ - for (target=dn->lsu; count>0; target++, up++, count-=DECDPUN) - *target=*up; - } - } /* unit-boundary case */ - - else { /* discard digit is in low digit(s), and not top digit */ - uInt discard1; /* first discarded digit */ - uInt quot, rem; /* for divisions */ - if (cut==0) quot=*up; /* is at bottom of unit */ - else /* cut>0 */ { /* it's not at bottom of unit */ - #if DECDPUN<=4 - U_ASSERT(/* cut >= 0 &&*/ cut <= 4); - quot=QUOT10(*up, cut); - rem=*up-quot*powers[cut]; - #else - rem=*up%powers[cut]; - quot=*up/powers[cut]; - #endif - if (rem!=0) *residue=1; - } - /* discard digit is now at bottom of quot */ - #if DECDPUN<=4 - temp=(quot*6554)>>16; /* fast /10 */ - /* Vowels algorithm here not a win (9 instructions) */ - discard1=quot-X10(temp); - quot=temp; - #else - discard1=quot%10; - quot=quot/10; - #endif - /* here, discard1 is the guard digit, and residue is everything */ - /* else [use mapping array to accumulate residue safely] */ - *residue+=resmap[discard1]; - cut++; /* update cut */ - /* here: up -> Unit of the array with bottom digit */ - /* cut is the division point for each Unit */ - /* quot holds the uncut high-order digits for the current unit */ - if (set->digits<=0) { /* special for Quantize/Subnormal :-( */ - *dn->lsu=0; /* .. result is 0 */ - dn->digits=1; /* .. 
*/ - } - else { /* shift to least needed */ - count=set->digits; /* now digits to end up with */ - dn->digits=count; /* set the new length */ - /* shift-copy the coefficient array to the result number */ - for (target=dn->lsu; ; target++) { - *target=(Unit)quot; - count-=(DECDPUN-cut); - if (count<=0) break; - up++; - quot=*up; - #if DECDPUN<=4 - quot=QUOT10(quot, cut); - rem=*up-quot*powers[cut]; - #else - rem=quot%powers[cut]; - quot=quot/powers[cut]; - #endif - *target=(Unit)(*target+rem*powers[DECDPUN-cut]); - count-=cut; - if (count<=0) break; - } /* shift-copy loop */ - } /* shift to least */ - } /* not unit boundary */ - - if (*residue!=0) *status|=DEC_Inexact; /* record inexactitude */ - return; - } /* decSetCoeff */ - -/* ------------------------------------------------------------------ */ -/* decApplyRound -- apply pending rounding to a number */ -/* */ -/* dn is the number, with space for set->digits digits */ -/* set is the context [for size and rounding mode] */ -/* residue indicates pending rounding, being any accumulated */ -/* guard and sticky information. It may be: */ -/* 6-9: rounding digit is >5 */ -/* 5: rounding digit is exactly half-way */ -/* 1-4: rounding digit is <5 and >0 */ -/* 0: the coefficient is exact */ -/* -1: as 1, but the hidden digits are subtractive, that */ -/* is, of the opposite sign to dn. In this case the */ -/* coefficient must be non-0. This case occurs when */ -/* subtracting a small number (which can be reduced to */ -/* a sticky bit); see decAddOp. */ -/* status is the status accumulator, as usual */ -/* */ -/* This routine applies rounding while keeping the length of the */ -/* coefficient constant. 
The exponent and status are unchanged */ -/* except if: */ -/* */ -/* -- the coefficient was increased and is all nines (in which */ -/* case Overflow could occur, and is handled directly here so */ -/* the caller does not need to re-test for overflow) */ -/* */ -/* -- the coefficient was decreased and becomes all nines (in which */ -/* case Underflow could occur, and is also handled directly). */ -/* */ -/* All fields in dn are updated as required. */ -/* */ -/* ------------------------------------------------------------------ */ -static void decApplyRound(decNumber *dn, decContext *set, Int residue, - uInt *status) { - Int bump; /* 1 if coefficient needs to be incremented */ - /* -1 if coefficient needs to be decremented */ - - if (residue==0) return; /* nothing to apply */ - - bump=0; /* assume a smooth ride */ - - /* now decide whether, and how, to round, depending on mode */ - switch (set->round) { - case DEC_ROUND_05UP: { /* round zero or five up (for reround) */ - /* This is the same as DEC_ROUND_DOWN unless there is a */ - /* positive residue and the lsd of dn is 0 or 5, in which case */ - /* it is bumped; when residue is <0, the number is therefore */ - /* bumped down unless the final digit was 1 or 6 (in which */ - /* case it is bumped down and then up -- a no-op) */ - Int lsd5=*dn->lsu%5; /* get lsd and quintate */ - if (residue<0 && lsd5!=1) bump=-1; - else if (residue>0 && lsd5==0) bump=1; - /* [bump==1 could be applied directly; use common path for clarity] */ - break;} /* r-05 */ - - case DEC_ROUND_DOWN: { - /* no change, except if negative residue */ - if (residue<0) bump=-1; - break;} /* r-d */ - - case DEC_ROUND_HALF_DOWN: { - if (residue>5) bump=1; - break;} /* r-h-d */ - - case DEC_ROUND_HALF_EVEN: { - if (residue>5) bump=1; /* >0.5 goes up */ - else if (residue==5) { /* exactly 0.5000... 
*/ - /* 0.5 goes up iff [new] lsd is odd */ - if (*dn->lsu & 0x01) bump=1; - } - break;} /* r-h-e */ - - case DEC_ROUND_HALF_UP: { - if (residue>=5) bump=1; - break;} /* r-h-u */ - - case DEC_ROUND_UP: { - if (residue>0) bump=1; - break;} /* r-u */ - - case DEC_ROUND_CEILING: { - /* same as _UP for positive numbers, and as _DOWN for negatives */ - /* [negative residue cannot occur on 0] */ - if (decNumberIsNegative(dn)) { - if (residue<0) bump=-1; - } - else { - if (residue>0) bump=1; - } - break;} /* r-c */ - - case DEC_ROUND_FLOOR: { - /* same as _UP for negative numbers, and as _DOWN for positive */ - /* [negative residue cannot occur on 0] */ - if (!decNumberIsNegative(dn)) { - if (residue<0) bump=-1; - } - else { - if (residue>0) bump=1; - } - break;} /* r-f */ - - default: { /* e.g., DEC_ROUND_MAX */ - *status|=DEC_Invalid_context; - #if DECTRACE || (DECCHECK && DECVERB) - printf("Unknown rounding mode: %d\n", set->round); - #endif - break;} - } /* switch */ - - /* now bump the number, up or down, if need be */ - if (bump==0) return; /* no action required */ - - /* Simply use decUnitAddSub unless bumping up and the number is */ - /* all nines. In this special case set to 100... explicitly */ - /* and adjust the exponent by one (as otherwise could overflow */ - /* the array) */ - /* Similarly handle all-nines result if bumping down. */ - if (bump>0) { - Unit *up; /* work */ - uInt count=dn->digits; /* digits to be checked */ - for (up=dn->lsu; ; up++) { - if (count<=DECDPUN) { - /* this is the last Unit (the msu) */ - if (*up!=powers[count]-1) break; /* not still 9s */ - /* here if it, too, is all nines */ - *up=(Unit)powers[count-1]; /* here 999 -> 100 etc. */ - for (up=up-1; up>=dn->lsu; up--) *up=0; /* others all to 0 */ - dn->exponent++; /* and bump exponent */ - /* [which, very rarely, could cause Overflow...] 
*/ - if ((dn->exponent+dn->digits)>set->emax+1) { - decSetOverflow(dn, set, status); - } - return; /* done */ - } - /* a full unit to check, with more to come */ - if (*up!=DECDPUNMAX) break; /* not still 9s */ - count-=DECDPUN; - } /* up */ - } /* bump>0 */ - else { /* -1 */ - /* here checking for a pre-bump of 1000... (leading 1, all */ - /* other digits zero) */ - Unit *up, *sup; /* work */ - uInt count=dn->digits; /* digits to be checked */ - for (up=dn->lsu; ; up++) { - if (count<=DECDPUN) { - /* this is the last Unit (the msu) */ - if (*up!=powers[count-1]) break; /* not 100.. */ - /* here if have the 1000... case */ - sup=up; /* save msu pointer */ - *up=(Unit)powers[count]-1; /* here 100 in msu -> 999 */ - /* others all to all-nines, too */ - for (up=up-1; up>=dn->lsu; up--) *up=(Unit)powers[DECDPUN]-1; - dn->exponent--; /* and bump exponent */ - - /* iff the number was at the subnormal boundary (exponent=etiny) */ - /* then the exponent is now out of range, so it will in fact get */ - /* clamped to etiny and the final 9 dropped. */ - /* printf(">> emin=%d exp=%d sdig=%d\n", set->emin, */ - /* dn->exponent, set->digits); */ - if (dn->exponent+1==set->emin-set->digits+1) { - if (count==1 && dn->digits==1) *sup=0; /* here 9 -> 0[.9] */ - else { - *sup=(Unit)powers[count-1]-1; /* here 999.. in msu -> 99.. */ - dn->digits--; - } - dn->exponent++; - *status|=DEC_Underflow | DEC_Subnormal | DEC_Inexact | DEC_Rounded; - } - return; /* done */ - } - - /* a full unit to check, with more to come */ - if (*up!=0) break; /* not still 0s */ - count-=DECDPUN; - } /* up */ - - } /* bump<0 */ - - /* Actual bump needed. Do it. 
*/ - decUnitAddSub(dn->lsu, D2U(dn->digits), uarrone, 1, 0, dn->lsu, bump); - } /* decApplyRound */ - -#if DECSUBSET -/* ------------------------------------------------------------------ */ -/* decFinish -- finish processing a number */ -/* */ -/* dn is the number */ -/* set is the context */ -/* residue is the rounding accumulator (as in decApplyRound) */ -/* status is the accumulator */ -/* */ -/* This finishes off the current number by: */ -/* 1. If not extended: */ -/* a. Converting a zero result to clean '0' */ -/* b. Reducing positive exponents to 0, if would fit in digits */ -/* 2. Checking for overflow and subnormals (always) */ -/* Note this is just Finalize when no subset arithmetic. */ -/* All fields are updated as required. */ -/* ------------------------------------------------------------------ */ -static void decFinish(decNumber *dn, decContext *set, Int *residue, - uInt *status) { - if (!set->extended) { - if ISZERO(dn) { /* value is zero */ - dn->exponent=0; /* clean exponent .. */ - dn->bits=0; /* .. and sign */ - return; /* no error possible */ - } - if (dn->exponent>=0) { /* non-negative exponent */ - /* >0; reduce to integer if possible */ - if (set->digits >= (dn->exponent+dn->digits)) { - dn->digits=decShiftToMost(dn->lsu, dn->digits, dn->exponent); - dn->exponent=0; - } - } - } /* !extended */ - - decFinalize(dn, set, residue, status); - } /* decFinish */ -#endif - -/* ------------------------------------------------------------------ */ -/* decFinalize -- final check, clamp, and round of a number */ -/* */ -/* dn is the number */ -/* set is the context */ -/* residue is the rounding accumulator (as in decApplyRound) */ -/* status is the status accumulator */ -/* */ -/* This finishes off the current number by checking for subnormal */ -/* results, applying any pending rounding, checking for overflow, */ -/* and applying any clamping. */ -/* Underflow and overflow conditions are raised as appropriate. 
*/ -/* All fields are updated as required. */ -/* ------------------------------------------------------------------ */ -static void decFinalize(decNumber *dn, decContext *set, Int *residue, - uInt *status) { - Int shift; /* shift needed if clamping */ - Int tinyexp=set->emin-dn->digits+1; /* precalculate subnormal boundary */ - - /* Must be careful, here, when checking the exponent as the */ - /* adjusted exponent could overflow 31 bits [because it may already */ - /* be up to twice the expected]. */ - - /* First test for subnormal. This must be done before any final */ - /* round as the result could be rounded to Nmin or 0. */ - if (dn->exponent<=tinyexp) { /* prefilter */ - Int comp; - decNumber nmin; - /* A very nasty case here is dn == Nmin and residue<0 */ - if (dn->exponentemin; - comp=decCompare(dn, &nmin, 1); /* (signless compare) */ - if (comp==BADINT) { /* oops */ - *status|=DEC_Insufficient_storage; /* abandon... */ - return; - } - if (*residue<0 && comp==0) { /* neg residue and dn==Nmin */ - decApplyRound(dn, set, *residue, status); /* might force down */ - decSetSubnormal(dn, set, residue, status); - return; - } - } - - /* now apply any pending round (this could raise overflow). 
*/ - if (*residue!=0) decApplyRound(dn, set, *residue, status); - - /* Check for overflow [redundant in the 'rare' case] or clamp */ - if (dn->exponent<=set->emax-set->digits+1) return; /* neither needed */ - - - /* here when might have an overflow or clamp to do */ - if (dn->exponent>set->emax-dn->digits+1) { /* too big */ - decSetOverflow(dn, set, status); - return; - } - /* here when the result is normal but in clamp range */ - if (!set->clamp) return; - - /* here when need to apply the IEEE exponent clamp (fold-down) */ - shift=dn->exponent-(set->emax-set->digits+1); - - /* shift coefficient (if non-zero) */ - if (!ISZERO(dn)) { - dn->digits=decShiftToMost(dn->lsu, dn->digits, shift); - } - dn->exponent-=shift; /* adjust the exponent to match */ - *status|=DEC_Clamped; /* and record the dirty deed */ - return; - } /* decFinalize */ - -/* ------------------------------------------------------------------ */ -/* decSetOverflow -- set number to proper overflow value */ -/* */ -/* dn is the number (used for sign [only] and result) */ -/* set is the context [used for the rounding mode, etc.] */ -/* status contains the current status to be updated */ -/* */ -/* This sets the sign of a number and sets its value to either */ -/* Infinity or the maximum finite value, depending on the sign of */ -/* dn and the rounding mode, following IEEE 754 rules. 
*/ -/* ------------------------------------------------------------------ */ -static void decSetOverflow(decNumber *dn, decContext *set, uInt *status) { - Flag needmax=0; /* result is maximum finite value */ - uByte sign=dn->bits&DECNEG; /* clean and save sign bit */ - - if (ISZERO(dn)) { /* zero does not overflow magnitude */ - Int emax=set->emax; /* limit value */ - if (set->clamp) emax-=set->digits-1; /* lower if clamping */ - if (dn->exponent>emax) { /* clamp required */ - dn->exponent=emax; - *status|=DEC_Clamped; - } - return; - } - - uprv_decNumberZero(dn); - switch (set->round) { - case DEC_ROUND_DOWN: { - needmax=1; /* never Infinity */ - break;} /* r-d */ - case DEC_ROUND_05UP: { - needmax=1; /* never Infinity */ - break;} /* r-05 */ - case DEC_ROUND_CEILING: { - if (sign) needmax=1; /* Infinity if non-negative */ - break;} /* r-c */ - case DEC_ROUND_FLOOR: { - if (!sign) needmax=1; /* Infinity if negative */ - break;} /* r-f */ - default: break; /* Infinity in all other cases */ - } - if (needmax) { - decSetMaxValue(dn, set); - dn->bits=sign; /* set sign */ - } - else dn->bits=sign|DECINF; /* Value is +/-Infinity */ - *status|=DEC_Overflow | DEC_Inexact | DEC_Rounded; - } /* decSetOverflow */ - -/* ------------------------------------------------------------------ */ -/* decSetMaxValue -- set number to +Nmax (maximum normal value) */ -/* */ -/* dn is the number to set */ -/* set is the context [used for digits and emax] */ -/* */ -/* This sets the number to the maximum positive value. 
*/ -/* ------------------------------------------------------------------ */ -static void decSetMaxValue(decNumber *dn, decContext *set) { - Unit *up; /* work */ - Int count=set->digits; /* nines to add */ - dn->digits=count; - /* fill in all nines to set maximum value */ - for (up=dn->lsu; ; up++) { - if (count>DECDPUN) *up=DECDPUNMAX; /* unit full o'nines */ - else { /* this is the msu */ - *up=(Unit)(powers[count]-1); - break; - } - count-=DECDPUN; /* filled those digits */ - } /* up */ - dn->bits=0; /* + sign */ - dn->exponent=set->emax-set->digits+1; - } /* decSetMaxValue */ - -/* ------------------------------------------------------------------ */ -/* decSetSubnormal -- process value whose exponent is extended) { - uprv_decNumberZero(dn); - /* always full overflow */ - *status|=DEC_Underflow | DEC_Subnormal | DEC_Inexact | DEC_Rounded; - return; - } - #endif - - /* Full arithmetic -- allow subnormals, rounded to minimum exponent */ - /* (Etiny) if needed */ - etiny=set->emin-(set->digits-1); /* smallest allowed exponent */ - - if ISZERO(dn) { /* value is zero */ - /* residue can never be non-zero here */ - #if DECCHECK - if (*residue!=0) { - printf("++ Subnormal 0 residue %ld\n", (LI)*residue); - *status|=DEC_Invalid_operation; - } - #endif - if (dn->exponentexponent=etiny; - *status|=DEC_Clamped; - } - return; - } - - *status|=DEC_Subnormal; /* have a non-zero subnormal */ - adjust=etiny-dn->exponent; /* calculate digits to remove */ - if (adjust<=0) { /* not out of range; unrounded */ - /* residue can never be non-zero here, except in the Nmin-residue */ - /* case (which is a subnormal result), so can take fast-path here */ - /* it may already be inexact (from setting the coefficient) */ - if (*status&DEC_Inexact) *status|=DEC_Underflow; - return; - } - - /* adjust>0, so need to rescale the result so exponent becomes Etiny */ - /* [this code is similar to that in rescale] */ - workset=*set; /* clone rounding, etc. 
*/ - workset.digits=dn->digits-adjust; /* set requested length */ - workset.emin-=adjust; /* and adjust emin to match */ - /* [note that the latter can be <1, here, similar to Rescale case] */ - decSetCoeff(dn, &workset, dn->lsu, dn->digits, residue, status); - decApplyRound(dn, &workset, *residue, status); - - /* Use 754 default rule: Underflow is set iff Inexact */ - /* [independent of whether trapped] */ - if (*status&DEC_Inexact) *status|=DEC_Underflow; - - /* if rounded up a 999s case, exponent will be off by one; adjust */ - /* back if so [it will fit, because it was shortened earlier] */ - if (dn->exponent>etiny) { - dn->digits=decShiftToMost(dn->lsu, dn->digits, 1); - dn->exponent--; /* (re)adjust the exponent. */ - } - - /* if rounded to zero, it is by definition clamped... */ - if (ISZERO(dn)) *status|=DEC_Clamped; - } /* decSetSubnormal */ - -/* ------------------------------------------------------------------ */ -/* decCheckMath - check entry conditions for a math function */ -/* */ -/* This checks the context and the operand */ -/* */ -/* rhs is the operand to check */ -/* set is the context to check */ -/* status is unchanged if both are good */ -/* */ -/* returns non-zero if status is changed, 0 otherwise */ -/* */ -/* Restrictions enforced: */ -/* */ -/* digits, emax, and -emin in the context must be less than */ -/* DEC_MAX_MATH (999999), and A must be within these bounds if */ -/* non-zero. Invalid_operation is set in the status if a */ -/* restriction is violated. 
*/ -/* ------------------------------------------------------------------ */ -static uInt decCheckMath(const decNumber *rhs, decContext *set, - uInt *status) { - uInt save=*status; /* record */ - if (set->digits>DEC_MAX_MATH - || set->emax>DEC_MAX_MATH - || -set->emin>DEC_MAX_MATH) *status|=DEC_Invalid_context; - else if ((rhs->digits>DEC_MAX_MATH - || rhs->exponent+rhs->digits>DEC_MAX_MATH+1 - || rhs->exponent+rhs->digits<2*(1-DEC_MAX_MATH)) - && !ISZERO(rhs)) *status|=DEC_Invalid_operation; - return (*status!=save); - } /* decCheckMath */ - -/* ------------------------------------------------------------------ */ -/* decGetInt -- get integer from a number */ -/* */ -/* dn is the number [which will not be altered] */ -/* */ -/* returns one of: */ -/* BADINT if there is a non-zero fraction */ -/* the converted integer */ -/* BIGEVEN if the integer is even and magnitude > 2*10**9 */ -/* BIGODD if the integer is odd and magnitude > 2*10**9 */ -/* */ -/* This checks and gets a whole number from the input decNumber. */ -/* The sign can be determined from dn by the caller when BIGEVEN or */ -/* BIGODD is returned. 
*/ -/* ------------------------------------------------------------------ */ -static Int decGetInt(const decNumber *dn) { - Int theInt; /* result accumulator */ - const Unit *up; /* work */ - Int got; /* digits (real or not) processed */ - Int ilength=dn->digits+dn->exponent; /* integral length */ - Flag neg=decNumberIsNegative(dn); /* 1 if -ve */ - - /* The number must be an integer that fits in 10 digits */ - /* Assert, here, that 10 is enough for any rescale Etiny */ - #if DEC_MAX_EMAX > 999999999 - #error GetInt may need updating [for Emax] - #endif - #if DEC_MIN_EMIN < -999999999 - #error GetInt may need updating [for Emin] - #endif - if (ISZERO(dn)) return 0; /* zeros are OK, with any exponent */ - - up=dn->lsu; /* ready for lsu */ - theInt=0; /* ready to accumulate */ - if (dn->exponent>=0) { /* relatively easy */ - /* no fractional part [usual]; allow for positive exponent */ - got=dn->exponent; - } - else { /* -ve exponent; some fractional part to check and discard */ - Int count=-dn->exponent; /* digits to discard */ - /* spin up whole units until reach the Unit with the unit digit */ - for (; count>=DECDPUN; up++) { - if (*up!=0) return BADINT; /* non-zero Unit to discard */ - count-=DECDPUN; - } - if (count==0) got=0; /* [a multiple of DECDPUN] */ - else { /* [not multiple of DECDPUN] */ - Int rem; /* work */ - /* slice off fraction digits and check for non-zero */ - #if DECDPUN<=4 - theInt=QUOT10(*up, count); - rem=*up-theInt*powers[count]; - #else - rem=*up%powers[count]; /* slice off discards */ - theInt=*up/powers[count]; - #endif - if (rem!=0) return BADINT; /* non-zero fraction */ - /* it looks good */ - got=DECDPUN-count; /* number of digits so far */ - up++; /* ready for next */ - } - } - /* now it's known there's no fractional part */ - - /* tricky code now, to accumulate up to 9.3 digits */ - if (got==0) {theInt=*up; got+=DECDPUN; up++;} /* ensure lsu is there */ - - if (ilength<11) { - Int save=theInt; - /* collect any remaining unit(s) */ - 
for (; got1999999997) ilength=11; - else if (!neg && theInt>999999999) ilength=11; - if (ilength==11) theInt=save; /* restore correct low bit */ - } - } - - if (ilength>10) { /* too big */ - if (theInt&1) return BIGODD; /* bottom bit 1 */ - return BIGEVEN; /* bottom bit 0 */ - } - - if (neg) theInt=-theInt; /* apply sign */ - return theInt; - } /* decGetInt */ - -/* ------------------------------------------------------------------ */ -/* decDecap -- decapitate the coefficient of a number */ -/* */ -/* dn is the number to be decapitated */ -/* drop is the number of digits to be removed from the left of dn; */ -/* this must be <= dn->digits (if equal, the coefficient is */ -/* set to 0) */ -/* */ -/* Returns dn; dn->digits will be <= the initial digits less drop */ -/* (after removing drop digits there may be leading zero digits */ -/* which will also be removed). Only dn->lsu and dn->digits change. */ -/* ------------------------------------------------------------------ */ -static decNumber *decDecap(decNumber *dn, Int drop) { - Unit *msu; /* -> target cut point */ - Int cut; /* work */ - if (drop>=dn->digits) { /* losing the whole thing */ - #if DECCHECK - if (drop>dn->digits) - printf("decDecap called with drop>digits [%ld>%ld]\n", - (LI)drop, (LI)dn->digits); - #endif - dn->lsu[0]=0; - dn->digits=1; - return dn; - } - msu=dn->lsu+D2U(dn->digits-drop)-1; /* -> likely msu */ - cut=MSUDIGITS(dn->digits-drop); /* digits to be in use in msu */ - if (cut!=DECDPUN) *msu%=powers[cut]; /* clear left digits */ - /* that may have left leading zero digits, so do a proper count... 
*/ - dn->digits=decGetDigits(dn->lsu, msu-dn->lsu+1); - return dn; - } /* decDecap */ - -/* ------------------------------------------------------------------ */ -/* decBiStr -- compare string with pairwise options */ -/* */ -/* targ is the string to compare */ -/* str1 is one of the strings to compare against (length may be 0) */ -/* str2 is the other; it must be the same length as str1 */ -/* */ -/* returns 1 if strings compare equal, (that is, it is the same */ -/* length as str1 and str2, and each character of targ is in either */ -/* str1 or str2 in the corresponding position), or 0 otherwise */ -/* */ -/* This is used for generic caseless compare, including the awkward */ -/* case of the Turkish dotted and dotless Is. Use as (for example): */ -/* if (decBiStr(test, "mike", "MIKE")) ... */ -/* ------------------------------------------------------------------ */ -static Flag decBiStr(const char *targ, const char *str1, const char *str2) { - for (;;targ++, str1++, str2++) { - if (*targ!=*str1 && *targ!=*str2) return 0; - /* *targ has a match in one (or both, if terminator) */ - if (*targ=='\0') break; - } /* forever */ - return 1; - } /* decBiStr */ - -/* ------------------------------------------------------------------ */ -/* decNaNs -- handle NaN operand or operands */ -/* */ -/* res is the result number */ -/* lhs is the first operand */ -/* rhs is the second operand, or NULL if none */ -/* context is used to limit payload length */ -/* status contains the current status */ -/* returns res in case convenient */ -/* */ -/* Called when one or both operands is a NaN, and propagates the */ -/* appropriate result to res. When an sNaN is found, it is changed */ -/* to a qNaN and Invalid operation is set. 
*/ -/* ------------------------------------------------------------------ */ -static decNumber * decNaNs(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set, - uInt *status) { - /* This decision tree ends up with LHS being the source pointer, */ - /* and status updated if need be */ - if (lhs->bits & DECSNAN) - *status|=DEC_Invalid_operation | DEC_sNaN; - else if (rhs==NULL); - else if (rhs->bits & DECSNAN) { - lhs=rhs; - *status|=DEC_Invalid_operation | DEC_sNaN; - } - else if (lhs->bits & DECNAN); - else lhs=rhs; - - /* propagate the payload */ - if (lhs->digits<=set->digits) uprv_decNumberCopy(res, lhs); /* easy */ - else { /* too long */ - const Unit *ul; - Unit *ur, *uresp1; - /* copy safe number of units, then decapitate */ - res->bits=lhs->bits; /* need sign etc. */ - uresp1=res->lsu+D2U(set->digits); - for (ur=res->lsu, ul=lhs->lsu; urdigits=D2U(set->digits)*DECDPUN; - /* maybe still too long */ - if (res->digits>set->digits) decDecap(res, res->digits-set->digits); - } - - res->bits&=~DECSNAN; /* convert any sNaN to NaN, while */ - res->bits|=DECNAN; /* .. preserving sign */ - res->exponent=0; /* clean exponent */ - /* [coefficient was copied/decapitated] */ - return res; - } /* decNaNs */ - -/* ------------------------------------------------------------------ */ -/* decStatus -- apply non-zero status */ -/* */ -/* dn is the number to set if error */ -/* status contains the current status (not yet in context) */ -/* set is the context */ -/* */ -/* If the status is an error status, the number is set to a NaN, */ -/* unless the error was an overflow, divide-by-zero, or underflow, */ -/* in which case the number will have already been set. */ -/* */ -/* The context status is then updated with the new status. Note that */ -/* this may raise a signal, so control may never return from this */ -/* routine (hence resources must be recovered before it is called). 
*/ -/* ------------------------------------------------------------------ */ -static void decStatus(decNumber *dn, uInt status, decContext *set) { - if (status & DEC_NaNs) { /* error status -> NaN */ - /* if cause was an sNaN, clear and propagate [NaN is already set up] */ - if (status & DEC_sNaN) status&=~DEC_sNaN; - else { - uprv_decNumberZero(dn); /* other error: clean throughout */ - dn->bits=DECNAN; /* and make a quiet NaN */ - } - } - uprv_decContextSetStatus(set, status); /* [may not return] */ - return; - } /* decStatus */ - -/* ------------------------------------------------------------------ */ -/* decGetDigits -- count digits in a Units array */ -/* */ -/* uar is the Unit array holding the number (this is often an */ -/* accumulator of some sort) */ -/* len is the length of the array in units [>=1] */ -/* */ -/* returns the number of (significant) digits in the array */ -/* */ -/* All leading zeros are excluded, except the last if the array has */ -/* only zero Units. */ -/* ------------------------------------------------------------------ */ -/* This may be called twice during some operations. */ -static Int decGetDigits(Unit *uar, Int len) { - Unit *up=uar+(len-1); /* -> msu */ - Int digits=(len-1)*DECDPUN+1; /* possible digits excluding msu */ - #if DECDPUN>4 - uInt const *pow; /* work */ - #endif - /* (at least 1 in final msu) */ - #if DECCHECK - if (len<1) printf("decGetDigits called with len<1 [%ld]\n", (LI)len); - #endif - - for (; up>=uar; up--) { - if (*up==0) { /* unit is all 0s */ - if (digits==1) break; /* a zero has one digit */ - digits-=DECDPUN; /* adjust for 0 unit */ - continue;} - /* found the first (most significant) non-zero Unit */ - #if DECDPUN>1 /* not done yet */ - if (*up<10) break; /* is 1-9 */ - digits++; - #if DECDPUN>2 /* not done yet */ - if (*up<100) break; /* is 10-99 */ - digits++; - #if DECDPUN>3 /* not done yet */ - if (*up<1000) break; /* is 100-999 */ - digits++; - #if DECDPUN>4 /* count the rest ... 
*/ - for (pow=&powers[4]; *up>=*pow; pow++) digits++; - #endif - #endif - #endif - #endif - break; - } /* up */ - return digits; - } /* decGetDigits */ - -#if DECTRACE | DECCHECK -/* ------------------------------------------------------------------ */ -/* decNumberShow -- display a number [debug aid] */ -/* dn is the number to show */ -/* */ -/* Shows: sign, exponent, coefficient (msu first), digits */ -/* or: sign, special-value */ -/* ------------------------------------------------------------------ */ -/* this is public so other modules can use it */ -void uprv_decNumberShow(const decNumber *dn) { - const Unit *up; /* work */ - uInt u, d; /* .. */ - Int cut; /* .. */ - char isign='+'; /* main sign */ - if (dn==NULL) { - printf("NULL\n"); - return;} - if (decNumberIsNegative(dn)) isign='-'; - printf(" >> %c ", isign); - if (dn->bits&DECSPECIAL) { /* Is a special value */ - if (decNumberIsInfinite(dn)) printf("Infinity"); - else { /* a NaN */ - if (dn->bits&DECSNAN) printf("sNaN"); /* signalling NaN */ - else printf("NaN"); - } - /* if coefficient and exponent are 0, no more to do */ - if (dn->exponent==0 && dn->digits==1 && *dn->lsu==0) { - printf("\n"); - return;} - /* drop through to report other information */ - printf(" "); - } - - /* now carefully display the coefficient */ - up=dn->lsu+D2U(dn->digits)-1; /* msu */ - printf("%ld", (LI)*up); - for (up=up-1; up>=dn->lsu; up--) { - u=*up; - printf(":"); - for (cut=DECDPUN-1; cut>=0; cut--) { - d=u/powers[cut]; - u-=d*powers[cut]; - printf("%ld", (LI)d); - } /* cut */ - } /* up */ - if (dn->exponent!=0) { - char esign='+'; - if (dn->exponent<0) esign='-'; - printf(" E%c%ld", esign, (LI)abs(dn->exponent)); - } - printf(" [%ld]\n", (LI)dn->digits); - } /* decNumberShow */ -#endif - -#if DECTRACE || DECCHECK -/* ------------------------------------------------------------------ */ -/* decDumpAr -- display a unit array [debug/check aid] */ -/* name is a single-character tag name */ -/* ar is the array to display 
*/ -/* len is the length of the array in Units */ -/* ------------------------------------------------------------------ */ -static void decDumpAr(char name, const Unit *ar, Int len) { - Int i; - const char *spec; - #if DECDPUN==9 - spec="%09d "; - #elif DECDPUN==8 - spec="%08d "; - #elif DECDPUN==7 - spec="%07d "; - #elif DECDPUN==6 - spec="%06d "; - #elif DECDPUN==5 - spec="%05d "; - #elif DECDPUN==4 - spec="%04d "; - #elif DECDPUN==3 - spec="%03d "; - #elif DECDPUN==2 - spec="%02d "; - #else - spec="%d "; - #endif - printf(" :%c: ", name); - for (i=len-1; i>=0; i--) { - if (i==len-1) printf("%ld ", (LI)ar[i]); - else printf(spec, ar[i]); - } - printf("\n"); - return;} -#endif - -#if DECCHECK -/* ------------------------------------------------------------------ */ -/* decCheckOperands -- check operand(s) to a routine */ -/* res is the result structure (not checked; it will be set to */ -/* quiet NaN if error found (and it is not NULL)) */ -/* lhs is the first operand (may be DECUNRESU) */ -/* rhs is the second (may be DECUNUSED) */ -/* set is the context (may be DECUNCONT) */ -/* returns 0 if both operands, and the context are clean, or 1 */ -/* otherwise (in which case the context will show an error, */ -/* unless NULL). Note that res is not cleaned; caller should */ -/* handle this so res=NULL case is safe. */ -/* The caller is expected to abandon immediately if 1 is returned. 
*/ -/* ------------------------------------------------------------------ */ -static Flag decCheckOperands(decNumber *res, const decNumber *lhs, - const decNumber *rhs, decContext *set) { - Flag bad=0; - if (set==NULL) { /* oops; hopeless */ - #if DECTRACE || DECVERB - printf("Reference to context is NULL.\n"); - #endif - bad=1; - return 1;} - else if (set!=DECUNCONT - && (set->digits<1 || set->round>=DEC_ROUND_MAX)) { - bad=1; - #if DECTRACE || DECVERB - printf("Bad context [digits=%ld round=%ld].\n", - (LI)set->digits, (LI)set->round); - #endif - } - else { - if (res==NULL) { - bad=1; - #if DECTRACE - /* this one not DECVERB as standard tests include NULL */ - printf("Reference to result is NULL.\n"); - #endif - } - if (!bad && lhs!=DECUNUSED) bad=(decCheckNumber(lhs)); - if (!bad && rhs!=DECUNUSED) bad=(decCheckNumber(rhs)); - } - if (bad) { - if (set!=DECUNCONT) uprv_decContextSetStatus(set, DEC_Invalid_operation); - if (res!=DECUNRESU && res!=NULL) { - uprv_decNumberZero(res); - res->bits=DECNAN; /* qNaN */ - } - } - return bad; - } /* decCheckOperands */ - -/* ------------------------------------------------------------------ */ -/* decCheckNumber -- check a number */ -/* dn is the number to check */ -/* returns 0 if the number is clean, or 1 otherwise */ -/* */ -/* The number is considered valid if it could be a result from some */ -/* operation in some valid context. */ -/* ------------------------------------------------------------------ */ -static Flag decCheckNumber(const decNumber *dn) { - const Unit *up; /* work */ - uInt maxuint; /* .. */ - Int ae, d, digits; /* .. */ - Int emin, emax; /* .. 
*/ - - if (dn==NULL) { /* hopeless */ - #if DECTRACE - /* this one not DECVERB as standard tests include NULL */ - printf("Reference to decNumber is NULL.\n"); - #endif - return 1;} - - /* check special values */ - if (dn->bits & DECSPECIAL) { - if (dn->exponent!=0) { - #if DECTRACE || DECVERB - printf("Exponent %ld (not 0) for a special value [%02x].\n", - (LI)dn->exponent, dn->bits); - #endif - return 1;} - - /* 2003.09.08: NaNs may now have coefficients, so next tests Inf only */ - if (decNumberIsInfinite(dn)) { - if (dn->digits!=1) { - #if DECTRACE || DECVERB - printf("Digits %ld (not 1) for an infinity.\n", (LI)dn->digits); - #endif - return 1;} - if (*dn->lsu!=0) { - #if DECTRACE || DECVERB - printf("LSU %ld (not 0) for an infinity.\n", (LI)*dn->lsu); - #endif - decDumpAr('I', dn->lsu, D2U(dn->digits)); - return 1;} - } /* Inf */ - /* 2002.12.26: negative NaNs can now appear through proposed IEEE */ - /* concrete formats (decimal64, etc.). */ - return 0; - } - - /* check the coefficient */ - if (dn->digits<1 || dn->digits>DECNUMMAXP) { - #if DECTRACE || DECVERB - printf("Digits %ld in number.\n", (LI)dn->digits); - #endif - return 1;} - - d=dn->digits; - - for (up=dn->lsu; d>0; up++) { - if (d>DECDPUN) maxuint=DECDPUNMAX; - else { /* reached the msu */ - maxuint=powers[d]-1; - if (dn->digits>1 && *upmaxuint) { - #if DECTRACE || DECVERB - printf("Bad Unit [%08lx] in %ld-digit number at offset %ld [maxuint %ld].\n", - (LI)*up, (LI)dn->digits, (LI)(up-dn->lsu), (LI)maxuint); - #endif - return 1;} - d-=DECDPUN; - } - - /* check the exponent. Note that input operands can have exponents */ - /* which are out of the set->emin/set->emax and set->digits range */ - /* (just as they can have more digits than set->digits). 
*/ - ae=dn->exponent+dn->digits-1; /* adjusted exponent */ - emax=DECNUMMAXE; - emin=DECNUMMINE; - digits=DECNUMMAXP; - if (ae+emax) { - #if DECTRACE || DECVERB - printf("Adjusted exponent overflow [%ld].\n", (LI)ae); - uprv_decNumberShow(dn); - #endif - return 1;} - - return 0; /* it's OK */ - } /* decCheckNumber */ - -/* ------------------------------------------------------------------ */ -/* decCheckInexact -- check a normal finite inexact result has digits */ -/* dn is the number to check */ -/* set is the context (for status and precision) */ -/* sets Invalid operation, etc., if some digits are missing */ -/* [this check is not made for DECSUBSET compilation or when */ -/* subnormal is not set] */ -/* ------------------------------------------------------------------ */ -static void decCheckInexact(const decNumber *dn, decContext *set) { - #if !DECSUBSET && DECEXTFLAG - if ((set->status & (DEC_Inexact|DEC_Subnormal))==DEC_Inexact - && (set->digits!=dn->digits) && !(dn->bits & DECSPECIAL)) { - #if DECTRACE || DECVERB - printf("Insufficient digits [%ld] on normal Inexact result.\n", - (LI)dn->digits); - uprv_decNumberShow(dn); - #endif - uprv_decContextSetStatus(set, DEC_Invalid_operation); - } - #else - /* next is a noop for quiet compiler */ - if (dn!=NULL && dn->digits==0) set->status|=DEC_Invalid_operation; - #endif - return; - } /* decCheckInexact */ -#endif - -#if DECALLOC -#undef malloc -#undef free -/* ------------------------------------------------------------------ */ -/* decMalloc -- accountable allocation routine */ -/* n is the number of bytes to allocate */ -/* */ -/* Semantics is the same as the stdlib malloc routine, but bytes */ -/* allocated are accounted for globally, and corruption fences are */ -/* added before and after the 'actual' storage. 
*/ -/* ------------------------------------------------------------------ */ -/* This routine allocates storage with an extra twelve bytes; 8 are */ -/* at the start and hold: */ -/* 0-3 the original length requested */ -/* 4-7 buffer corruption detection fence (DECFENCE, x4) */ -/* The 4 bytes at the end also hold a corruption fence (DECFENCE, x4) */ -/* ------------------------------------------------------------------ */ -static void *decMalloc(size_t n) { - uInt size=n+12; /* true size */ - void *alloc; /* -> allocated storage */ - uByte *b, *b0; /* work */ - uInt uiwork; /* for macros */ - - alloc=malloc(size); /* -> allocated storage */ - if (alloc==NULL) return NULL; /* out of strorage */ - b0=(uByte *)alloc; /* as bytes */ - decAllocBytes+=n; /* account for storage */ - UBFROMUI(alloc, n); /* save n */ - /* printf(" alloc ++ dAB: %ld (%ld)\n", (LI)decAllocBytes, (LI)n); */ - for (b=b0+4; b play area */ - } /* decMalloc */ - -/* ------------------------------------------------------------------ */ -/* decFree -- accountable free routine */ -/* alloc is the storage to free */ -/* */ -/* Semantics is the same as the stdlib malloc routine, except that */ -/* the global storage accounting is updated and the fences are */ -/* checked to ensure that no routine has written 'out of bounds'. */ -/* ------------------------------------------------------------------ */ -/* This routine first checks that the fences have not been corrupted. */ -/* It then frees the storage using the 'truw' storage address (that */ -/* is, offset by 8). 
*/ -/* ------------------------------------------------------------------ */ -static void decFree(void *alloc) { - uInt n; /* original length */ - uByte *b, *b0; /* work */ - uInt uiwork; /* for macros */ - - if (alloc==NULL) return; /* allowed; it's a nop */ - b0=(uByte *)alloc; /* as bytes */ - b0-=8; /* -> true start of storage */ - n=UBTOUI(b0); /* lift length */ - for (b=b0+4; b4 or DECUSE64=1, the C99 64-bit int64_t and */ +/* uint64_t types may be used. To avoid these, set DECUSE64=0 */ +/* and DECDPUN<=4 (see documentation). */ +/* */ +/* The code also conforms to C99 restrictions; in particular, */ +/* strict aliasing rules are observed. */ +/* */ +/* 2. The decNumber format which this library uses is optimized for */ +/* efficient processing of relatively short numbers; in particular */ +/* it allows the use of fixed sized structures and minimizes copy */ +/* and move operations. It does, however, support arbitrary */ +/* precision (up to 999,999,999 digits) and arbitrary exponent */ +/* range (Emax in the range 0 through 999,999,999 and Emin in the */ +/* range -999,999,999 through 0). Mathematical functions (for */ +/* example decNumberExp) as identified below are restricted more */ +/* tightly: digits, emax, and -emin in the context must be <= */ +/* DEC_MAX_MATH (999999), and their operand(s) must be within */ +/* these bounds. */ +/* */ +/* 3. Logical functions are further restricted; their operands must */ +/* be finite, positive, have an exponent of zero, and all digits */ +/* must be either 0 or 1. The result will only contain digits */ +/* which are 0 or 1 (and will have exponent=0 and a sign of 0). */ +/* */ +/* 4. Operands to operator functions are never modified unless they */ +/* are also specified to be the result number (which is always */ +/* permitted). Other than that case, operands must not overlap. */ +/* */ +/* 5. Error handling: the type of the error is ORed into the status */ +/* flags in the current context (decContext structure). 
The */ +/* SIGFPE signal is then raised if the corresponding trap-enabler */ +/* flag in the decContext is set (is 1). */ +/* */ +/* It is the responsibility of the caller to clear the status */ +/* flags as required. */ +/* */ +/* The result of any routine which returns a number will always */ +/* be a valid number (which may be a special value, such as an */ +/* Infinity or NaN). */ +/* */ +/* 6. The decNumber format is not an exchangeable concrete */ +/* representation as it comprises fields which may be machine- */ +/* dependent (packed or unpacked, or special length, for example). */ +/* Canonical conversions to and from strings are provided; other */ +/* conversions are available in separate modules. */ +/* */ +/* 7. Normally, input operands are assumed to be valid. Set DECCHECK */ +/* to 1 for extended operand checking (including NULL operands). */ +/* Results are undefined if a badly-formed structure (or a NULL */ +/* pointer to a structure) is provided, though with DECCHECK */ +/* enabled the operator routines are protected against exceptions. */ +/* (Except if the result pointer is NULL, which is unrecoverable.) */ +/* */ +/* However, the routines will never cause exceptions if they are */ +/* given well-formed operands, even if the value of the operands */ +/* is inappropriate for the operation and DECCHECK is not set. */ +/* (Except for SIGFPE, as and where documented.) */ +/* */ +/* 8. Subset arithmetic is available only if DECSUBSET is set to 1. */ +/* ------------------------------------------------------------------ */ +/* Implementation notes for maintenance of this module: */ +/* */ +/* 1. Storage leak protection: Routines which use malloc are not */ +/* permitted to use return for fastpath or error exits (i.e., */ +/* they follow strict structured programming conventions). */ +/* Instead they have a do{}while(0); construct surrounding the */ +/* code which is protected -- break may be used to exit this. 
*/ +/* Other routines can safely use the return statement inline. */ +/* */ +/* Storage leak accounting can be enabled using DECALLOC. */ +/* */ +/* 2. All loops use the for(;;) construct. Any do construct does */ +/* not loop; it is for allocation protection as just described. */ +/* */ +/* 3. Setting status in the context must always be the very last */ +/* action in a routine, as non-0 status may raise a trap and hence */ +/* the call to set status may not return (if the handler uses long */ +/* jump). Therefore all cleanup must be done first. In general, */ +/* to achieve this status is accumulated and is only applied just */ +/* before return by calling decContextSetStatus (via decStatus). */ +/* */ +/* Routines which allocate storage cannot, in general, use the */ +/* 'top level' routines which could cause a non-returning */ +/* transfer of control. The decXxxxOp routines are safe (do not */ +/* call decStatus even if traps are set in the context) and should */ +/* be used instead (they are also a little faster). */ +/* */ +/* 4. Exponent checking is minimized by allowing the exponent to */ +/* grow outside its limits during calculations, provided that */ +/* the decFinalize function is called later. Multiplication and */ +/* division, and intermediate calculations in exponentiation, */ +/* require more careful checks because of the risk of 31-bit */ +/* overflow (the most negative valid exponent is -1999999997, for */ +/* a 999999999-digit number with adjusted exponent of -999999999). */ +/* */ +/* 5. Rounding is deferred until finalization of results, with any */ +/* 'off to the right' data being represented as a single digit */ +/* residue (in the range -1 through 9). This avoids any double- */ +/* rounding when more than one shortening takes place (for */ +/* example, when a result is subnormal). */ +/* */ +/* 6. 
The digits count is allowed to rise to a multiple of DECDPUN */ +/* during many operations, so whole Units are handled and exact */ +/* accounting of digits is not needed. The correct digits value */ +/* is found by decGetDigits, which accounts for leading zeros. */ +/* This must be called before any rounding if the number of digits */ +/* is not known exactly. */ +/* */ +/* 7. The multiply-by-reciprocal 'trick' is used for partitioning */ +/* numbers up to four digits, using appropriate constants. This */ +/* is not useful for longer numbers because overflow of 32 bits */ +/* would lead to 4 multiplies, which is almost as expensive as */ +/* a divide (unless a floating-point or 64-bit multiply is */ +/* assumed to be available). */ +/* */ +/* 8. Unusual abbreviations that may be used in the commentary: */ +/* lhs -- left hand side (operand, of an operation) */ +/* lsd -- least significant digit (of coefficient) */ +/* lsu -- least significant Unit (of coefficient) */ +/* msd -- most significant digit (of coefficient) */ +/* msi -- most significant item (in an array) */ +/* msu -- most significant Unit (of coefficient) */ +/* rhs -- right hand side (operand, of an operation) */ +/* +ve -- positive */ +/* -ve -- negative */ +/* ** -- raise to the power */ +/* ------------------------------------------------------------------ */ + +#include /* for malloc, free, etc. */ +/* #include */ /* for printf [if needed] */ +#include /* for strcpy */ +#include /* for lower */ +#include "cmemory.h" /* for uprv_malloc, etc., in ICU */ +#include "decNumber.h" /* base number library */ +#include "decNumberLocal.h" /* decNumber local types, etc. 
*/ +#include "uassert.h" + +/* Constants */ +/* Public lookup table used by the D2U macro */ +static const uByte d2utable[DECMAXD2U+1]=D2UTABLE; + +#define DECVERB 1 /* set to 1 for verbose DECCHECK */ +#define powers DECPOWERS /* old internal name */ + +/* Local constants */ +#define DIVIDE 0x80 /* Divide operators */ +#define REMAINDER 0x40 /* .. */ +#define DIVIDEINT 0x20 /* .. */ +#define REMNEAR 0x10 /* .. */ +#define COMPARE 0x01 /* Compare operators */ +#define COMPMAX 0x02 /* .. */ +#define COMPMIN 0x03 /* .. */ +#define COMPTOTAL 0x04 /* .. */ +#define COMPNAN 0x05 /* .. [NaN processing] */ +#define COMPSIG 0x06 /* .. [signaling COMPARE] */ +#define COMPMAXMAG 0x07 /* .. */ +#define COMPMINMAG 0x08 /* .. */ + +#define DEC_sNaN 0x40000000 /* local status: sNaN signal */ +#define BADINT (Int)0x80000000 /* most-negative Int; error indicator */ +/* Next two indicate an integer >= 10**6, and its parity (bottom bit) */ +#define BIGEVEN (Int)0x80000002 +#define BIGODD (Int)0x80000003 + +static const Unit uarrone[1]={1}; /* Unit array of 1, used for incrementing */ + +/* ------------------------------------------------------------------ */ +/* round-for-reround digits */ +/* ------------------------------------------------------------------ */ +#if 0 +static const uByte DECSTICKYTAB[10]={1,1,2,3,4,6,6,7,8,9}; /* used if sticky */ +#endif + +/* ------------------------------------------------------------------ */ +/* Powers of ten (powers[n]==10**n, 0<=n<=9) */ +/* ------------------------------------------------------------------ */ +static const uInt DECPOWERS[10]={1, 10, 100, 1000, 10000, 100000, 1000000, + 10000000, 100000000, 1000000000}; + + +/* Granularity-dependent code */ +#if DECDPUN<=4 + #define eInt Int /* extended integer */ + #define ueInt uInt /* unsigned extended integer */ + /* Constant multipliers for divide-by-power-of five using reciprocal */ + /* multiply, after removing powers of 2 by shifting, and final shift */ + /* of 17 [we only need up to 
**4] */ + static const uInt multies[]={131073, 26215, 5243, 1049, 210}; + /* QUOT10 -- macro to return the quotient of unit u divided by 10**n */ + #define QUOT10(u, n) ((((uInt)(u)>>(n))*multies[n])>>17) +#else + /* For DECDPUN>4 non-ANSI-89 64-bit types are needed. */ + #if !DECUSE64 + #error decNumber.c: DECUSE64 must be 1 when DECDPUN>4 + #endif + #define eInt Long /* extended integer */ + #define ueInt uLong /* unsigned extended integer */ +#endif + +/* Local routines */ +static decNumber * decAddOp(decNumber *, const decNumber *, const decNumber *, + decContext *, uByte, uInt *); +static Flag decBiStr(const char *, const char *, const char *); +static uInt decCheckMath(const decNumber *, decContext *, uInt *); +static void decApplyRound(decNumber *, decContext *, Int, uInt *); +static Int decCompare(const decNumber *lhs, const decNumber *rhs, Flag); +static decNumber * decCompareOp(decNumber *, const decNumber *, + const decNumber *, decContext *, + Flag, uInt *); +static void decCopyFit(decNumber *, const decNumber *, decContext *, + Int *, uInt *); +static decNumber * decDecap(decNumber *, Int); +static decNumber * decDivideOp(decNumber *, const decNumber *, + const decNumber *, decContext *, Flag, uInt *); +static decNumber * decExpOp(decNumber *, const decNumber *, + decContext *, uInt *); +static void decFinalize(decNumber *, decContext *, Int *, uInt *); +static Int decGetDigits(Unit *, Int); +static Int decGetInt(const decNumber *); +static decNumber * decLnOp(decNumber *, const decNumber *, + decContext *, uInt *); +static decNumber * decMultiplyOp(decNumber *, const decNumber *, + const decNumber *, decContext *, + uInt *); +static decNumber * decNaNs(decNumber *, const decNumber *, + const decNumber *, decContext *, uInt *); +static decNumber * decQuantizeOp(decNumber *, const decNumber *, + const decNumber *, decContext *, Flag, + uInt *); +static void decReverse(Unit *, Unit *); +static void decSetCoeff(decNumber *, decContext *, const Unit *, + 
Int, Int *, uInt *); +static void decSetMaxValue(decNumber *, decContext *); +static void decSetOverflow(decNumber *, decContext *, uInt *); +static void decSetSubnormal(decNumber *, decContext *, Int *, uInt *); +static Int decShiftToLeast(Unit *, Int, Int); +static Int decShiftToMost(Unit *, Int, Int); +static void decStatus(decNumber *, uInt, decContext *); +static void decToString(const decNumber *, char[], Flag); +static decNumber * decTrim(decNumber *, decContext *, Flag, Flag, Int *); +static Int decUnitAddSub(const Unit *, Int, const Unit *, Int, Int, + Unit *, Int); +static Int decUnitCompare(const Unit *, Int, const Unit *, Int, Int); + +#if !DECSUBSET +/* decFinish == decFinalize when no subset arithmetic needed */ +#define decFinish(a,b,c,d) decFinalize(a,b,c,d) +#else +static void decFinish(decNumber *, decContext *, Int *, uInt *); +static decNumber * decRoundOperand(const decNumber *, decContext *, uInt *); +#endif + +/* Local macros */ +/* masked special-values bits */ +#define SPECIALARG (rhs->bits & DECSPECIAL) +#define SPECIALARGS ((lhs->bits | rhs->bits) & DECSPECIAL) + +/* For use in ICU */ +#define malloc(a) uprv_malloc(a) +#define free(a) uprv_free(a) + +/* Diagnostic macros, etc. */ +#if DECALLOC +/* Handle malloc/free accounting. If enabled, our accountable routines */ +/* are used; otherwise the code just goes straight to the system malloc */ +/* and free routines. */ +#define malloc(a) decMalloc(a) +#define free(a) decFree(a) +#define DECFENCE 0x5a /* corruption detector */ +/* 'Our' malloc and free: */ +static void *decMalloc(size_t); +static void decFree(void *); +uInt decAllocBytes=0; /* count of bytes allocated */ +/* Note that DECALLOC code only checks for storage buffer overflow. */ +/* To check for memory leaks, the decAllocBytes variable must be */ +/* checked to be 0 at appropriate times (e.g., after the test */ +/* harness completes a set of tests). 
This checking may be unreliable */ +/* if the testing is done in a multi-thread environment. */ +#endif + +#if DECCHECK +/* Optional checking routines. Enabling these means that decNumber */ +/* and decContext operands to operator routines are checked for */ +/* correctness. This roughly doubles the execution time of the */ +/* fastest routines (and adds 600+ bytes), so should not normally be */ +/* used in 'production'. */ +/* decCheckInexact is used to check that inexact results have a full */ +/* complement of digits (where appropriate -- this is not the case */ +/* for Quantize, for example) */ +#define DECUNRESU ((decNumber *)(void *)0xffffffff) +#define DECUNUSED ((const decNumber *)(void *)0xffffffff) +#define DECUNCONT ((decContext *)(void *)(0xffffffff)) +static Flag decCheckOperands(decNumber *, const decNumber *, + const decNumber *, decContext *); +static Flag decCheckNumber(const decNumber *); +static void decCheckInexact(const decNumber *, decContext *); +#endif + +#if DECTRACE || DECCHECK +/* Optional trace/debugging routines (may or may not be used) */ +void decNumberShow(const decNumber *); /* displays the components of a number */ +static void decDumpAr(char, const Unit *, Int); +#endif + +/* ================================================================== */ +/* Conversions */ +/* ================================================================== */ + +/* ------------------------------------------------------------------ */ +/* from-int32 -- conversion from Int or uInt */ +/* */ +/* dn is the decNumber to receive the integer */ +/* in or uin is the integer to be converted */ +/* returns dn */ +/* */ +/* No error is possible. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromInt32(decNumber *dn, Int in) { + uInt unsig; + if (in>=0) unsig=in; + else { /* negative (possibly BADINT) */ + if (in==BADINT) unsig=(uInt)1073741824*2; /* special case */ + else unsig=-in; /* invert */ + } + /* in is now positive */ + uprv_decNumberFromUInt32(dn, unsig); + if (in<0) dn->bits=DECNEG; /* sign needed */ + return dn; + } /* decNumberFromInt32 */ + +U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromUInt32(decNumber *dn, uInt uin) { + Unit *up; /* work pointer */ + uprv_decNumberZero(dn); /* clean */ + if (uin==0) return dn; /* [or decGetDigits bad call] */ + for (up=dn->lsu; uin>0; up++) { + *up=(Unit)(uin%(DECDPUNMAX+1)); + uin=uin/(DECDPUNMAX+1); + } + dn->digits=decGetDigits(dn->lsu, up-dn->lsu); + return dn; + } /* decNumberFromUInt32 */ + +/* ------------------------------------------------------------------ */ +/* to-int32 -- conversion to Int or uInt */ +/* */ +/* dn is the decNumber to convert */ +/* set is the context for reporting errors */ +/* returns the converted decNumber, or 0 if Invalid is set */ +/* */ +/* Invalid is set if the decNumber does not have exponent==0 or if */ +/* it is a NaN, Infinite, or out-of-range. */ +/* ------------------------------------------------------------------ */ +U_CAPI Int U_EXPORT2 uprv_decNumberToInt32(const decNumber *dn, decContext *set) { + #if DECCHECK + if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; + #endif + + /* special or too many digits, or bad exponent */ + if (dn->bits&DECSPECIAL || dn->digits>10 || dn->exponent!=0) ; /* bad */ + else { /* is a finite integer with 10 or fewer digits */ + Int d; /* work */ + const Unit *up; /* .. */ + uInt hi=0, lo; /* .. 
*/ + up=dn->lsu; /* -> lsu */ + lo=*up; /* get 1 to 9 digits */ + #if DECDPUN>1 /* split to higher */ + hi=lo/10; + lo=lo%10; + #endif + up++; + /* collect remaining Units, if any, into hi */ + for (d=DECDPUN; ddigits; up++, d+=DECDPUN) hi+=*up*powers[d-1]; + /* now low has the lsd, hi the remainder */ + if (hi>214748364 || (hi==214748364 && lo>7)) { /* out of range? */ + /* most-negative is a reprieve */ + if (dn->bits&DECNEG && hi==214748364 && lo==8) return 0x80000000; + /* bad -- drop through */ + } + else { /* in-range always */ + Int i=X10(hi)+lo; + if (dn->bits&DECNEG) return -i; + return i; + } + } /* integer */ + uprv_decContextSetStatus(set, DEC_Invalid_operation); /* [may not return] */ + return 0; + } /* decNumberToInt32 */ + +U_CAPI uInt U_EXPORT2 uprv_decNumberToUInt32(const decNumber *dn, decContext *set) { + #if DECCHECK + if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; + #endif + /* special or too many digits, or bad exponent, or negative (<0) */ + if (dn->bits&DECSPECIAL || dn->digits>10 || dn->exponent!=0 + || (dn->bits&DECNEG && !ISZERO(dn))); /* bad */ + else { /* is a finite integer with 10 or fewer digits */ + Int d; /* work */ + const Unit *up; /* .. */ + uInt hi=0, lo; /* .. 
*/ + up=dn->lsu; /* -> lsu */ + lo=*up; /* get 1 to 9 digits */ + #if DECDPUN>1 /* split to higher */ + hi=lo/10; + lo=lo%10; + #endif + up++; + /* collect remaining Units, if any, into hi */ + for (d=DECDPUN; ddigits; up++, d+=DECDPUN) hi+=*up*powers[d-1]; + + /* now low has the lsd, hi the remainder */ + if (hi>429496729 || (hi==429496729 && lo>5)) ; /* no reprieve possible */ + else return X10(hi)+lo; + } /* integer */ + uprv_decContextSetStatus(set, DEC_Invalid_operation); /* [may not return] */ + return 0; + } /* decNumberToUInt32 */ + +/* ------------------------------------------------------------------ */ +/* to-scientific-string -- conversion to numeric string */ +/* to-engineering-string -- conversion to numeric string */ +/* */ +/* decNumberToString(dn, string); */ +/* decNumberToEngString(dn, string); */ +/* */ +/* dn is the decNumber to convert */ +/* string is the string where the result will be laid out */ +/* */ +/* string must be at least dn->digits+14 characters long */ +/* */ +/* No error is possible, and no status can be set. 
*/
+/* ------------------------------------------------------------------ */
+/* Both entry points hand the work to decToString and return the      */
+/* caller's buffer; they differ only in the final flag (0 here).      */
+U_CAPI char * U_EXPORT2 uprv_decNumberToString(const decNumber *dn, char *string){
+  const Flag eng=0;                     /* flag passed for this form  */
+  decToString(dn, string, eng);
+  return string;
+  } /* DecNumberToString  */
+
+/* As above, but passes 1 as the decToString flag.  */
+U_CAPI char * U_EXPORT2 uprv_decNumberToEngString(const decNumber *dn, char *string){
+  const Flag eng=1;                     /* flag passed for this form  */
+  decToString(dn, string, eng);
+  return string;
+  } /* DecNumberToEngString  */
+
+/* ------------------------------------------------------------------ */
+/* to-number -- conversion from numeric string                        */
+/*                                                                    */
+/* decNumberFromString -- convert string to decNumber                 */
+/*   dn        -- the number structure to fill                        */
+/*   chars[]   -- the string to convert ('\0' terminated)             */
+/*   set       -- the context used for processing any error,          */
+/*                determining the maximum precision available         */
+/*                (set.digits), determining the maximum and minimum   */
+/*                exponent (set.emax and set.emin), determining if    */
+/*                extended values are allowed, and checking the       */
+/*                rounding mode if overflow occurs or rounding is     */
+/*                needed.                                             */
+/*                                                                    */
+/* The length of the coefficient and the size of the exponent are     */
+/* checked by this routine, so the correct error (Underflow or        */
+/* Overflow) can be reported or rounding applied, as necessary.       */
+/*                                                                    */
+/* If bad syntax is detected, the result will be a quiet NaN.
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *dn, const char chars[], + decContext *set) { + Int exponent=0; /* working exponent [assume 0] */ + uByte bits=0; /* working flags [assume +ve] */ + Unit *res; /* where result will be built */ + Unit resbuff[SD2U(DECBUFFER+9)];/* local buffer in case need temporary */ + /* [+9 allows for ln() constants] */ + Unit *allocres=NULL; /* -> allocated result, iff allocated */ + Int d=0; /* count of digits found in decimal part */ + const char *dotchar=NULL; /* where dot was found */ + const char *cfirst=chars; /* -> first character of decimal part */ + const char *last=NULL; /* -> last digit of decimal part */ + const char *c; /* work */ + Unit *up; /* .. */ + #if DECDPUN>1 + Int cut, out; /* .. */ + #endif + Int residue; /* rounding residue */ + uInt status=0; /* error code */ + + #if DECCHECK + if (decCheckOperands(DECUNRESU, DECUNUSED, DECUNUSED, set)) + return uprv_decNumberZero(dn); + #endif + + do { /* status & malloc protection */ + for (c=chars;; c++) { /* -> input character */ + if (*c>='0' && *c<='9') { /* test for Arabic digit */ + last=c; + d++; /* count of real digits */ + continue; /* still in decimal part */ + } + if (*c=='.' && dotchar==NULL) { /* first '.' */ + dotchar=c; /* record offset into decimal part */ + if (c==cfirst) cfirst++; /* first digit must follow */ + continue;} + if (c==chars) { /* first in string... */ + if (*c=='-') { /* valid - sign */ + cfirst++; + bits=DECNEG; + continue;} + if (*c=='+') { /* valid + sign */ + cfirst++; + continue;} + } + /* *c is not a digit, or a valid +, -, or '.' */ + break; + } /* c */ + + if (last==NULL) { /* no digits yet */ + status=DEC_Conversion_syntax;/* assume the worst */ + if (*c=='\0') break; /* and no more to come... 
*/ + #if DECSUBSET + /* if subset then infinities and NaNs are not allowed */ + if (!set->extended) break; /* hopeless */ + #endif + /* Infinities and NaNs are possible, here */ + if (dotchar!=NULL) break; /* .. unless had a dot */ + uprv_decNumberZero(dn); /* be optimistic */ + if (decBiStr(c, "infinity", "INFINITY") + || decBiStr(c, "inf", "INF")) { + dn->bits=bits | DECINF; + status=0; /* is OK */ + break; /* all done */ + } + /* a NaN expected */ + /* 2003.09.10 NaNs are now permitted to have a sign */ + dn->bits=bits | DECNAN; /* assume simple NaN */ + if (*c=='s' || *c=='S') { /* looks like an sNaN */ + c++; + dn->bits=bits | DECSNAN; + } + if (*c!='n' && *c!='N') break; /* check caseless "NaN" */ + c++; + if (*c!='a' && *c!='A') break; /* .. */ + c++; + if (*c!='n' && *c!='N') break; /* .. */ + c++; + /* now either nothing, or nnnn payload, expected */ + /* -> start of integer and skip leading 0s [including plain 0] */ + for (cfirst=c; *cfirst=='0';) cfirst++; + if (*cfirst=='\0') { /* "NaN" or "sNaN", maybe with all 0s */ + status=0; /* it's good */ + break; /* .. */ + } + /* something other than 0s; setup last and d as usual [no dots] */ + for (c=cfirst;; c++, d++) { + if (*c<'0' || *c>'9') break; /* test for Arabic digit */ + last=c; + } + if (*c!='\0') break; /* not all digits */ + if (d>set->digits-1) { + /* [NB: payload in a decNumber can be full length unless */ + /* clamped, in which case can only be digits-1] */ + if (set->clamp) break; + if (d>set->digits) break; + } /* too many digits? */ + /* good; drop through to convert the integer to coefficient */ + status=0; /* syntax is OK */ + bits=dn->bits; /* for copy-back */ + } /* last==NULL */ + + else if (*c!='\0') { /* more to process... 
*/ + /* had some digits; exponent is only valid sequence now */ + Flag nege; /* 1=negative exponent */ + const char *firstexp; /* -> first significant exponent digit */ + status=DEC_Conversion_syntax;/* assume the worst */ + if (*c!='e' && *c!='E') break; + /* Found 'e' or 'E' -- now process explicit exponent */ + /* 1998.07.11: sign no longer required */ + nege=0; + c++; /* to (possible) sign */ + if (*c=='-') {nege=1; c++;} + else if (*c=='+') c++; + if (*c=='\0') break; + + for (; *c=='0' && *(c+1)!='\0';) c++; /* strip insignificant zeros */ + firstexp=c; /* save exponent digit place */ + for (; ;c++) { + if (*c<'0' || *c>'9') break; /* not a digit */ + exponent=X10(exponent)+(Int)*c-(Int)'0'; + } /* c */ + /* if not now on a '\0', *c must not be a digit */ + if (*c!='\0') break; + + /* (this next test must be after the syntax checks) */ + /* if it was too long the exponent may have wrapped, so check */ + /* carefully and set it to a certain overflow if wrap possible */ + if (c>=firstexp+9+1) { + if (c>firstexp+9+1 || *firstexp>'1') exponent=DECNUMMAXE*2; + /* [up to 1999999999 is OK, for example 1E-1000000998] */ + } + if (nege) exponent=-exponent; /* was negative */ + status=0; /* is OK */ + } /* stuff after digits */ + + /* Here when whole string has been inspected; syntax is good */ + /* cfirst->first digit (never dot), last->last digit (ditto) */ + + /* strip leading zeros/dot [leave final 0 if all 0's] */ + if (*cfirst=='0') { /* [cfirst has stepped over .] */ + for (c=cfirst; cextended) { + uprv_decNumberZero(dn); /* clean result */ + break; /* [could be return] */ + } + #endif + } /* at least one leading 0 */ + + /* Handle decimal point... 
*/ + if (dotchar!=NULL && dotchardigits) res=dn->lsu; /* fits into supplied decNumber */ + else { /* rounding needed */ + Int needbytes=D2U(d)*sizeof(Unit);/* bytes needed */ + res=resbuff; /* assume use local buffer */ + if (needbytes>(Int)sizeof(resbuff)) { /* too big for local */ + allocres=(Unit *)malloc(needbytes); + if (allocres==NULL) {status|=DEC_Insufficient_storage; break;} + res=allocres; + } + } + /* res now -> number lsu, buffer, or allocated storage for Unit array */ + + /* Place the coefficient into the selected Unit array */ + /* [this is often 70% of the cost of this function when DECDPUN>1] */ + #if DECDPUN>1 + out=0; /* accumulator */ + up=res+D2U(d)-1; /* -> msu */ + cut=d-(up-res)*DECDPUN; /* digits in top unit */ + for (c=cfirst;; c++) { /* along the digits */ + if (*c=='.') continue; /* ignore '.' [don't decrement cut] */ + out=X10(out)+(Int)*c-(Int)'0'; + if (c==last) break; /* done [never get to trailing '.'] */ + cut--; + if (cut>0) continue; /* more for this unit */ + *up=(Unit)out; /* write unit */ + up--; /* prepare for unit below.. */ + cut=DECDPUN; /* .. */ + out=0; /* .. */ + } /* c */ + *up=(Unit)out; /* write lsu */ + + #else + /* DECDPUN==1 */ + up=res; /* -> lsu */ + for (c=last; c>=cfirst; c--) { /* over each character, from least */ + if (*c=='.') continue; /* ignore . 
[don't step up] */ + *up=(Unit)((Int)*c-(Int)'0'); + up++; + } /* c */ + #endif + + dn->bits=bits; + dn->exponent=exponent; + dn->digits=d; + + /* if not in number (too long) shorten into the number */ + if (d>set->digits) { + residue=0; + decSetCoeff(dn, set, res, d, &residue, &status); + /* always check for overflow or subnormal and round as needed */ + decFinalize(dn, set, &residue, &status); + } + else { /* no rounding, but may still have overflow or subnormal */ + /* [these tests are just for performance; finalize repeats them] */ + if ((dn->exponent-1emin-dn->digits) + || (dn->exponent-1>set->emax-set->digits)) { + residue=0; + decFinalize(dn, set, &residue, &status); + } + } + /* decNumberShow(dn); */ + } while(0); /* [for break] */ + + if (allocres!=NULL) free(allocres); /* drop any storage used */ + if (status!=0) decStatus(dn, status, set); + return dn; + } /* decNumberFromString */ + +/* ================================================================== */ +/* Operators */ +/* ================================================================== */ + +/* ------------------------------------------------------------------ */ +/* decNumberAbs -- absolute value operator */ +/* */ +/* This computes C = abs(A) */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context */ +/* */ +/* See also decNumberCopyAbs for a quiet bitwise version of this. */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +/* This has the same effect as decNumberPlus unless A is negative, */ +/* in which case it has the same effect as decNumberMinus. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberAbs(decNumber *res, const decNumber *rhs, + decContext *set) { + decNumber dzero; /* for 0 */ + uInt status=0; /* accumulator */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + uprv_decNumberZero(&dzero); /* set 0 */ + dzero.exponent=rhs->exponent; /* [no coefficient expansion] */ + decAddOp(res, &dzero, rhs, set, (uByte)(rhs->bits & DECNEG), &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberAbs */ + +/* ------------------------------------------------------------------ */ +/* decNumberAdd -- add two Numbers */ +/* */ +/* This computes C = A + B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X+X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +/* This just calls the routine shared with Subtract */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberAdd(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decAddOp(res, lhs, rhs, set, 0, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberAdd */ + +/* ------------------------------------------------------------------ */ +/* decNumberAnd -- AND two Numbers, digitwise */ +/* */ +/* This computes C = A & B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X&X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context (used for result length and error report) */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Logical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberAnd(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + const Unit *ua, *ub; /* -> operands */ + const Unit *msua, *msub; /* -> operand msus */ + Unit *uc, *msuc; /* -> result and its msu */ + Int msudigs; /* digits in res msu */ + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) + || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + + /* operands are valid */ + ua=lhs->lsu; /* bottom-up */ + ub=rhs->lsu; /* .. */ + uc=res->lsu; /* .. */ + msua=ua+D2U(lhs->digits)-1; /* -> msu of lhs */ + msub=ub+D2U(rhs->digits)-1; /* -> msu of rhs */ + msuc=uc+D2U(set->digits)-1; /* -> msu of result */ + msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ + for (; uc<=msuc; ua++, ub++, uc++) { /* Unit loop */ + Unit a, b; /* extract units */ + if (ua>msua) a=0; + else a=*ua; + if (ub>msub) b=0; + else b=*ub; + *uc=0; /* can now write back */ + if (a|b) { /* maybe 1 bits to examine */ + Int i, j; + *uc=0; /* can now write back */ + /* This loop could be unrolled and/or use BIN2BCD tables */ + for (i=0; i1) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + if (uc==msuc && i==msudigs-1) break; /* just did final digit */ + } /* each digit */ + } /* both OK */ + } /* each unit */ + /* [here uc-1 is the msu of the result] */ + res->digits=decGetDigits(res->lsu, uc-res->lsu); + res->exponent=0; /* integer */ + res->bits=0; /* sign=0 */ + return res; /* [no status to set] */ + } /* decNumberAnd */ + +/* ------------------------------------------------------------------ */ +/* decNumberCompare -- compare two Numbers */ +/* */ +/* This computes C = A ? B */ +/* */ +/* res is C, the result. 
C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for one digit (or NaN). */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompare(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPARE, &status); + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberCompare */ + +/* ------------------------------------------------------------------ */ +/* decNumberCompareSignal -- compare, signalling on all NaNs */ +/* */ +/* This computes C = A ? B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for one digit (or NaN). */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareSignal(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPSIG, &status); + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberCompareSignal */ + +/* ------------------------------------------------------------------ */ +/* decNumberCompareTotal -- compare two Numbers, using total ordering */ +/* */ +/* This computes C = A ? B, under total ordering */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for one digit; the result will always be one of */ +/* -1, 0, or 1. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareTotal(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPTOTAL, &status); + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberCompareTotal */ + +/* ------------------------------------------------------------------ */ +/* decNumberCompareTotalMag -- compare, total ordering of magnitudes */ +/* */ +/* This computes C = |A| ? |B|, under total ordering */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for one digit; the result will always be one of */ +/* -1, 0, or 1. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareTotalMag(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + uInt needbytes; /* for space calculations */ + decNumber bufa[D2N(DECBUFFER+1)];/* +1 in case DECBUFFER=0 */ + decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ + decNumber bufb[D2N(DECBUFFER+1)]; + decNumber *allocbufb=NULL; /* -> allocated bufb, iff allocated */ + decNumber *a, *b; /* temporary pointers */ + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + /* if either is negative, take a copy and absolute */ + if (decNumberIsNegative(lhs)) { /* lhs<0 */ + a=bufa; + needbytes=sizeof(decNumber)+(D2U(lhs->digits)-1)*sizeof(Unit); + if (needbytes>sizeof(bufa)) { /* need malloc space */ + allocbufa=(decNumber *)malloc(needbytes); + if (allocbufa==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + a=allocbufa; /* use the allocated space */ + } + uprv_decNumberCopy(a, lhs); /* 
copy content */ + a->bits&=~DECNEG; /* .. and clear the sign */ + lhs=a; /* use copy from here on */ + } + if (decNumberIsNegative(rhs)) { /* rhs<0 */ + b=bufb; + needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); + if (needbytes>sizeof(bufb)) { /* need malloc space */ + allocbufb=(decNumber *)malloc(needbytes); + if (allocbufb==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + b=allocbufb; /* use the allocated space */ + } + uprv_decNumberCopy(b, rhs); /* copy content */ + b->bits&=~DECNEG; /* .. and clear the sign */ + rhs=b; /* use copy from here on */ + } + decCompareOp(res, lhs, rhs, set, COMPTOTAL, &status); + } while(0); /* end protected */ + + if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ + if (allocbufb!=NULL) free(allocbufb); /* .. */ + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberCompareTotalMag */ + +/* ------------------------------------------------------------------ */ +/* decNumberDivide -- divide one number by another */ +/* */ +/* This computes C = A / B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X/X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberDivide(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decDivideOp(res, lhs, rhs, set, DIVIDE, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberDivide */ + +/* ------------------------------------------------------------------ */ +/* decNumberDivideInteger -- divide and return integer quotient */ +/* */ +/* This computes C = A # B, where # is the integer divide operator */ +/* */ +/* res is C, the result. 
C may be A and/or B (e.g., X=X#X)         */
+/*   lhs is A                                                         */
+/*   rhs is B                                                         */
+/*   set is the context                                               */
+/*                                                                    */
+/* C must have space for set->digits digits.                          */
+/* ------------------------------------------------------------------ */
+/* Delegates to decDivideOp with the DIVIDEINT operation code.  */
+U_CAPI decNumber * U_EXPORT2 uprv_decNumberDivideInteger(decNumber *res, const decNumber *lhs,
+                                   const decNumber *rhs, decContext *set) {
+  uInt raised=0;                        /* status accumulator  */
+  decDivideOp(res, lhs, rhs, set, DIVIDEINT, &raised);
+  if (raised) {
+    decStatus(res, raised, set);        /* apply status last  */
+    }
+  return res;
+  } /* decNumberDivideInteger  */
+
+/* ------------------------------------------------------------------ */
+/* decNumberExp -- exponentiation                                     */
+/*                                                                    */
+/*   This computes C = exp(A)                                         */
+/*                                                                    */
+/*   res is C, the result.  C may be A                                */
+/*   rhs is A                                                         */
+/*   set is the context; note that rounding mode has no effect        */
+/*                                                                    */
+/* C must have space for set->digits digits.                          */
+/*                                                                    */
+/* Mathematical function restrictions apply (see above); a NaN is     */
+/* returned with Invalid_operation if a restriction is violated.      */
+/*                                                                    */
+/* Finite results will always be full precision and Inexact, except   */
+/* when A is a zero or -Infinity (giving 1 or 0 respectively).        */
+/*                                                                    */
+/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will    */
+/* almost always be correctly rounded, but may be up to 1 ulp in      */
+/* error in rare cases.                                               */
+/* ------------------------------------------------------------------ */
+/* This is a wrapper for decExpOp which can handle the slightly wider */
+/* (double) range needed by Ln (which has to be able to calculate     */
+/* exp(-a) where a can be the tiniest number (Ntiny).
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberExp(decNumber *res, const decNumber *rhs, + decContext *set) { + uInt status=0; /* accumulator */ + #if DECSUBSET + decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ + #endif + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* Check restrictions; these restrictions ensure that if h=8 (see */ + /* decExpOp) then the result will either overflow or underflow to 0. */ + /* Other math functions restrict the input range, too, for inverses. */ + /* If not violated then carry out the operation. */ + if (!decCheckMath(rhs, set, &status)) do { /* protect allocation */ + #if DECSUBSET + if (!set->extended) { + /* reduce operand and set lostDigits status, as needed */ + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, &status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + decExpOp(res, rhs, set, &status); + } while(0); /* end protected */ + + #if DECSUBSET + if (allocrhs !=NULL) free(allocrhs); /* drop any storage used */ + #endif + /* apply significant status */ + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberExp */ + +/* ------------------------------------------------------------------ */ +/* decNumberFMA -- fused multiply add */ +/* */ +/* This computes D = (A * B) + C with only one rounding */ +/* */ +/* res is D, the result. D may be A or B or C (e.g., X=FMA(X,X,X)) */ +/* lhs is A */ +/* rhs is B */ +/* fhs is C [far hand side] */ +/* set is the context */ +/* */ +/* Mathematical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. */ +/* */ +/* C must have space for set->digits digits. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberFMA(decNumber *res, const decNumber *lhs, + const decNumber *rhs, const decNumber *fhs, + decContext *set) { + uInt status=0; /* accumulator */ + decContext dcmul; /* context for the multiplication */ + uInt needbytes; /* for space calculations */ + decNumber bufa[D2N(DECBUFFER*2+1)]; + decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ + decNumber *acc; /* accumulator pointer */ + decNumber dzero; /* work */ + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + if (decCheckOperands(res, fhs, DECUNUSED, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { /* [undefined if subset] */ + status|=DEC_Invalid_operation; + break;} + #endif + /* Check math restrictions [these ensure no overflow or underflow] */ + if ((!decNumberIsSpecial(lhs) && decCheckMath(lhs, set, &status)) + || (!decNumberIsSpecial(rhs) && decCheckMath(rhs, set, &status)) + || (!decNumberIsSpecial(fhs) && decCheckMath(fhs, set, &status))) break; + /* set up context for multiply */ + dcmul=*set; + dcmul.digits=lhs->digits+rhs->digits; /* just enough */ + /* [The above may be an over-estimate for subset arithmetic, but that's OK] */ + dcmul.emax=DEC_MAX_EMAX; /* effectively unbounded .. 
*/ + dcmul.emin=DEC_MIN_EMIN; /* [thanks to Math restrictions] */ + /* set up decNumber space to receive the result of the multiply */ + acc=bufa; /* may fit */ + needbytes=sizeof(decNumber)+(D2U(dcmul.digits)-1)*sizeof(Unit); + if (needbytes>sizeof(bufa)) { /* need malloc space */ + allocbufa=(decNumber *)malloc(needbytes); + if (allocbufa==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + acc=allocbufa; /* use the allocated space */ + } + /* multiply with extended range and necessary precision */ + /*printf("emin=%ld\n", dcmul.emin); */ + decMultiplyOp(acc, lhs, rhs, &dcmul, &status); + /* Only Invalid operation (from sNaN or Inf * 0) is possible in */ + /* status; if either is seen than ignore fhs (in case it is */ + /* another sNaN) and set acc to NaN unless we had an sNaN */ + /* [decMultiplyOp leaves that to caller] */ + /* Note sNaN has to go through addOp to shorten payload if */ + /* necessary */ + if ((status&DEC_Invalid_operation)!=0) { + if (!(status&DEC_sNaN)) { /* but be true invalid */ + uprv_decNumberZero(res); /* acc not yet set */ + res->bits=DECNAN; + break; + } + uprv_decNumberZero(&dzero); /* make 0 (any non-NaN would do) */ + fhs=&dzero; /* use that */ + } + #if DECCHECK + else { /* multiply was OK */ + if (status!=0) printf("Status=%08lx after FMA multiply\n", (LI)status); + } + #endif + /* add the third operand and result -> res, and all is done */ + decAddOp(res, acc, fhs, set, 0, &status); + } while(0); /* end protected */ + + if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberFMA */ + +/* ------------------------------------------------------------------ */ +/* decNumberInvert -- invert a Number, digitwise */ +/* */ +/* This computes C = ~A */ +/* */ +/* res is C, the result. 
C may be A (e.g., X=~X) */ +/* rhs is A */ +/* set is the context (used for result length and error report) */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Logical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberInvert(decNumber *res, const decNumber *rhs, + decContext *set) { + const Unit *ua, *msua; /* -> operand and its msu */ + Unit *uc, *msuc; /* -> result and its msu */ + Int msudigs; /* digits in res msu */ + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + if (rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + /* operand is valid */ + ua=rhs->lsu; /* bottom-up */ + uc=res->lsu; /* .. */ + msua=ua+D2U(rhs->digits)-1; /* -> msu of rhs */ + msuc=uc+D2U(set->digits)-1; /* -> msu of result */ + msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ + for (; uc<=msuc; ua++, uc++) { /* Unit loop */ + Unit a; /* extract unit */ + Int i, j; /* work */ + if (ua>msua) a=0; + else a=*ua; + *uc=0; /* can now write back */ + /* always need to examine all bits in rhs */ + /* This loop could be unrolled and/or use BIN2BCD tables */ + for (i=0; i1) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + if (uc==msuc && i==msudigs-1) break; /* just did final digit */ + } /* each digit */ + } /* each unit */ + /* [here uc-1 is the msu of the result] */ + res->digits=decGetDigits(res->lsu, uc-res->lsu); + res->exponent=0; /* integer */ + res->bits=0; /* sign=0 */ + return res; /* [no status to set] */ + } /* decNumberInvert */ + +/* ------------------------------------------------------------------ */ +/* decNumberLn -- natural logarithm */ +/* */ +/* This computes C = ln(A) */ +/* */ +/* res is C, the result. 
C may be A */ +/* rhs is A */ +/* set is the context; note that rounding mode has no effect */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Notable cases: */ +/* A<0 -> Invalid */ +/* A=0 -> -Infinity (Exact) */ +/* A=+Infinity -> +Infinity (Exact) */ +/* A=1 exactly -> 0 (Exact) */ +/* */ +/* Mathematical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. */ +/* */ +/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */ +/* almost always be correctly rounded, but may be up to 1 ulp in */ +/* error in rare cases. */ +/* ------------------------------------------------------------------ */ +/* This is a wrapper for decLnOp which can handle the slightly wider */ +/* (+11) range needed by Ln, Log10, etc. (which may have to be able */ +/* to calculate at p+e+2). */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberLn(decNumber *res, const decNumber *rhs, + decContext *set) { + uInt status=0; /* accumulator */ + #if DECSUBSET + decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ + #endif + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* Check restrictions; this is a math function; if not violated */ + /* then carry out the operation. 
*/ + if (!decCheckMath(rhs, set, &status)) do { /* protect allocation */ + #if DECSUBSET + if (!set->extended) { + /* reduce operand and set lostDigits status, as needed */ + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, &status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + /* special check in subset for rhs=0 */ + if (ISZERO(rhs)) { /* +/- zeros -> error */ + status|=DEC_Invalid_operation; + break;} + } /* extended=0 */ + #endif + decLnOp(res, rhs, set, &status); + } while(0); /* end protected */ + + #if DECSUBSET + if (allocrhs !=NULL) free(allocrhs); /* drop any storage used */ + #endif + /* apply significant status */ + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberLn */ + +/* ------------------------------------------------------------------ */ +/* decNumberLogB - get adjusted exponent, by 754 rules */ +/* */ +/* This computes C = adjustedexponent(A) */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context, used only for digits and status */ +/* */ +/* C must have space for 10 digits (A might have 10**9 digits and */ +/* an exponent of +999999999, or one digit and an exponent of */ +/* -1999999999). */ +/* */ +/* This returns the adjusted exponent of A after (in theory) padding */ +/* with zeros on the right to set->digits digits while keeping the */ +/* same value. The exponent is not limited by emin/emax. 
*/ +/* */ +/* Notable cases: */ +/* A<0 -> Use |A| */ +/* A=0 -> -Infinity (Division by zero) */ +/* A=Infinite -> +Infinity (Exact) */ +/* A=1 exactly -> 0 (Exact) */ +/* NaNs are propagated as usual */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberLogB(decNumber *res, const decNumber *rhs, + decContext *set) { + uInt status=0; /* accumulator */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* NaNs as usual; Infinities return +Infinity; 0->oops */ + if (decNumberIsNaN(rhs)) decNaNs(res, rhs, NULL, set, &status); + else if (decNumberIsInfinite(rhs)) uprv_decNumberCopyAbs(res, rhs); + else if (decNumberIsZero(rhs)) { + uprv_decNumberZero(res); /* prepare for Infinity */ + res->bits=DECNEG|DECINF; /* -Infinity */ + status|=DEC_Division_by_zero; /* as per 754 */ + } + else { /* finite non-zero */ + Int ae=rhs->exponent+rhs->digits-1; /* adjusted exponent */ + uprv_decNumberFromInt32(res, ae); /* lay it out */ + } + + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberLogB */ + +/* ------------------------------------------------------------------ */ +/* decNumberLog10 -- logarithm in base 10 */ +/* */ +/* This computes C = log10(A) */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context; note that rounding mode has no effect */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Notable cases: */ +/* A<0 -> Invalid */ +/* A=0 -> -Infinity (Exact) */ +/* A=+Infinity -> +Infinity (Exact) */ +/* A=10**n (if n is an integer) -> n (Exact) */ +/* */ +/* Mathematical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. */ +/* */ +/* An Inexact result is rounded using DEC_ROUND_HALF_EVEN; it will */ +/* almost always be correctly rounded, but may be up to 1 ulp in */ +/* error in rare cases. 
*/ +/* ------------------------------------------------------------------ */ +/* This calculates ln(A)/ln(10) using appropriate precision. For */ +/* ln(A) this is the max(p, rhs->digits + t) + 3, where p is the */ +/* requested digits and t is the number of digits in the exponent */ +/* (maximum 6). For ln(10) it is p + 3; this is often handled by the */ +/* fastpath in decLnOp. The final division is done to the requested */ +/* precision. */ +/* ------------------------------------------------------------------ */ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif +U_CAPI decNumber * U_EXPORT2 uprv_decNumberLog10(decNumber *res, const decNumber *rhs, + decContext *set) { + uInt status=0, ignore=0; /* status accumulators */ + uInt needbytes; /* for space calculations */ + Int p; /* working precision */ + Int t; /* digits in exponent of A */ + + /* buffers for a and b working decimals */ + /* (adjustment calculator, same size) */ + decNumber bufa[D2N(DECBUFFER+2)]; + decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ + decNumber *a=bufa; /* temporary a */ + decNumber bufb[D2N(DECBUFFER+2)]; + decNumber *allocbufb=NULL; /* -> allocated bufb, iff allocated */ + decNumber *b=bufb; /* temporary b */ + decNumber bufw[D2N(10)]; /* working 2-10 digit number */ + decNumber *w=bufw; /* .. */ + #if DECSUBSET + decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ + #endif + + decContext aset; /* working context */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* Check restrictions; this is a math function; if not violated */ + /* then carry out the operation. 
*/ + if (!decCheckMath(rhs, set, &status)) do { /* protect malloc */ + #if DECSUBSET + if (!set->extended) { + /* reduce operand and set lostDigits status, as needed */ + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, &status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + /* special check in subset for rhs=0 */ + if (ISZERO(rhs)) { /* +/- zeros -> error */ + status|=DEC_Invalid_operation; + break;} + } /* extended=0 */ + #endif + + uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); /* clean context */ + + /* handle exact powers of 10; only check if +ve finite */ + if (!(rhs->bits&(DECNEG|DECSPECIAL)) && !ISZERO(rhs)) { + Int residue=0; /* (no residue) */ + uInt copystat=0; /* clean status */ + + /* round to a single digit... */ + aset.digits=1; + decCopyFit(w, rhs, &aset, &residue, &copystat); /* copy & shorten */ + /* if exact and the digit is 1, rhs is a power of 10 */ + if (!(copystat&DEC_Inexact) && w->lsu[0]==1) { + /* the exponent, conveniently, is the power of 10; making */ + /* this the result needs a little care as it might not fit, */ + /* so first convert it into the working number, and then move */ + /* to res */ + uprv_decNumberFromInt32(w, w->exponent); + residue=0; + decCopyFit(res, w, set, &residue, &status); /* copy & round */ + decFinish(res, set, &residue, &status); /* cleanup/set flags */ + break; + } /* not a power of 10 */ + } /* not a candidate for exact */ + + /* simplify the information-content calculation to use 'total */ + /* number of digits in a, including exponent' as compared to the */ + /* requested digits, as increasing this will only rarely cost an */ + /* iteration in ln(a) anyway */ + t=6; /* it can never be >6 */ + + /* allocate space when needed... 
*/ + p=(rhs->digits+t>set->digits?rhs->digits+t:set->digits)+3; + needbytes=sizeof(decNumber)+(D2U(p)-1)*sizeof(Unit); + if (needbytes>sizeof(bufa)) { /* need malloc space */ + allocbufa=(decNumber *)malloc(needbytes); + if (allocbufa==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + a=allocbufa; /* use the allocated space */ + } + aset.digits=p; /* as calculated */ + aset.emax=DEC_MAX_MATH; /* usual bounds */ + aset.emin=-DEC_MAX_MATH; /* .. */ + aset.clamp=0; /* and no concrete format */ + decLnOp(a, rhs, &aset, &status); /* a=ln(rhs) */ + + /* skip the division if the result so far is infinite, NaN, or */ + /* zero, or there was an error; note NaN from sNaN needs copy */ + if (status&DEC_NaNs && !(status&DEC_sNaN)) break; + if (a->bits&DECSPECIAL || ISZERO(a)) { + uprv_decNumberCopy(res, a); /* [will fit] */ + break;} + + /* for ln(10) an extra 3 digits of precision are needed */ + p=set->digits+3; + needbytes=sizeof(decNumber)+(D2U(p)-1)*sizeof(Unit); + if (needbytes>sizeof(bufb)) { /* need malloc space */ + allocbufb=(decNumber *)malloc(needbytes); + if (allocbufb==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + b=allocbufb; /* use the allocated space */ + } + uprv_decNumberZero(w); /* set up 10... */ + #if DECDPUN==1 + w->lsu[1]=1; w->lsu[0]=0; /* .. */ + #else + w->lsu[0]=10; /* .. */ + #endif + w->digits=2; /* .. */ + + aset.digits=p; + decLnOp(b, w, &aset, &ignore); /* b=ln(10) */ + + aset.digits=set->digits; /* for final divide */ + decDivideOp(res, a, b, &aset, DIVIDE, &status); /* into result */ + } while(0); /* [for break] */ + + if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ + if (allocbufb!=NULL) free(allocbufb); /* .. */ + #if DECSUBSET + if (allocrhs !=NULL) free(allocrhs); /* .. 
*/ + #endif + /* apply significant status */ + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberLog10 */ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 +#pragma GCC diagnostic pop +#endif + +/* ------------------------------------------------------------------ */ +/* decNumberMax -- compare two Numbers and return the maximum */ +/* */ +/* This computes C = A ? B, returning the maximum by 754 rules */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberMax(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPMAX, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberMax */ + +/* ------------------------------------------------------------------ */ +/* decNumberMaxMag -- compare and return the maximum by magnitude */ +/* */ +/* This computes C = A ? B, returning the maximum by 754 rules */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberMaxMag(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPMAXMAG, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberMaxMag */ + +/* ------------------------------------------------------------------ */ +/* decNumberMin -- compare two Numbers and return the minimum */ +/* */ +/* This computes C = A ? B, returning the minimum by 754 rules */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberMin(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPMIN, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberMin */ + +/* ------------------------------------------------------------------ */ +/* decNumberMinMag -- compare and return the minimum by magnitude */ +/* */ +/* This computes C = A ? B, returning the minimum by 754 rules */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberMinMag(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decCompareOp(res, lhs, rhs, set, COMPMINMAG, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberMinMag */ + +/* ------------------------------------------------------------------ */ +/* decNumberMinus -- prefix minus operator */ +/* */ +/* This computes C = 0 - A */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context */ +/* */ +/* See also decNumberCopyNegate for a quiet bitwise version of this. */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +/* Simply use AddOp for the subtract, which will do the necessary. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberMinus(decNumber *res, const decNumber *rhs, + decContext *set) { + decNumber dzero; + uInt status=0; /* accumulator */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + uprv_decNumberZero(&dzero); /* make 0 */ + dzero.exponent=rhs->exponent; /* [no coefficient expansion] */ + decAddOp(res, &dzero, rhs, set, DECNEG, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberMinus */ + +/* ------------------------------------------------------------------ */ +/* decNumberNextMinus -- next towards -Infinity */ +/* */ +/* This computes C = A - infinitesimal, rounded towards -Infinity */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context */ +/* */ +/* This is a generalization of 754 NextDown. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextMinus(decNumber *res, const decNumber *rhs, + decContext *set) { + decNumber dtiny; /* constant */ + decContext workset=*set; /* work */ + uInt status=0; /* accumulator */ + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* +Infinity is the special case */ + if ((rhs->bits&(DECINF|DECNEG))==DECINF) { + decSetMaxValue(res, set); /* is +ve */ + /* there is no status to set */ + return res; + } + uprv_decNumberZero(&dtiny); /* start with 0 */ + dtiny.lsu[0]=1; /* make number that is .. */ + dtiny.exponent=DEC_MIN_EMIN-1; /* .. smaller than tiniest */ + workset.round=DEC_ROUND_FLOOR; + decAddOp(res, rhs, &dtiny, &workset, DECNEG, &status); + status&=DEC_Invalid_operation|DEC_sNaN; /* only sNaN Invalid please */ + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberNextMinus */ + +/* ------------------------------------------------------------------ */ +/* decNumberNextPlus -- next towards +Infinity */ +/* */ +/* This computes C = A + infinitesimal, rounded towards +Infinity */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context */ +/* */ +/* This is a generalization of 754 NextUp. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextPlus(decNumber *res, const decNumber *rhs, + decContext *set) { + decNumber dtiny; /* constant */ + decContext workset=*set; /* work */ + uInt status=0; /* accumulator */ + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* -Infinity is the special case */ + if ((rhs->bits&(DECINF|DECNEG))==(DECINF|DECNEG)) { + decSetMaxValue(res, set); + res->bits=DECNEG; /* negative */ + /* there is no status to set */ + return res; + } + uprv_decNumberZero(&dtiny); /* start with 0 */ + dtiny.lsu[0]=1; /* make number that is .. 
*/ + dtiny.exponent=DEC_MIN_EMIN-1; /* .. smaller than tiniest */ + workset.round=DEC_ROUND_CEILING; + decAddOp(res, rhs, &dtiny, &workset, 0, &status); + status&=DEC_Invalid_operation|DEC_sNaN; /* only sNaN Invalid please */ + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberNextPlus */ + +/* ------------------------------------------------------------------ */ +/* decNumberNextToward -- next towards rhs */ +/* */ +/* This computes C = A +/- infinitesimal, rounded towards */ +/* +/-Infinity in the direction of B, as per 754-1985 nextafter */ +/* modified during revision but dropped from 754-2008. */ +/* */ +/* res is C, the result. C may be A or B. */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* This is a generalization of 754-1985 NextAfter. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextToward(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + decNumber dtiny; /* constant */ + decContext workset=*set; /* work */ + Int result; /* .. */ + uInt status=0; /* accumulator */ + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) { + decNaNs(res, lhs, rhs, set, &status); + } + else { /* Is numeric, so no chance of sNaN Invalid, etc. 
*/ + result=decCompare(lhs, rhs, 0); /* sign matters */ + if (result==BADINT) status|=DEC_Insufficient_storage; /* rare */ + else { /* valid compare */ + if (result==0) uprv_decNumberCopySign(res, lhs, rhs); /* easy */ + else { /* differ: need NextPlus or NextMinus */ + uByte sub; /* add or subtract */ + if (result<0) { /* lhs<rhs, do nextplus */ + /* -Infinity is the special case */ + if ((lhs->bits&(DECINF|DECNEG))==(DECINF|DECNEG)) { + decSetMaxValue(res, set); + res->bits=DECNEG; /* negative */ + return res; /* there is no status to set */ + } + workset.round=DEC_ROUND_CEILING; + sub=0; /* add, please */ + } /* plus */ + else { /* lhs>rhs, do nextminus */ + /* +Infinity is the special case */ + if ((lhs->bits&(DECINF|DECNEG))==DECINF) { + decSetMaxValue(res, set); + return res; /* there is no status to set */ + } + workset.round=DEC_ROUND_FLOOR; + sub=DECNEG; /* subtract, please */ + } /* minus */ + uprv_decNumberZero(&dtiny); /* start with 0 */ + dtiny.lsu[0]=1; /* make number that is .. */ + dtiny.exponent=DEC_MIN_EMIN-1; /* .. smaller than tiniest */ + decAddOp(res, lhs, &dtiny, &workset, sub, &status); /* + or - */ + /* turn off exceptions if the result is a normal number */ + /* (including Nmin), otherwise let all status through */ + if (uprv_decNumberIsNormal(res, set)) status=0; + } /* unequal */ + } /* compare OK */ + } /* numeric */ + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberNextToward */ + +/* ------------------------------------------------------------------ */ +/* decNumberOr -- OR two Numbers, digitwise */ +/* */ +/* This computes C = A | B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X|X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context (used for result length and error report) */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Logical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberOr(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + const Unit *ua, *ub; /* -> operands */ + const Unit *msua, *msub; /* -> operand msus */ + Unit *uc, *msuc; /* -> result and its msu */ + Int msudigs; /* digits in res msu */ + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) + || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + /* operands are valid */ + ua=lhs->lsu; /* bottom-up */ + ub=rhs->lsu; /* .. */ + uc=res->lsu; /* .. */ + msua=ua+D2U(lhs->digits)-1; /* -> msu of lhs */ + msub=ub+D2U(rhs->digits)-1; /* -> msu of rhs */ + msuc=uc+D2U(set->digits)-1; /* -> msu of result */ + msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ + for (; uc<=msuc; ua++, ub++, uc++) { /* Unit loop */ + Unit a, b; /* extract units */ + if (ua>msua) a=0; + else a=*ua; + if (ub>msub) b=0; + else b=*ub; + *uc=0; /* can now write back */ + if (a|b) { /* maybe 1 bits to examine */ + Int i, j; + /* This loop could be unrolled and/or use BIN2BCD tables */ + for (i=0; i1) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + if (uc==msuc && i==msudigs-1) break; /* just did final digit */ + } /* each digit */ + } /* non-zero */ + } /* each unit */ + /* [here uc-1 is the msu of the result] */ + res->digits=decGetDigits(res->lsu, uc-res->lsu); + res->exponent=0; /* integer */ + res->bits=0; /* sign=0 */ + return res; /* [no status to set] */ + } /* decNumberOr */ + +/* ------------------------------------------------------------------ */ +/* decNumberPlus -- prefix plus operator */ +/* */ +/* This computes C = 0 + A */ +/* */ +/* res is C, the result. 
C may be A */ +/* rhs is A */ +/* set is the context */ +/* */ +/* See also decNumberCopy for a quiet bitwise version of this. */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +/* This simply uses AddOp; Add will take fast path after preparing A. */ +/* Performance is a concern here, as this routine is often used to */ +/* check operands and apply rounding and overflow/underflow testing. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberPlus(decNumber *res, const decNumber *rhs, + decContext *set) { + decNumber dzero; + uInt status=0; /* accumulator */ + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + uprv_decNumberZero(&dzero); /* make 0 */ + dzero.exponent=rhs->exponent; /* [no coefficient expansion] */ + decAddOp(res, &dzero, rhs, set, 0, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberPlus */ + +/* ------------------------------------------------------------------ */ +/* decNumberMultiply -- multiply two Numbers */ +/* */ +/* This computes C = A x B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X+X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberMultiply(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decMultiplyOp(res, lhs, rhs, set, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberMultiply */ + +/* ------------------------------------------------------------------ */ +/* decNumberPower -- raise a number to a power */ +/* */ +/* This computes C = A ** B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X**X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Mathematical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. */ +/* */ +/* However, if 1999999997<=B<=999999999 and B is an integer then the */ +/* restrictions on A and the context are relaxed to the usual bounds, */ +/* for compatibility with the earlier (integer power only) version */ +/* of this function. */ +/* */ +/* When B is an integer, the result may be exact, even if rounded. */ +/* */ +/* The final result is rounded according to the context; it will */ +/* almost always be correctly rounded, but may be up to 1 ulp in */ +/* error in rare cases. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + #if DECSUBSET + decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ + decNumber *allocrhs=NULL; /* .., rhs */ + #endif + decNumber *allocdac=NULL; /* -> allocated acc buffer, iff used */ + decNumber *allocinv=NULL; /* -> allocated 1/x buffer, iff used */ + Int reqdigits=set->digits; /* requested DIGITS */ + Int n; /* rhs in binary */ + Flag rhsint=0; /* 1 if rhs is an integer */ + Flag useint=0; /* 1 if can use integer calculation */ + Flag isoddint=0; /* 1 if rhs is an integer and odd */ + Int i; /* work */ + #if DECSUBSET + Int dropped; /* .. */ + #endif + uInt needbytes; /* buffer size needed */ + Flag seenbit; /* seen a bit while powering */ + Int residue=0; /* rounding residue */ + uInt status=0; /* accumulators */ + uByte bits=0; /* result sign if errors */ + decContext aset; /* working context */ + decNumber dnOne; /* work value 1... 
*/ + /* local accumulator buffer [a decNumber, with digits+elength+1 digits] */ + decNumber dacbuff[D2N(DECBUFFER+9)]; + decNumber *dac=dacbuff; /* -> result accumulator */ + /* same again for possible 1/lhs calculation */ + decNumber invbuff[D2N(DECBUFFER+9)]; + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { /* reduce operands and set status, as needed */ + if (lhs->digits>reqdigits) { + alloclhs=decRoundOperand(lhs, set, &status); + if (alloclhs==NULL) break; + lhs=alloclhs; + } + if (rhs->digits>reqdigits) { + allocrhs=decRoundOperand(rhs, set, &status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + /* handle NaNs and rhs Infinity (lhs infinity is harder) */ + if (SPECIALARGS) { + if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) { /* NaNs */ + decNaNs(res, lhs, rhs, set, &status); + break;} + if (decNumberIsInfinite(rhs)) { /* rhs Infinity */ + Flag rhsneg=rhs->bits&DECNEG; /* save rhs sign */ + if (decNumberIsNegative(lhs) /* lhs<0 */ + && !decNumberIsZero(lhs)) /* .. */ + status|=DEC_Invalid_operation; + else { /* lhs >=0 */ + uprv_decNumberZero(&dnOne); /* set up 1 */ + dnOne.lsu[0]=1; + uprv_decNumberCompare(dac, lhs, &dnOne, set); /* lhs ? 1 */ + uprv_decNumberZero(res); /* prepare for 0/1/Infinity */ + if (decNumberIsNegative(dac)) { /* lhs<1 */ + if (rhsneg) res->bits|=DECINF; /* +Infinity [else is +0] */ + } + else if (dac->lsu[0]==0) { /* lhs=1 */ + /* 1**Infinity is inexact, so return fully-padded 1.0000 */ + Int shift=set->digits-1; + *res->lsu=1; /* was 0, make int 1 */ + res->digits=decShiftToMost(res->lsu, 1, shift); + res->exponent=-shift; /* make 1.0000... 
*/ + status|=DEC_Inexact|DEC_Rounded; /* deemed inexact */ + } + else { /* lhs>1 */ + if (!rhsneg) res->bits|=DECINF; /* +Infinity [else is +0] */ + } + } /* lhs>=0 */ + break;} + /* [lhs infinity drops through] */ + } /* specials */ + + /* Original rhs may be an integer that fits and is in range */ + n=decGetInt(rhs); + if (n!=BADINT) { /* it is an integer */ + rhsint=1; /* record the fact for 1**n */ + isoddint=(Flag)n&1; /* [works even if big] */ + if (n!=BIGEVEN && n!=BIGODD) /* can use integer path? */ + useint=1; /* looks good */ + } + + if (decNumberIsNegative(lhs) /* -x .. */ + && isoddint) bits=DECNEG; /* .. to an odd power */ + + /* handle LHS infinity */ + if (decNumberIsInfinite(lhs)) { /* [NaNs already handled] */ + uByte rbits=rhs->bits; /* save */ + uprv_decNumberZero(res); /* prepare */ + if (n==0) *res->lsu=1; /* [-]Inf**0 => 1 */ + else { + /* -Inf**nonint -> error */ + if (!rhsint && decNumberIsNegative(lhs)) { + status|=DEC_Invalid_operation; /* -Inf**nonint is error */ + break;} + if (!(rbits & DECNEG)) bits|=DECINF; /* was not a **-n */ + /* [otherwise will be 0 or -0] */ + res->bits=bits; + } + break;} + + /* similarly handle LHS zero */ + if (decNumberIsZero(lhs)) { + if (n==0) { /* 0**0 => Error */ + #if DECSUBSET + if (!set->extended) { /* [unless subset] */ + uprv_decNumberZero(res); + *res->lsu=1; /* return 1 */ + break;} + #endif + status|=DEC_Invalid_operation; + } + else { /* 0**x */ + uByte rbits=rhs->bits; /* save */ + if (rbits & DECNEG) { /* was a 0**(-n) */ + #if DECSUBSET + if (!set->extended) { /* [bad if subset] */ + status|=DEC_Invalid_operation; + break;} + #endif + bits|=DECINF; + } + uprv_decNumberZero(res); /* prepare */ + /* [otherwise will be 0 or -0] */ + res->bits=bits; + } + break;} + + /* here both lhs and rhs are finite; rhs==0 is handled in the */ + /* integer path. 
Next handle the non-integer cases */ + if (!useint) { /* non-integral rhs */ + /* any -ve lhs is bad, as is either operand or context out of */ + /* bounds */ + if (decNumberIsNegative(lhs)) { + status|=DEC_Invalid_operation; + break;} + if (decCheckMath(lhs, set, &status) + || decCheckMath(rhs, set, &status)) break; /* variable status */ + + uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); /* clean context */ + aset.emax=DEC_MAX_MATH; /* usual bounds */ + aset.emin=-DEC_MAX_MATH; /* .. */ + aset.clamp=0; /* and no concrete format */ + + /* calculate the result using exp(ln(lhs)*rhs), which can */ + /* all be done into the accumulator, dac. The precision needed */ + /* is enough to contain the full information in the lhs (which */ + /* is the total digits, including exponent), or the requested */ + /* precision, if larger, + 4; 6 is used for the exponent */ + /* maximum length, and this is also used when it is shorter */ + /* than the requested digits as it greatly reduces the >0.5 ulp */ + /* cases at little cost (because Ln doubles digits each */ + /* iteration so a few extra digits rarely causes an extra */ + /* iteration) */ + aset.digits=MAXI(lhs->digits, set->digits)+6+4; + } /* non-integer rhs */ + + else { /* rhs is in-range integer */ + if (n==0) { /* x**0 = 1 */ + /* (0**0 was handled above) */ + uprv_decNumberZero(res); /* result=1 */ + *res->lsu=1; /* .. 
*/ + break;} + /* rhs is a non-zero integer */ + if (n<0) n=-n; /* use abs(n) */ + + aset=*set; /* clone the context */ + aset.round=DEC_ROUND_HALF_EVEN; /* internally use balanced */ + /* calculate the working DIGITS */ + aset.digits=reqdigits+(rhs->digits+rhs->exponent)+2; + #if DECSUBSET + if (!set->extended) aset.digits--; /* use classic precision */ + #endif + /* it's an error if this is more than can be handled */ + if (aset.digits>DECNUMMAXP) {status|=DEC_Invalid_operation; break;} + } /* integer path */ + + /* aset.digits is the count of digits for the accumulator needed */ + /* if accumulator is too long for local storage, then allocate */ + needbytes=sizeof(decNumber)+(D2U(aset.digits)-1)*sizeof(Unit); + /* [needbytes also used below if 1/lhs needed] */ + if (needbytes>sizeof(dacbuff)) { + allocdac=(decNumber *)malloc(needbytes); + if (allocdac==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + dac=allocdac; /* use the allocated space */ + } + /* here, aset is set up and accumulator is ready for use */ + + if (!useint) { /* non-integral rhs */ + /* x ** y; special-case x=1 here as it will otherwise always */ + /* reduce to integer 1; decLnOp has a fastpath which detects */ + /* the case of x=1 */ + decLnOp(dac, lhs, &aset, &status); /* dac=ln(lhs) */ + /* [no error possible, as lhs 0 already handled] */ + if (ISZERO(dac)) { /* x==1, 1.0, etc. */ + /* need to return fully-padded 1.0000 etc., but rhsint->1 */ + *dac->lsu=1; /* was 0, make int 1 */ + if (!rhsint) { /* add padding */ + Int shift=set->digits-1; + dac->digits=decShiftToMost(dac->lsu, 1, shift); + dac->exponent=-shift; /* make 1.0000... 
*/ + status|=DEC_Inexact|DEC_Rounded; /* deemed inexact */ + } + } + else { + decMultiplyOp(dac, dac, rhs, &aset, &status); /* dac=dac*rhs */ + decExpOp(dac, dac, &aset, &status); /* dac=exp(dac) */ + } + /* and drop through for final rounding */ + } /* non-integer rhs */ + + else { /* carry on with integer */ + uprv_decNumberZero(dac); /* acc=1 */ + *dac->lsu=1; /* .. */ + + /* if a negative power the constant 1 is needed, and if not subset */ + /* invert the lhs now rather than inverting the result later */ + if (decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */ + decNumber *inv=invbuff; /* asssume use fixed buffer */ + uprv_decNumberCopy(&dnOne, dac); /* dnOne=1; [needed now or later] */ + #if DECSUBSET + if (set->extended) { /* need to calculate 1/lhs */ + #endif + /* divide lhs into 1, putting result in dac [dac=1/dac] */ + decDivideOp(dac, &dnOne, lhs, &aset, DIVIDE, &status); + /* now locate or allocate space for the inverted lhs */ + if (needbytes>sizeof(invbuff)) { + allocinv=(decNumber *)malloc(needbytes); + if (allocinv==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + inv=allocinv; /* use the allocated space */ + } + /* [inv now points to big-enough buffer or allocated storage] */ + uprv_decNumberCopy(inv, dac); /* copy the 1/lhs */ + uprv_decNumberCopy(dac, &dnOne); /* restore acc=1 */ + lhs=inv; /* .. and go forward with new lhs */ + #if DECSUBSET + } + #endif + } + + /* Raise-to-the-power loop... */ + seenbit=0; /* set once a 1-bit is encountered */ + for (i=1;;i++){ /* for each bit [top bit ignored] */ + /* abandon if had overflow or terminal underflow */ + if (status & (DEC_Overflow|DEC_Underflow)) { /* interesting? 
*/ + if (status&DEC_Overflow || ISZERO(dac)) break; + } + /* [the following two lines revealed an optimizer bug in a C++ */ + /* compiler, with symptom: 5**3 -> 25, when n=n+n was used] */ + n=n<<1; /* move next bit to testable position */ + if (n<0) { /* top bit is set */ + seenbit=1; /* OK, significant bit seen */ + decMultiplyOp(dac, dac, lhs, &aset, &status); /* dac=dac*x */ + } + if (i==31) break; /* that was the last bit */ + if (!seenbit) continue; /* no need to square 1 */ + decMultiplyOp(dac, dac, dac, &aset, &status); /* dac=dac*dac [square] */ + } /*i*/ /* 32 bits */ + + /* complete internal overflow or underflow processing */ + if (status & (DEC_Overflow|DEC_Underflow)) { + #if DECSUBSET + /* If subset, and power was negative, reverse the kind of -erflow */ + /* [1/x not yet done] */ + if (!set->extended && decNumberIsNegative(rhs)) { + if (status & DEC_Overflow) + status^=DEC_Overflow | DEC_Underflow | DEC_Subnormal; + else { /* trickier -- Underflow may or may not be set */ + status&=~(DEC_Underflow | DEC_Subnormal); /* [one or both] */ + status|=DEC_Overflow; + } + } + #endif + dac->bits=(dac->bits & ~DECNEG) | bits; /* force correct sign */ + /* round subnormals [to set.digits rather than aset.digits] */ + /* or set overflow result similarly as required */ + decFinalize(dac, set, &residue, &status); + uprv_decNumberCopy(res, dac); /* copy to result (is now OK length) */ + break; + } + + #if DECSUBSET + if (!set->extended && /* subset math */ + decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */ + /* so divide result into 1 [dac=1/dac] */ + decDivideOp(dac, &dnOne, dac, &aset, DIVIDE, &status); + } + #endif + } /* rhs integer path */ + + /* reduce result to the requested length and copy to result */ + decCopyFit(res, dac, set, &residue, &status); + decFinish(res, set, &residue, &status); /* final cleanup */ + #if DECSUBSET + if (!set->extended) decTrim(res, set, 0, 1, &dropped); /* trailing zeros */ + #endif + } while(0); /* end protected 
*/ + + if (allocdac!=NULL) free(allocdac); /* drop any storage used */ + if (allocinv!=NULL) free(allocinv); /* .. */ + #if DECSUBSET + if (alloclhs!=NULL) free(alloclhs); /* .. */ + if (allocrhs!=NULL) free(allocrhs); /* .. */ + #endif + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberPower */ + +/* ------------------------------------------------------------------ */ +/* decNumberQuantize -- force exponent to requested value */ +/* */ +/* This computes C = op(A, B), where op adjusts the coefficient */ +/* of C (by rounding or shifting) such that the exponent (-scale) */ +/* of C has exponent of B. The numerical value of C will equal A, */ +/* except for the effects of any rounding that occurred. */ +/* */ +/* res is C, the result. C may be A or B */ +/* lhs is A, the number to adjust */ +/* rhs is B, the number with exponent to match */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Unless there is an error or the result is infinite, the exponent */ +/* after the operation is guaranteed to be equal to that of B. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberQuantize(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decQuantizeOp(res, lhs, rhs, set, 1, &status); + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberQuantize */ + +/* ------------------------------------------------------------------ */ +/* decNumberReduce -- remove trailing zeros */ +/* */ +/* This computes C = 0 + A, and normalizes the result */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. 
*/ +/* ------------------------------------------------------------------ */ +/* Previously known as Normalize */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberNormalize(decNumber *res, const decNumber *rhs, + decContext *set) { + return uprv_decNumberReduce(res, rhs, set); + } /* decNumberNormalize */ + +U_CAPI decNumber * U_EXPORT2 uprv_decNumberReduce(decNumber *res, const decNumber *rhs, + decContext *set) { + #if DECSUBSET + decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ + #endif + uInt status=0; /* as usual */ + Int residue=0; /* as usual */ + Int dropped; /* work */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operand and set lostDigits status, as needed */ + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, &status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + /* Infinities copy through; NaNs need usual treatment */ + if (decNumberIsNaN(rhs)) { + decNaNs(res, rhs, NULL, set, &status); + break; + } + + /* reduce result to the requested length and copy to result */ + decCopyFit(res, rhs, set, &residue, &status); /* copy & round */ + decFinish(res, set, &residue, &status); /* cleanup/set flags */ + decTrim(res, set, 1, 0, &dropped); /* normalize in place */ + /* [may clamp] */ + } while(0); /* end protected */ + + #if DECSUBSET + if (allocrhs !=NULL) free(allocrhs); /* .. */ + #endif + if (status!=0) decStatus(res, status, set);/* then report status */ + return res; + } /* decNumberReduce */ + +/* ------------------------------------------------------------------ */ +/* decNumberRescale -- force exponent to requested value */ +/* */ +/* This computes C = op(A, B), where op adjusts the coefficient */ +/* of C (by rounding or shifting) such that the exponent (-scale) */ +/* of C has the value B. 
The numerical value of C will equal A, */ +/* except for the effects of any rounding that occurred. */ +/* */ +/* res is C, the result. C may be A or B */ +/* lhs is A, the number to adjust */ +/* rhs is B, the requested exponent */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Unless there is an error or the result is infinite, the exponent */ +/* after the operation is guaranteed to be equal to B. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberRescale(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decQuantizeOp(res, lhs, rhs, set, 0, &status); + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberRescale */ + +/* ------------------------------------------------------------------ */ +/* decNumberRemainder -- divide and return remainder */ +/* */ +/* This computes C = A % B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X%X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberRemainder(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decDivideOp(res, lhs, rhs, set, REMAINDER, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberRemainder */ + +/* ------------------------------------------------------------------ */ +/* decNumberRemainderNear -- divide and return remainder from nearest */ +/* */ +/* This computes C = A % B, where % is the IEEE remainder operator */ +/* */ +/* res is C, the result. 
C may be A and/or B (e.g., X=X%X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberRemainderNear(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + decDivideOp(res, lhs, rhs, set, REMNEAR, &status); + if (status!=0) decStatus(res, status, set); + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberRemainderNear */ + +/* ------------------------------------------------------------------ */ +/* decNumberRotate -- rotate the coefficient of a Number left/right */ +/* */ +/* This computes C = A rot B (in base ten and rotating set->digits */ +/* digits). */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=XrotX) */ +/* lhs is A */ +/* rhs is B, the number of digits to rotate (-ve to right) */ +/* set is the context */ +/* */ +/* The digits of the coefficient of A are rotated to the left (if B */ +/* is positive) or to the right (if B is negative) without adjusting */ +/* the exponent or the sign of A. If lhs->digits is less than */ +/* set->digits the coefficient is padded with zeros on the left */ +/* before the rotate. Any leading zeros in the result are removed */ +/* as usual. */ +/* */ +/* B must be an integer (q=0) and in the range -set->digits through */ +/* +set->digits. */ +/* C must have space for set->digits digits. */ +/* NaNs are propagated as usual. Infinities are unaffected (but */ +/* B must be valid). No status is set unless B is invalid or an */ +/* operand is an sNaN. 
*/
+/* ------------------------------------------------------------------ */
+U_CAPI decNumber * U_EXPORT2 uprv_decNumberRotate(decNumber *res, const decNumber *lhs,
+                                                  const decNumber *rhs, decContext *set) {
+  uInt status=0;                   /* accumulator */
+  Int rotate;                      /* rhs as an Int */
+  /* Outline: validate rhs as an integer within +/-set->digits, copy
+     lhs to res, then rotate the coefficient of res in place.  Any
+     status raised is reported through decStatus before returning res. */
+  #if DECCHECK
+  if (decCheckOperands(res, lhs, rhs, set)) return res;
+  #endif
+
+  /* NaNs propagate as normal */
+  if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs))
+    decNaNs(res, lhs, rhs, set, &status);
+   /* rhs must be an integer */
+   else if (decNumberIsInfinite(rhs) || rhs->exponent!=0)
+    status=DEC_Invalid_operation;
+   else { /* both numeric, rhs is an integer */
+    rotate=decGetInt(rhs);                   /* [cannot fail] */
+    if (rotate==BADINT                       /* something bad .. */
+     || rotate==BIGODD || rotate==BIGEVEN    /* .. very big .. */
+     || abs(rotate)>set->digits)             /* .. or out of range */
+      status=DEC_Invalid_operation;
+     else {                                  /* rhs is OK */
+      uprv_decNumberCopy(res, lhs);
+      /* convert -ve rotate to equivalent positive rotation */
+      if (rotate<0) rotate=set->digits+rotate;
+      if (rotate!=0 && rotate!=set->digits   /* nothing to do for a zero or full rotation */
+       && !decNumberIsInfinite(res)) {       /* .. nor when lhs was infinite */
+        /* left-rotate to do; 0 < rotate < set->digits */
+        uInt units, shift;                   /* work */
+        uInt msudigits;                      /* digits in result msu */
+        Unit *msu=res->lsu+D2U(res->digits)-1;    /* current msu */
+        Unit *msumax=res->lsu+D2U(set->digits)-1; /* rotation msu */
+        for (msu++; msu<=msumax; msu++) *msu=0;   /* ensure high units=0 */
+        res->digits=set->digits;                  /* now full-length */
+        msudigits=MSUDIGITS(res->digits);         /* actual digits in msu */
+
+        /* rotation here is done in-place, in three steps */
+        /* 1. shift all to least up to one unit to unit-align final */
+        /* lsd [any digits shifted out are rotated to the left, */
+        /* abutted to the original msd (which may require split)] */
+        /* */
+        /* [if there are no whole units left to rotate, the */
+        /* rotation is now complete] */
+        /* */
+        /* 2. shift to least, from below the split point only, so that */
+        /* the final msd is in the right place in its Unit [any */
+        /* digits shifted out will fit exactly in the current msu, */
+        /* left aligned, no split required] */
+        /* */
+        /* 3. rotate all the units by reversing left part, right */
+        /* part, and then whole */
+        /* */
+        /* example: rotate right 8 digits (2 units + 2), DECDPUN=3. */
+        /* */
+        /* start: 00a bcd efg hij klm npq */
+        /* */
+        /* 1a 000 0ab cde fgh|ijk lmn [pq saved] */
+        /* 1b 00p qab cde fgh|ijk lmn */
+        /* */
+        /* 2a 00p qab cde fgh|00i jkl [mn saved] */
+        /* 2b mnp qab cde fgh|00i jkl */
+        /* */
+        /* 3a fgh cde qab mnp|00i jkl */
+        /* 3b fgh cde qab mnp|jkl 00i */
+        /* 3c 00i jkl mnp qab cde fgh */
+
+        /* Step 1: amount to shift is the partial right-rotate count */
+        rotate=set->digits-rotate;      /* make it right-rotate */
+        units=rotate/DECDPUN;           /* whole units to rotate */
+        shift=rotate%DECDPUN;           /* left-over digits count */
+        if (shift>0) {                  /* not an exact number of units */
+          uInt save=res->lsu[0]%powers[shift];    /* save low digit(s) */
+          decShiftToLeast(res->lsu, D2U(res->digits), shift);
+          if (shift>msudigits) {        /* msumax-1 needs >0 digits */
+            uInt rem=save%powers[shift-msudigits];/* split save */
+            *msumax=(Unit)(save/powers[shift-msudigits]); /* and insert */
+            *(msumax-1)=*(msumax-1)
+                       +(Unit)(rem*powers[DECDPUN-(shift-msudigits)]); /* .. */
+            }
+           else { /* all fits in msumax */
+            *msumax=*msumax+(Unit)(save*powers[msudigits-shift]); /* [maybe *1] */
+            }
+          } /* digits shift needed */
+
+        /* If whole units to rotate... */
+        if (units>0) {                  /* some to do */
+          /* Step 2: the units to touch are the whole ones in rotate, */
+          /* if any, and the shift is DECDPUN-msudigits (which may be */
+          /* 0, again) */
+          shift=DECDPUN-msudigits;
+          if (shift>0) {                /* not an exact number of units */
+            uInt save=res->lsu[0]%powers[shift];  /* save low digit(s) */
+            decShiftToLeast(res->lsu, units, shift);
+            *msumax=*msumax+(Unit)(save*powers[msudigits]);
+            } /* partial shift needed */
+
+          /* Step 3: rotate the units array using triple reverse */
+          /* (reversing is easy and fast) */
+          decReverse(res->lsu+units, msumax);     /* left part */
+          decReverse(res->lsu, res->lsu+units-1); /* right part */
+          decReverse(res->lsu, msumax);           /* whole */
+          } /* whole units to rotate */
+        /* the rotation may have left an undetermined number of zeros */
+        /* on the left, so true length needs to be calculated */
+        res->digits=decGetDigits(res->lsu, msumax-res->lsu+1);
+        } /* rotate needed */
+      } /* rhs OK */
+    } /* numerics */
+  if (status!=0) decStatus(res, status, set);
+  return res;
+  } /* decNumberRotate */
+
+/* ------------------------------------------------------------------ */
+/* decNumberSameQuantum -- test for equal exponents */
+/* */
+/* res is the result number, which will contain either 0 or 1 */
+/* lhs is a number to test */
+/* rhs is the second (usually a pattern) */
+/* */
+/* No errors are possible and no context is needed. 
*/
+/* ------------------------------------------------------------------ */
+U_CAPI decNumber * U_EXPORT2 uprv_decNumberSameQuantum(decNumber *res, const decNumber *lhs,
+                                                       const decNumber *rhs) {
+  Unit same=0;                     /* becomes 1 when the quanta match */
+
+  #if DECCHECK
+  if (decCheckOperands(res, lhs, rhs, DECUNCONT)) return res;
+  #endif
+
+  if (SPECIALARGS) {
+    /* with a special present, only NaN/NaN and Inf/Inf pairs match */
+    if ((decNumberIsNaN(lhs) && decNumberIsNaN(rhs))
+     || (decNumberIsInfinite(lhs) && decNumberIsInfinite(rhs))) same=1;
+    }
+   else if (lhs->exponent==rhs->exponent) same=1;  /* both finite */
+
+  /* operands are no longer needed, so res may overwrite one of them */
+  uprv_decNumberZero(res);
+  *res->lsu=same;
+  return res;
+  } /* decNumberSameQuantum */
+
+/* ------------------------------------------------------------------ */
+/* decNumberScaleB -- multiply by a power of 10 */
+/* */
+/* This computes C = A x 10**B where B is an integer (q=0) with */
+/* maximum magnitude 2*(emax+digits) */
+/* */
+/* res is C, the result. C may be A or B */
+/* lhs is A, the number to adjust */
+/* rhs is B, the requested power of ten to use */
+/* set is the context */
+/* */
+/* C must have space for set->digits digits. */
+/* */
+/* The result may underflow or overflow. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberScaleB(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + Int reqexp; /* requested exponent change [B] */ + uInt status=0; /* accumulator */ + Int residue; /* work */ + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + /* Handle special values except lhs infinite */ + if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs)) + decNaNs(res, lhs, rhs, set, &status); + /* rhs must be an integer */ + else if (decNumberIsInfinite(rhs) || rhs->exponent!=0) + status=DEC_Invalid_operation; + else { + /* lhs is a number; rhs is a finite with q==0 */ + reqexp=decGetInt(rhs); /* [cannot fail] */ + if (reqexp==BADINT /* something bad .. */ + || reqexp==BIGODD || reqexp==BIGEVEN /* .. very big .. */ + || abs(reqexp)>(2*(set->digits+set->emax))) /* .. or out of range */ + status=DEC_Invalid_operation; + else { /* rhs is OK */ + uprv_decNumberCopy(res, lhs); /* all done if infinite lhs */ + if (!decNumberIsInfinite(res)) { /* prepare to scale */ + res->exponent+=reqexp; /* adjust the exponent */ + residue=0; + decFinalize(res, set, &residue, &status); /* .. and check */ + } /* finite LHS */ + } /* rhs OK */ + } /* rhs finite */ + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberScaleB */ + +/* ------------------------------------------------------------------ */ +/* decNumberShift -- shift the coefficient of a Number left or right */ +/* */ +/* This computes C = A << B or C = A >> -B (in base ten). */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X<digits through */ +/* +set->digits. */ +/* C must have space for set->digits digits. */ +/* NaNs are propagated as usual. Infinities are unaffected (but */ +/* B must be valid). No status is set unless B is invalid or an */ +/* operand is an sNaN. 
*/
+/* ------------------------------------------------------------------ */
+U_CAPI decNumber * U_EXPORT2 uprv_decNumberShift(decNumber *res, const decNumber *lhs,
+                                                 const decNumber *rhs, decContext *set) {
+  uInt status=0;                   /* accumulator */
+  Int shift;                       /* rhs as an Int */
+  /* Outline: validate rhs as an integer within +/-set->digits, copy
+     lhs to res, then shift the coefficient of res left (shift>0) or
+     right (shift<0).  Any status raised is reported through decStatus
+     before returning res. */
+  #if DECCHECK
+  if (decCheckOperands(res, lhs, rhs, set)) return res;
+  #endif
+
+  /* NaNs propagate as normal */
+  if (decNumberIsNaN(lhs) || decNumberIsNaN(rhs))
+    decNaNs(res, lhs, rhs, set, &status);
+   /* rhs must be an integer */
+   else if (decNumberIsInfinite(rhs) || rhs->exponent!=0)
+    status=DEC_Invalid_operation;
+   else { /* both numeric, rhs is an integer */
+    shift=decGetInt(rhs);                    /* [cannot fail] */
+    if (shift==BADINT                        /* something bad .. */
+     || shift==BIGODD || shift==BIGEVEN      /* .. very big .. */
+     || abs(shift)>set->digits)              /* .. or out of range */
+      status=DEC_Invalid_operation;
+     else {                                  /* rhs is OK */
+      uprv_decNumberCopy(res, lhs);
+      if (shift!=0 && !decNumberIsInfinite(res)) { /* something to do */
+        if (shift>0) {                       /* to left */
+          if (shift==set->digits) {          /* removing all */
+            *res->lsu=0;                     /* so place 0 */
+            res->digits=1;                   /* .. */
+            }
+           else {                            /* partial left shift */
+            /* first remove leading digits if necessary */
+            if (res->digits+shift>set->digits) {
+              decDecap(res, res->digits+shift-set->digits);
+              /* that updated res->digits; may have gone to 1 (for a */
+              /* single digit or for zero) */
+              }
+            if (res->digits>1 || *res->lsu)  /* if non-zero.. */
+              res->digits=decShiftToMost(res->lsu, res->digits, shift);
+            } /* partial left */
+          } /* left */
+         else { /* to right */
+          if (-shift>=res->digits) {         /* discarding all */
+            *res->lsu=0;                     /* so place 0 */
+            res->digits=1;                   /* .. */
+            }
+           else {
+            decShiftToLeast(res->lsu, D2U(res->digits), -shift);
+            res->digits-=(-shift);
+            }
+          } /* to right */
+        } /* non-0 non-Inf shift */
+      } /* rhs OK */
+    } /* numerics */
+  if (status!=0) decStatus(res, status, set);
+  return res;
+  } /* decNumberShift */
+
+/* ------------------------------------------------------------------ */
+/* decNumberSquareRoot -- square root operator */
+/* */
+/* This computes C = squareroot(A) */
+/* */
+/* res is C, the result. C may be A */
+/* rhs is A */
+/* set is the context; note that rounding mode has no effect */
+/* */
+/* C must have space for set->digits digits. */
+/* ------------------------------------------------------------------ */
+/* This uses the following varying-precision algorithm in: */
+/* */
+/* Properly Rounded Variable Precision Square Root, T. E. Hull and */
+/* A. Abrham, ACM Transactions on Mathematical Software, Vol 11 #3, */
+/* pp229-237, ACM, September 1985. */
+/* */
+/* The square-root is calculated using Newton's method, after which */
+/* a check is made to ensure the result is correctly rounded. */
+/* */
+/* % [Reformatted original Numerical Turing source code follows.] */
+/* function sqrt(x : real) : real */
+/* % sqrt(x) returns the properly rounded approximation to the square */
+/* % root of x, in the precision of the calling environment, or it */
+/* % fails if x < 0. 
*/ +/* % t e hull and a abrham, august, 1984 */ +/* if x <= 0 then */ +/* if x < 0 then */ +/* assert false */ +/* else */ +/* result 0 */ +/* end if */ +/* end if */ +/* var f := setexp(x, 0) % fraction part of x [0.1 <= x < 1] */ +/* var e := getexp(x) % exponent part of x */ +/* var approx : real */ +/* if e mod 2 = 0 then */ +/* approx := .259 + .819 * f % approx to root of f */ +/* else */ +/* f := f/10 % adjustments */ +/* e := e + 1 % for odd */ +/* approx := .0819 + 2.59 * f % exponent */ +/* end if */ +/* */ +/* var p:= 3 */ +/* const maxp := currentprecision + 2 */ +/* loop */ +/* p := min(2*p - 2, maxp) % p = 4,6,10, . . . , maxp */ +/* precision p */ +/* approx := .5 * (approx + f/approx) */ +/* exit when p = maxp */ +/* end loop */ +/* */ +/* % approx is now within 1 ulp of the properly rounded square root */ +/* % of f; to ensure proper rounding, compare squares of (approx - */ +/* % 1/2 ulp) and (approx + 1/2 ulp) with f. */ +/* p := currentprecision */ +/* begin */ +/* precision p + 2 */ +/* const approxsubhalf := approx - setexp(.5, -p) */ +/* if mulru(approxsubhalf, approxsubhalf) > f then */ +/* approx := approx - setexp(.1, -p + 1) */ +/* else */ +/* const approxaddhalf := approx + setexp(.5, -p) */ +/* if mulrd(approxaddhalf, approxaddhalf) < f then */ +/* approx := approx + setexp(.1, -p + 1) */ +/* end if */ +/* end if */ +/* end */ +/* result setexp(approx, e div 2) % fix exponent */ +/* end sqrt */ +/* ------------------------------------------------------------------ */ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif +U_CAPI decNumber * U_EXPORT2 uprv_decNumberSquareRoot(decNumber *res, const decNumber *rhs, + decContext *set) { + decContext workset, approxset; /* work contexts */ + decNumber dzero; /* used for constant zero */ + Int maxp; /* largest working precision */ + Int workp; /* working precision */ + Int residue=0; /* rounding residue */ +
uInt status=0, ignore=0; /* status accumulators */ + uInt rstatus; /* .. */ + Int exp; /* working exponent */ + Int ideal; /* ideal (preferred) exponent */ + Int needbytes; /* work */ + Int dropped; /* .. */ + + #if DECSUBSET + decNumber *allocrhs=NULL; /* non-NULL if rounded rhs allocated */ + #endif + /* buffer for f [needs +1 in case DECBUFFER 0] */ + decNumber buff[D2N(DECBUFFER+1)]; + /* buffer for a [needs +2 to match likely maxp] */ + decNumber bufa[D2N(DECBUFFER+2)]; + /* buffer for temporary, b [must be same size as a] */ + decNumber bufb[D2N(DECBUFFER+2)]; + decNumber *allocbuff=NULL; /* -> allocated buff, iff allocated */ + decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ + decNumber *allocbufb=NULL; /* -> allocated bufb, iff allocated */ + decNumber *f=buff; /* reduced fraction */ + decNumber *a=bufa; /* approximation to result */ + decNumber *b=bufb; /* intermediate result */ + /* buffer for temporary variable, up to 3 digits */ + decNumber buft[D2N(3)]; + decNumber *t=buft; /* up-to-3-digit constant or work */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operand and set lostDigits status, as needed */ + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, &status); + if (allocrhs==NULL) break; + /* [Note: 'f' allocation below could reuse this buffer if */ + /* used, but as this is rare they are kept separate for clarity.] 
*/ + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + /* handle infinities and NaNs */ + if (SPECIALARG) { + if (decNumberIsInfinite(rhs)) { /* an infinity */ + if (decNumberIsNegative(rhs)) status|=DEC_Invalid_operation; + else uprv_decNumberCopy(res, rhs); /* +Infinity */ + } + else decNaNs(res, rhs, NULL, set, &status); /* a NaN */ + break; + } + + /* calculate the ideal (preferred) exponent [floor(exp/2)] */ + /* [It would be nicer to write: ideal=rhs->exponent>>1, but this */ + /* generates a compiler warning. Generated code is the same.] */ + ideal=(rhs->exponent&~1)/2; /* target */ + + /* handle zeros */ + if (ISZERO(rhs)) { + uprv_decNumberCopy(res, rhs); /* could be 0 or -0 */ + res->exponent=ideal; /* use the ideal [safe] */ + /* use decFinish to clamp any out-of-range exponent, etc. */ + decFinish(res, set, &residue, &status); + break; + } + + /* any other -x is an oops */ + if (decNumberIsNegative(rhs)) { + status|=DEC_Invalid_operation; + break; + } + + /* space is needed for three working variables */ + /* f -- the same precision as the RHS, reduced to 0.01->0.99... 
*/ + /* a -- Hull's approximation -- precision, when assigned, is */ + /* currentprecision+1 or the input argument precision, */ + /* whichever is larger (+2 for use as temporary) */ + /* b -- intermediate temporary result (same size as a) */ + /* if any is too long for local storage, then allocate */ + workp=MAXI(set->digits+1, rhs->digits); /* actual rounding precision */ + workp=MAXI(workp, 7); /* at least 7 for low cases */ + maxp=workp+2; /* largest working precision */ + + needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); + if (needbytes>(Int)sizeof(buff)) { + allocbuff=(decNumber *)malloc(needbytes); + if (allocbuff==NULL) { /* hopeless -- abandon */ + status|=DEC_Insufficient_storage; + break;} + f=allocbuff; /* use the allocated space */ + } + /* a and b both need to be able to hold a maxp-length number */ + needbytes=sizeof(decNumber)+(D2U(maxp)-1)*sizeof(Unit); + if (needbytes>(Int)sizeof(bufa)) { /* [same applies to b] */ + allocbufa=(decNumber *)malloc(needbytes); + allocbufb=(decNumber *)malloc(needbytes); + if (allocbufa==NULL || allocbufb==NULL) { /* hopeless */ + status|=DEC_Insufficient_storage; + break;} + a=allocbufa; /* use the allocated spaces */ + b=allocbufb; /* .. */ + } + + /* copy rhs -> f, save exponent, and reduce so 0.1 <= f < 1 */ + uprv_decNumberCopy(f, rhs); + exp=f->exponent+f->digits; /* adjusted to Hull rules */ + f->exponent=-(f->digits); /* to range */ + + /* set up working context */ + uprv_decContextDefault(&workset, DEC_INIT_DECIMAL64); + workset.emax=DEC_MAX_EMAX; + workset.emin=DEC_MIN_EMIN; + + /* [Until further notice, no error is possible and status bits */ + /* (Rounded, etc.) should be ignored, not accumulated.] 
*/ + + /* Calculate initial approximation, and allow for odd exponent */ + workset.digits=workp; /* p for initial calculation */ + t->bits=0; t->digits=3; + a->bits=0; a->digits=3; + if ((exp & 1)==0) { /* even exponent */ + /* Set t=0.259, a=0.819 */ + t->exponent=-3; + a->exponent=-3; + #if DECDPUN>=3 + t->lsu[0]=259; + a->lsu[0]=819; + #elif DECDPUN==2 + t->lsu[0]=59; t->lsu[1]=2; + a->lsu[0]=19; a->lsu[1]=8; + #else + t->lsu[0]=9; t->lsu[1]=5; t->lsu[2]=2; + a->lsu[0]=9; a->lsu[1]=1; a->lsu[2]=8; + #endif + } + else { /* odd exponent */ + /* Set t=0.0819, a=2.59 */ + f->exponent--; /* f=f/10 */ + exp++; /* e=e+1 */ + t->exponent=-4; + a->exponent=-2; + #if DECDPUN>=3 + t->lsu[0]=819; + a->lsu[0]=259; + #elif DECDPUN==2 + t->lsu[0]=19; t->lsu[1]=8; + a->lsu[0]=59; a->lsu[1]=2; + #else + t->lsu[0]=9; t->lsu[1]=1; t->lsu[2]=8; + a->lsu[0]=9; a->lsu[1]=5; a->lsu[2]=2; + #endif + } + + decMultiplyOp(a, a, f, &workset, &ignore); /* a=a*f */ + decAddOp(a, a, t, &workset, 0, &ignore); /* ..+t */ + /* [a is now the initial approximation for sqrt(f), calculated with */ + /* currentprecision, which is also a's precision.] */ + + /* the main calculation loop */ + uprv_decNumberZero(&dzero); /* make 0 */ + uprv_decNumberZero(t); /* set t = 0.5 */ + t->lsu[0]=5; /* .. */ + t->exponent=-1; /* .. */ + workset.digits=3; /* initial p */ + for (; workset.digitsexponent+=exp/2; /* set correct exponent */ + rstatus=0; /* clear status */ + residue=0; /* .. 
and accumulator */ + decCopyFit(a, a, &approxset, &residue, &rstatus); /* reduce (if needed) */ + decFinish(a, &approxset, &residue, &rstatus); /* clean and finalize */ + + /* Overflow was possible if the input exponent was out-of-range, */ + /* in which case quit */ + if (rstatus&DEC_Overflow) { + status=rstatus; /* use the status as-is */ + uprv_decNumberCopy(res, a); /* copy to result */ + break; + } + + /* Preserve status except Inexact/Rounded */ + status|=(rstatus & ~(DEC_Rounded|DEC_Inexact)); + + /* Carry out the Hull correction */ + a->exponent-=exp/2; /* back to 0.1->1 */ + + /* a is now at final precision and within 1 ulp of the properly */ + /* rounded square root of f; to ensure proper rounding, compare */ + /* squares of (a - l/2 ulp) and (a + l/2 ulp) with f. */ + /* Here workset.digits=maxp and t=0.5, and a->digits determines */ + /* the ulp */ + workset.digits--; /* maxp-1 is OK now */ + t->exponent=-a->digits-1; /* make 0.5 ulp */ + decAddOp(b, a, t, &workset, DECNEG, &ignore); /* b = a - 0.5 ulp */ + workset.round=DEC_ROUND_UP; + decMultiplyOp(b, b, b, &workset, &ignore); /* b = mulru(b, b) */ + decCompareOp(b, f, b, &workset, COMPARE, &ignore); /* b ? f, reversed */ + if (decNumberIsNegative(b)) { /* f < b [i.e., b > f] */ + /* this is the more common adjustment, though both are rare */ + t->exponent++; /* make 1.0 ulp */ + t->lsu[0]=1; /* .. */ + decAddOp(a, a, t, &workset, DECNEG, &ignore); /* a = a - 1 ulp */ + /* assign to approx [round to length] */ + approxset.emin-=exp/2; /* adjust to match a */ + approxset.emax-=exp/2; + decAddOp(a, &dzero, a, &approxset, 0, &ignore); + } + else { + decAddOp(b, a, t, &workset, 0, &ignore); /* b = a + 0.5 ulp */ + workset.round=DEC_ROUND_DOWN; + decMultiplyOp(b, b, b, &workset, &ignore); /* b = mulrd(b, b) */ + decCompareOp(b, b, f, &workset, COMPARE, &ignore); /* b ? f */ + if (decNumberIsNegative(b)) { /* b < f */ + t->exponent++; /* make 1.0 ulp */ + t->lsu[0]=1; /* .. 
*/ + decAddOp(a, a, t, &workset, 0, &ignore); /* a = a + 1 ulp */ + /* assign to approx [round to length] */ + approxset.emin-=exp/2; /* adjust to match a */ + approxset.emax-=exp/2; + decAddOp(a, &dzero, a, &approxset, 0, &ignore); + } + } + /* [no errors are possible in the above, and rounding/inexact during */ + /* estimation are irrelevant, so status was not accumulated] */ + + /* Here, 0.1 <= a < 1 (still), so adjust back */ + a->exponent+=exp/2; /* set correct exponent */ + + /* count droppable zeros [after any subnormal rounding] by */ + /* trimming a copy */ + uprv_decNumberCopy(b, a); + decTrim(b, set, 1, 1, &dropped); /* [drops trailing zeros] */ + + /* Set Inexact and Rounded. The answer can only be exact if */ + /* it is short enough so that squaring it could fit in workp */ + /* digits, so this is the only (relatively rare) condition that */ + /* a careful check is needed */ + if (b->digits*2-1 > workp) { /* cannot fit */ + status|=DEC_Inexact|DEC_Rounded; + } + else { /* could be exact/unrounded */ + uInt mstatus=0; /* local status */ + decMultiplyOp(b, b, b, &workset, &mstatus); /* try the multiply */ + if (mstatus&DEC_Overflow) { /* result just won't fit */ + status|=DEC_Inexact|DEC_Rounded; + } + else { /* plausible */ + decCompareOp(t, b, rhs, &workset, COMPARE, &mstatus); /* b ? rhs */ + if (!ISZERO(t)) status|=DEC_Inexact|DEC_Rounded; /* not equal */ + else { /* is Exact */ + /* here, dropped is the count of trailing zeros in 'a' */ + /* use closest exponent to ideal... 
*/ + Int todrop=ideal-a->exponent; /* most that can be dropped */ + if (todrop<0) status|=DEC_Rounded; /* ideally would add 0s */ + else { /* unrounded */ + /* there are some to drop, but emax may not allow all */ + Int maxexp=set->emax-set->digits+1; + Int maxdrop=maxexp-a->exponent; + if (todrop>maxdrop && set->clamp) { /* apply clamping */ + todrop=maxdrop; + status|=DEC_Clamped; + } + if (dropped0) { /* have some to drop */ + decShiftToLeast(a->lsu, D2U(a->digits), todrop); + a->exponent+=todrop; /* maintain numerical value */ + a->digits-=todrop; /* new length */ + } + } + } + } + } + + /* double-check Underflow, as perhaps the result could not have */ + /* been subnormal (initial argument too big), or it is now Exact */ + if (status&DEC_Underflow) { + Int ae=rhs->exponent+rhs->digits-1; /* adjusted exponent */ + /* check if truly subnormal */ + #if DECEXTFLAG /* DEC_Subnormal too */ + if (ae>=set->emin*2) status&=~(DEC_Subnormal|DEC_Underflow); + #else + if (ae>=set->emin*2) status&=~DEC_Underflow; + #endif + /* check if truly inexact */ + if (!(status&DEC_Inexact)) status&=~DEC_Underflow; + } + + uprv_decNumberCopy(res, a); /* a is now the result */ + } while(0); /* end protected */ + + if (allocbuff!=NULL) free(allocbuff); /* drop any storage used */ + if (allocbufa!=NULL) free(allocbufa); /* .. */ + if (allocbufb!=NULL) free(allocbufb); /* .. */ + #if DECSUBSET + if (allocrhs !=NULL) free(allocrhs); /* .. */ + #endif + if (status!=0) decStatus(res, status, set);/* then report status */ + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberSquareRoot */ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 +#pragma GCC diagnostic pop +#endif + +/* ------------------------------------------------------------------ */ +/* decNumberSubtract -- subtract two Numbers */ +/* */ +/* This computes C = A - B */ +/* */ +/* res is C, the result. 
C may be A and/or B (e.g., X=X-X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context; any status raised is reported through it */ +/* returns res */ +/* C must have space for set->digits digits. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberSubtract(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + uInt status=0; /* accumulator */ + + decAddOp(res, lhs, rhs, set, DECNEG, &status); /* A + (-B): DECNEG tells decAddOp to negate rhs */ + if (status!=0) decStatus(res, status, set); /* hand any accumulated status to decStatus */ + #if DECCHECK + decCheckInexact(res, set); + #endif + return res; + } /* decNumberSubtract */ + +/* ------------------------------------------------------------------ */ +/* decNumberToIntegralExact -- round-to-integral-value with Inexact */ +/* decNumberToIntegralValue -- round-to-integral-value */ +/* */ +/* res is the result */ +/* rhs is input number */ +/* set is the context */ +/* */ +/* res must have space for any value of rhs. */ +/* */ +/* This implements the IEEE special operators and therefore treats */ +/* special values as valid. For finite numbers it returns */ +/* rescale(rhs, 0) if rhs->exponent is <0. */ +/* Otherwise the result is rhs (so no error is possible, except for */ +/* sNaN). */ +/* */ +/* The context is used for rounding mode and status after sNaN, but */ +/* the digits setting is ignored. The Exact version will signal */ +/* Inexact if the result differs numerically from rhs; the other */ +/* never signals Inexact.
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberToIntegralExact(decNumber *res, const decNumber *rhs, + decContext *set) { + decNumber dn; + decContext workset; /* working context */ + uInt status=0; /* accumulator */ + + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + /* handle infinities and NaNs */ + if (SPECIALARG) { + if (decNumberIsInfinite(rhs)) uprv_decNumberCopy(res, rhs); /* an Infinity */ + else decNaNs(res, rhs, NULL, set, &status); /* a NaN */ + } + else { /* finite */ + /* have a finite number; no error possible (res must be big enough) */ + if (rhs->exponent>=0) return uprv_decNumberCopy(res, rhs); + /* that was easy, but if negative exponent there is work to do... */ + workset=*set; /* clone rounding, etc. */ + workset.digits=rhs->digits; /* no length rounding */ + workset.traps=0; /* no traps */ + uprv_decNumberZero(&dn); /* make a number with exponent 0 */ + uprv_decNumberQuantize(res, rhs, &dn, &workset); + status|=workset.status; + } + if (status!=0) decStatus(res, status, set); + return res; + } /* decNumberToIntegralExact */ + +U_CAPI decNumber * U_EXPORT2 uprv_decNumberToIntegralValue(decNumber *res, const decNumber *rhs, + decContext *set) { + decContext workset=*set; /* working context */ + workset.traps=0; /* no traps */ + uprv_decNumberToIntegralExact(res, rhs, &workset); + /* this never affects set, except for sNaNs; NaN will have been set */ + /* or propagated already, so no need to call decStatus */ + set->status|=workset.status&DEC_Invalid_operation; + return res; + } /* decNumberToIntegralValue */ + +/* ------------------------------------------------------------------ */ +/* decNumberXor -- XOR two Numbers, digitwise */ +/* */ +/* This computes C = A ^ B */ +/* */ +/* res is C, the result. 
C may be A and/or B (e.g., X=X^X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context (used for result length and error report) */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Logical function restrictions apply (see above); a NaN is */ +/* returned with Invalid_operation if a restriction is violated. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberXor(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + const Unit *ua, *ub; /* -> operands */ + const Unit *msua, *msub; /* -> operand msus */ + Unit *uc, *msuc; /* -> result and its msu */ + Int msudigs; /* digits in res msu */ + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + if (lhs->exponent!=0 || decNumberIsSpecial(lhs) || decNumberIsNegative(lhs) + || rhs->exponent!=0 || decNumberIsSpecial(rhs) || decNumberIsNegative(rhs)) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + /* operands are valid */ + ua=lhs->lsu; /* bottom-up */ + ub=rhs->lsu; /* .. */ + uc=res->lsu; /* .. 
*/ + msua=ua+D2U(lhs->digits)-1; /* -> msu of lhs */ + msub=ub+D2U(rhs->digits)-1; /* -> msu of rhs */ + msuc=uc+D2U(set->digits)-1; /* -> msu of result */ + msudigs=MSUDIGITS(set->digits); /* [faster than remainder] */ + for (; uc<=msuc; ua++, ub++, uc++) { /* Unit loop */ + Unit a, b; /* extract units */ + if (ua>msua) a=0; + else a=*ua; + if (ub>msub) b=0; + else b=*ub; + *uc=0; /* can now write back */ + if (a|b) { /* maybe 1 bits to examine */ + Int i, j; + /* This loop could be unrolled and/or use BIN2BCD tables */ + for (i=0; i1) { + decStatus(res, DEC_Invalid_operation, set); + return res; + } + if (uc==msuc && i==msudigs-1) break; /* just did final digit */ + } /* each digit */ + } /* non-zero */ + } /* each unit */ + /* [here uc-1 is the msu of the result] */ + res->digits=decGetDigits(res->lsu, uc-res->lsu); + res->exponent=0; /* integer */ + res->bits=0; /* sign=0 */ + return res; /* [no status to set] */ + } /* decNumberXor */ + + +/* ================================================================== */ +/* Utility routines */ +/* ================================================================== */ + +/* ------------------------------------------------------------------ */ +/* decNumberClass -- return the decClass of a decNumber */ +/* dn -- the decNumber to test */ +/* set -- the context to use for Emin */ +/* returns the decClass enum */ +/* ------------------------------------------------------------------ */ +enum decClass uprv_decNumberClass(const decNumber *dn, decContext *set) { + if (decNumberIsSpecial(dn)) { + if (decNumberIsQNaN(dn)) return DEC_CLASS_QNAN; + if (decNumberIsSNaN(dn)) return DEC_CLASS_SNAN; + /* must be an infinity */ + if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_INF; + return DEC_CLASS_POS_INF; + } + /* is finite */ + if (uprv_decNumberIsNormal(dn, set)) { /* most common */ + if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_NORMAL; + return DEC_CLASS_POS_NORMAL; + } + /* is subnormal or zero */ + if 
(decNumberIsZero(dn)) { /* most common */ + if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_ZERO; + return DEC_CLASS_POS_ZERO; + } + if (decNumberIsNegative(dn)) return DEC_CLASS_NEG_SUBNORMAL; + return DEC_CLASS_POS_SUBNORMAL; + } /* decNumberClass */ + +/* ------------------------------------------------------------------ */ +/* decNumberClassToString -- convert decClass to a string */ +/* */ +/* eclass is a valid decClass */ +/* returns a constant string describing the class (max 13+1 chars) */ +/* ------------------------------------------------------------------ */ +const char *uprv_decNumberClassToString(enum decClass eclass) { + if (eclass==DEC_CLASS_POS_NORMAL) return DEC_ClassString_PN; + if (eclass==DEC_CLASS_NEG_NORMAL) return DEC_ClassString_NN; + if (eclass==DEC_CLASS_POS_ZERO) return DEC_ClassString_PZ; + if (eclass==DEC_CLASS_NEG_ZERO) return DEC_ClassString_NZ; + if (eclass==DEC_CLASS_POS_SUBNORMAL) return DEC_ClassString_PS; + if (eclass==DEC_CLASS_NEG_SUBNORMAL) return DEC_ClassString_NS; + if (eclass==DEC_CLASS_POS_INF) return DEC_ClassString_PI; + if (eclass==DEC_CLASS_NEG_INF) return DEC_ClassString_NI; + if (eclass==DEC_CLASS_QNAN) return DEC_ClassString_QN; + if (eclass==DEC_CLASS_SNAN) return DEC_ClassString_SN; + return DEC_ClassString_UN; /* Unknown */ + } /* decNumberClassToString */ + +/* ------------------------------------------------------------------ */ +/* decNumberCopy -- copy a number */ +/* */ +/* dest is the target decNumber */ +/* src is the source decNumber */ +/* returns dest */ +/* */ +/* (dest==src is allowed and is a no-op) */ +/* All fields are updated as required. This is a utility operation, */ +/* so special values are unchanged and no error is possible. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopy(decNumber *dest, const decNumber *src) { + + #if DECCHECK + if (src==NULL) return uprv_decNumberZero(dest); + #endif + + if (dest==src) return dest; /* no copy required */ + + /* Use explicit assignments here as structure assignment could copy */ + /* more than just the lsu (for small DECDPUN). This would not affect */ + /* the value of the results, but could disturb test harness spill */ + /* checking. */ + dest->bits=src->bits; + dest->exponent=src->exponent; + dest->digits=src->digits; + dest->lsu[0]=src->lsu[0]; + if (src->digits>DECDPUN) { /* more Units to come */ + const Unit *smsup, *s; /* work */ + Unit *d; /* .. */ + /* memcpy for the remaining Units would be safe as they cannot */ + /* overlap. However, this explicit loop is faster in short cases. */ + d=dest->lsu+1; /* -> first destination */ + smsup=src->lsu+D2U(src->digits); /* -> source msu+1 */ + for (s=src->lsu+1; sdigits digits. */ +/* No exception or error can occur; this is a quiet bitwise operation.*/ +/* See also decNumberAbs for a checking version of this. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopyAbs(decNumber *res, const decNumber *rhs) { + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; + #endif + uprv_decNumberCopy(res, rhs); + res->bits&=~DECNEG; /* turn off sign */ + return res; + } /* decNumberCopyAbs */ + +/* ------------------------------------------------------------------ */ +/* decNumberCopyNegate -- quiet negate value operator */ +/* */ +/* This sets C = negate(A) */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* */ +/* C must have space for set->digits digits. */ +/* No exception or error can occur; this is a quiet bitwise operation.*/ +/* See also decNumberMinus for a checking version of this. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopyNegate(decNumber *res, const decNumber *rhs) { + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; + #endif + uprv_decNumberCopy(res, rhs); /* copy the value first, then adjust the sign in res */ + res->bits^=DECNEG; /* invert the sign */ + return res; + } /* decNumberCopyNegate */ + +/* ------------------------------------------------------------------ */ +/* decNumberCopySign -- quiet copy and set sign operator */ +/* */ +/* This sets C = A with the sign of B */ +/* */ +/* res is C, the result. C may be A and/or B (B's sign is saved before the copy) */ +/* lhs is A */ +/* rhs is B */ +/* returns res */ +/* C must have space for the digits of A (no context is passed here). */ +/* No exception or error can occur; this is a quiet bitwise operation.*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopySign(decNumber *res, const decNumber *lhs, + const decNumber *rhs) { + uByte sign; /* rhs sign */ + #if DECCHECK + if (decCheckOperands(res, DECUNUSED, rhs, DECUNCONT)) return res; /* NOTE(review): lhs is passed as DECUNUSED, so only res and rhs reach the checker although lhs is the operand copied below -- confirm this is intended */ + #endif + sign=rhs->bits & DECNEG; /* save sign bit */ + uprv_decNumberCopy(res, lhs); /* res now holds A, including A's sign */ + res->bits&=~DECNEG; /* clear the sign */ + res->bits|=sign; /* set from rhs */ + return res; + } /* decNumberCopySign */ + +/* ------------------------------------------------------------------ */ +/* decNumberGetBCD -- get the coefficient in BCD8 */ +/* dn is the source decNumber */ +/* bcd is the uByte array that will receive dn->digits BCD bytes, */ +/* most-significant at offset 0 */ +/* returns bcd */ +/* */ +/* bcd must have at least dn->digits bytes. No error is possible; if */ +/* dn is a NaN or Infinite, digits must be 1 and the coefficient 0.
*/ +/* ------------------------------------------------------------------ */ +U_CAPI uByte * U_EXPORT2 uprv_decNumberGetBCD(const decNumber *dn, uByte *bcd) { + uByte *ub=bcd+dn->digits-1; /* -> lsd */ + const Unit *up=dn->lsu; /* Unit pointer, -> lsu */ + + #if DECDPUN==1 /* trivial simple copy */ + for (; ub>=bcd; ub--, up++) *ub=*up; + #else /* chopping needed */ + uInt u=*up; /* work */ + uInt cut=DECDPUN; /* downcounter through unit */ + for (; ub>=bcd; ub--) { + *ub=(uByte)(u%10); /* [*6554 trick inhibits, here] */ + u=u/10; + cut--; + if (cut>0) continue; /* more in this unit */ + up++; + u=*up; + cut=DECDPUN; + } + #endif + return bcd; + } /* decNumberGetBCD */ + +/* ------------------------------------------------------------------ */ +/* decNumberSetBCD -- set (replace) the coefficient from BCD8 */ +/* dn is the target decNumber */ +/* bcd is the uInt array that will source n BCD bytes, most- */ +/* significant at offset 0 */ +/* n is the number of digits in the source BCD array (bcd) */ +/* returns dn */ +/* */ +/* dn must have space for at least n digits. No error is possible; */ +/* if dn is a NaN, or Infinite, or is to become a zero, n must be 1 */ +/* and bcd[0] zero. 
*/ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberSetBCD(decNumber *dn, const uByte *bcd, uInt n) { + Unit *up=dn->lsu+D2U(n)-1; /* -> msu of the new n-digit coefficient [target pointer]; sized from n, not from the digit count dn held on entry */ + const uByte *ub=bcd; /* -> source msd */ + + #if DECDPUN==1 /* trivial simple copy */ + for (; ub<bcd+n; ub++, up--) *up=*ub; + #else /* some assembly needed */ + /* calculate how many digits in msu, and hence first cut */ + Int cut=MSUDIGITS(n); /* [faster than remainder] */ + for (;up>=dn->lsu; up--) { /* each Unit from msu */ + *up=0; /* will take <=DECDPUN digits */ + for (; cut>0; ub++, cut--) *up=X10(*up)+*ub; + cut=DECDPUN; /* next Unit has all digits */ + } + #endif + dn->digits=n; /* set digit count */ + return dn; + } /* decNumberSetBCD */ + +/* ------------------------------------------------------------------ */ +/* decNumberIsNormal -- test normality of a decNumber */ +/* dn is the decNumber to test */ +/* set is the context to use for Emin */ +/* returns 1 if |dn| is finite and >=Nmin, 0 otherwise */ +/* ------------------------------------------------------------------ */ +Int uprv_decNumberIsNormal(const decNumber *dn, decContext *set) { + Int ae; /* adjusted exponent */ + #if DECCHECK + if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; + #endif + + if (decNumberIsSpecial(dn)) return 0; /* not finite */ + if (decNumberIsZero(dn)) return 0; /* not non-zero */ + + ae=dn->exponent+dn->digits-1; /* adjusted exponent */ + if (ae<set->emin) return 0; /* is subnormal */ + return 1; + } /* decNumberIsNormal */ + +/* ------------------------------------------------------------------ */ +/* decNumberIsSubnormal -- test subnormality of a decNumber */ +/* dn is the decNumber to test */ +/* set is the context to use for Emin */ +/* returns 1 if |dn| is finite, non-zero, and <Nmin, 0 otherwise */ +/* ------------------------------------------------------------------ */ +Int uprv_decNumberIsSubnormal(const decNumber *dn, decContext *set) { + Int ae; /* adjusted exponent */ + #if DECCHECK + if (decCheckOperands(DECUNRESU, DECUNUSED, dn, set)) return 0; + #endif + + if (decNumberIsSpecial(dn)) return 0; /* not finite */ + if (decNumberIsZero(dn)) return 0; /* not non-zero */ + + ae=dn->exponent+dn->digits-1; /* adjusted exponent */ + if (ae<set->emin) return 1; /* is subnormal */ + return 0; + } /* decNumberIsSubnormal */ + +/* ------------------------------------------------------------------ */ +/* decNumberTrim -- remove insignificant zeros */ +/* */ +/* dn is the number to trim */ +/* returns dn */ +/* */ +/* All fields
are updated as required. This is a utility operation, */ +/* so special values are unchanged and no error is possible. The */ +/* zeros are removed unconditionally. */ +/* ------------------------------------------------------------------ */ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberTrim(decNumber *dn) { + Int dropped; /* receives decTrim's count of dropped zeros; not used afterwards */ + decContext set; /* local context handed to decTrim; the caller supplies none */ + #if DECCHECK + if (decCheckOperands(DECUNRESU, DECUNUSED, dn, DECUNCONT)) return dn; + #endif + uprv_decContextDefault(&set, DEC_INIT_BASE); /* clamp=0 */ + return decTrim(dn, &set, 0, 1, &dropped); /* decTrim's return value is passed straight back */ + } /* decNumberTrim */ + +/* ------------------------------------------------------------------ */ +/* decNumberVersion -- return the name and version of this module */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +const char * uprv_decNumberVersion(void) { + return DECVERSION; /* the DECVERSION macro supplies the string */ + } /* decNumberVersion */ + +/* ------------------------------------------------------------------ */ +/* decNumberZero -- set a number to 0 */ +/* */ +/* dn is the number to set, with space for one digit */ +/* returns dn */ +/* */ +/* No error is possible. */ +/* ------------------------------------------------------------------ */ +/* Memset is not used as it is much slower in some environments.
*/ +U_CAPI decNumber * U_EXPORT2 uprv_decNumberZero(decNumber *dn) { + + #if DECCHECK + if (decCheckOperands(dn, DECUNUSED, DECUNUSED, DECUNCONT)) return dn; + #endif + + dn->bits=0; + dn->exponent=0; + dn->digits=1; + dn->lsu[0]=0; + return dn; + } /* decNumberZero */ + +/* ================================================================== */ +/* Local routines */ +/* ================================================================== */ + +/* ------------------------------------------------------------------ */ +/* decToString -- lay out a number into a string */ +/* */ +/* dn is the number to lay out */ +/* string is where to lay out the number */ +/* eng is 1 if Engineering, 0 if Scientific */ +/* */ +/* string must be at least dn->digits+14 characters long */ +/* No error is possible. */ +/* */ +/* Note that this routine can generate a -0 or 0.000. These are */ +/* never generated in subset to-number or arithmetic, but can occur */ +/* in non-subset arithmetic (e.g., -1*0 or 1.234-1.234). */ +/* ------------------------------------------------------------------ */ +/* If DECCHECK is enabled the string "?" is returned if a number is */ +/* invalid. */ +static void decToString(const decNumber *dn, char *string, Flag eng) { + Int exp=dn->exponent; /* local copy */ + Int e; /* E-part value */ + Int pre; /* digits before the '.' 
*/ + Int cut; /* for counting digits in a Unit */ + char *c=string; /* work [output pointer] */ + const Unit *up=dn->lsu+D2U(dn->digits)-1; /* -> msu [input pointer] */ + uInt u, pow; /* work */ + + #if DECCHECK + if (decCheckOperands(DECUNRESU, dn, DECUNUSED, DECUNCONT)) { + strcpy(string, "?"); + return;} + #endif + + if (decNumberIsNegative(dn)) { /* Negatives get a minus */ + *c='-'; + c++; + } + if (dn->bits&DECSPECIAL) { /* Is a special value */ + if (decNumberIsInfinite(dn)) { + strcpy(c, "Inf"); + strcpy(c+3, "inity"); + return;} + /* a NaN */ + if (dn->bits&DECSNAN) { /* signalling NaN */ + *c='s'; + c++; + } + strcpy(c, "NaN"); + c+=3; /* step past */ + /* if not a clean non-zero coefficient, that's all there is in a */ + /* NaN string */ + if (exp!=0 || (*dn->lsu==0 && dn->digits==1)) return; + /* [drop through to add integer] */ + } + + /* calculate how many digits in msu, and hence first cut */ + cut=MSUDIGITS(dn->digits); /* [faster than remainder] */ + cut--; /* power of ten for digit */ + + if (exp==0) { /* simple integer [common fastpath] */ + for (;up>=dn->lsu; up--) { /* each Unit from msu */ + u=*up; /* contains DECDPUN digits to lay out */ + for (; cut>=0; c++, cut--) TODIGIT(u, cut, c, pow); + cut=DECDPUN-1; /* next Unit has all digits */ + } + *c='\0'; /* terminate the string */ + return;} + + /* non-0 exponent -- assume plain form */ + pre=dn->digits+exp; /* digits before '.' */ + e=0; /* no E */ + if ((exp>0) || (pre<-5)) { /* need exponential form */ + e=exp+dn->digits-1; /* calculate E value */ + pre=1; /* assume one digit before '.' 
*/ + if (eng && (e!=0)) { /* engineering: may need to adjust */ + Int adj; /* adjustment */ + /* The C remainder operator is undefined for negative numbers, so */ + /* a positive remainder calculation must be used here */ + if (e<0) { + adj=(-e)%3; + if (adj!=0) adj=3-adj; + } + else { /* e>0 */ + adj=e%3; + } + e=e-adj; + /* if dealing with zero still produce an exponent which is a */ + /* multiple of three, as expected, but there will only be the */ + /* one zero before the E, still. Otherwise note the padding. */ + if (!ISZERO(dn)) pre+=adj; + else { /* is zero */ + if (adj!=0) { /* 0.00Esnn needed */ + e=e+3; + pre=-(2-adj); + } + } /* zero */ + } /* eng */ + } /* need exponent */ + + /* lay out the digits of the coefficient, adding 0s and . as needed */ + u=*up; + if (pre>0) { /* xxx.xxx or xx00 (engineering) form */ + Int n=pre; + for (; pre>0; pre--, c++, cut--) { + if (cut<0) { /* need new Unit */ + if (up==dn->lsu) break; /* out of input digits (pre>digits) */ + up--; + cut=DECDPUN-1; + u=*up; + } + TODIGIT(u, cut, c, pow); + } + if (ndigits) { /* more to come, after '.' */ + *c='.'; c++; + for (;; c++, cut--) { + if (cut<0) { /* need new Unit */ + if (up==dn->lsu) break; /* out of input digits */ + up--; + cut=DECDPUN-1; + u=*up; + } + TODIGIT(u, cut, c, pow); + } + } + else for (; pre>0; pre--, c++) *c='0'; /* 0 padding (for engineering) needed */ + } + else { /* 0.xxx or 0.000xxx form */ + *c='0'; c++; + *c='.'; c++; + for (; pre<0; pre++, c++) *c='0'; /* add any 0's after '.' */ + for (; ; c++, cut--) { + if (cut<0) { /* need new Unit */ + if (up==dn->lsu) break; /* out of input digits */ + up--; + cut=DECDPUN-1; + u=*up; + } + TODIGIT(u, cut, c, pow); + } + } + + /* Finally add the E-part, if needed. 
It will never be 0, has a + base maximum and minimum of +999999999 through -999999999, but + could range down to -1999999998 for anormal numbers */ + if (e!=0) { + Flag had=0; /* 1=had non-zero */ + *c='E'; c++; + *c='+'; c++; /* assume positive */ + u=e; /* .. */ + if (e<0) { + *(c-1)='-'; /* oops, need - */ + u=-e; /* uInt, please */ + } + /* lay out the exponent [_itoa or equivalent is not ANSI C] */ + for (cut=9; cut>=0; cut--) { + TODIGIT(u, cut, c, pow); + if (*c=='0' && !had) continue; /* skip leading zeros */ + had=1; /* had non-0 */ + c++; /* step for next */ + } /* cut */ + } + *c='\0'; /* terminate the string (all paths) */ + return; + } /* decToString */ + +/* ------------------------------------------------------------------ */ +/* decAddOp -- add/subtract operation */ +/* */ +/* This computes C = A + B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X+X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* negate is DECNEG if rhs should be negated, or 0 otherwise */ +/* status accumulates status for the caller */ +/* */ +/* C must have space for set->digits digits. */ +/* Inexact in status must be 0 for correct Exact zero sign in result */ +/* ------------------------------------------------------------------ */ +/* If possible, the coefficient is calculated directly into C. */ +/* However, if: */ +/* -- a digits+1 calculation is needed because the numbers are */ +/* unaligned and span more than set->digits digits */ +/* -- a carry to digits+1 digits looks possible */ +/* -- C is the same as A or B, and the result would destructively */ +/* overlap the A or B coefficient */ +/* then the result must be calculated into a temporary buffer. In */ +/* this case a local (stack) buffer is used if possible, and only if */ +/* too long for that does malloc become the final resort. */ +/* */ +/* Misalignment is handled as follows: */ +/* Apad: (AExp>BExp) Swap operands and proceed as for BExp>AExp. 
*/ +/* BPad: Apply the padding by a combination of shifting (whole */ +/* units) and multiplication (part units). */ +/* */ +/* Addition, especially x=x+1, is speed-critical. */ +/* The static buffer is larger than might be expected to allow for */ +/* calls from higher-level funtions (notable exp). */ +/* ------------------------------------------------------------------ */ +static decNumber * decAddOp(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set, + uByte negate, uInt *status) { + #if DECSUBSET + decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ + decNumber *allocrhs=NULL; /* .., rhs */ + #endif + Int rhsshift; /* working shift (in Units) */ + Int maxdigits; /* longest logical length */ + Int mult; /* multiplier */ + Int residue; /* rounding accumulator */ + uByte bits; /* result bits */ + Flag diffsign; /* non-0 if arguments have different sign */ + Unit *acc; /* accumulator for result */ + Unit accbuff[SD2U(DECBUFFER*2+20)]; /* local buffer [*2+20 reduces many */ + /* allocations when called from */ + /* other operations, notable exp] */ + Unit *allocacc=NULL; /* -> allocated acc buffer, iff allocated */ + Int reqdigits=set->digits; /* local copy; requested DIGITS */ + Int padding; /* work */ + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operands and set lostDigits status, as needed */ + if (lhs->digits>reqdigits) { + alloclhs=decRoundOperand(lhs, set, status); + if (alloclhs==NULL) break; + lhs=alloclhs; + } + if (rhs->digits>reqdigits) { + allocrhs=decRoundOperand(rhs, set, status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + /* note whether signs differ [used all paths] */ + diffsign=(Flag)((lhs->bits^rhs->bits^negate)&DECNEG); + + /* handle infinities and NaNs */ + if (SPECIALARGS) { /* a special bit set 
*/ + if (SPECIALARGS & (DECSNAN | DECNAN)) /* a NaN */ + decNaNs(res, lhs, rhs, set, status); + else { /* one or two infinities */ + if (decNumberIsInfinite(lhs)) { /* LHS is infinity */ + /* two infinities with different signs is invalid */ + if (decNumberIsInfinite(rhs) && diffsign) { + *status|=DEC_Invalid_operation; + break; + } + bits=lhs->bits & DECNEG; /* get sign from LHS */ + } + else bits=(rhs->bits^negate) & DECNEG;/* RHS must be Infinity */ + bits|=DECINF; + uprv_decNumberZero(res); + res->bits=bits; /* set +/- infinity */ + } /* an infinity */ + break; + } + + /* Quick exit for add 0s; return the non-0, modified as need be */ + if (ISZERO(lhs)) { + Int adjust; /* work */ + Int lexp=lhs->exponent; /* save in case LHS==RES */ + bits=lhs->bits; /* .. */ + residue=0; /* clear accumulator */ + decCopyFit(res, rhs, set, &residue, status); /* copy (as needed) */ + res->bits^=negate; /* flip if rhs was negated */ + #if DECSUBSET + if (set->extended) { /* exponents on zeros count */ + #endif + /* exponent will be the lower of the two */ + adjust=lexp-res->exponent; /* adjustment needed [if -ve] */ + if (ISZERO(res)) { /* both 0: special IEEE 754 rules */ + if (adjust<0) res->exponent=lexp; /* set exponent */ + /* 0-0 gives +0 unless rounding to -infinity, and -0-0 gives -0 */ + if (diffsign) { + if (set->round!=DEC_ROUND_FLOOR) res->bits=0; + else res->bits=DECNEG; /* preserve 0 sign */ + } + } + else { /* non-0 res */ + if (adjust<0) { /* 0-padding needed */ + if ((res->digits-adjust)>set->digits) { + adjust=res->digits-set->digits; /* to fit exactly */ + *status|=DEC_Rounded; /* [but exact] */ + } + res->digits=decShiftToMost(res->lsu, res->digits, -adjust); + res->exponent+=adjust; /* set the exponent. 
*/ + } + } /* non-0 res */ + #if DECSUBSET + } /* extended */ + #endif + decFinish(res, set, &residue, status); /* clean and finalize */ + break;} + + if (ISZERO(rhs)) { /* [lhs is non-zero] */ + Int adjust; /* work */ + Int rexp=rhs->exponent; /* save in case RHS==RES */ + bits=rhs->bits; /* be clean */ + residue=0; /* clear accumulator */ + decCopyFit(res, lhs, set, &residue, status); /* copy (as needed) */ + #if DECSUBSET + if (set->extended) { /* exponents on zeros count */ + #endif + /* exponent will be the lower of the two */ + /* [0-0 case handled above] */ + adjust=rexp-res->exponent; /* adjustment needed [if -ve] */ + if (adjust<0) { /* 0-padding needed */ + if ((res->digits-adjust)>set->digits) { + adjust=res->digits-set->digits; /* to fit exactly */ + *status|=DEC_Rounded; /* [but exact] */ + } + res->digits=decShiftToMost(res->lsu, res->digits, -adjust); + res->exponent+=adjust; /* set the exponent. */ + } + #if DECSUBSET + } /* extended */ + #endif + decFinish(res, set, &residue, status); /* clean and finalize */ + break;} + + /* [NB: both fastpath and mainpath code below assume these cases */ + /* (notably 0-0) have already been handled] */ + + /* calculate the padding needed to align the operands */ + padding=rhs->exponent-lhs->exponent; + + /* Fastpath cases where the numbers are aligned and normal, the RHS */ + /* is all in one unit, no operand rounding is needed, and no carry, */ + /* lengthening, or borrow is needed */ + if (padding==0 + && rhs->digits<=DECDPUN + && rhs->exponent>=set->emin /* [some normals drop through] */ + && rhs->exponent<=set->emax-set->digits+1 /* [could clamp] */ + && rhs->digits<=reqdigits + && lhs->digits<=reqdigits) { + Int partial=*lhs->lsu; + if (!diffsign) { /* adding */ + partial+=*rhs->lsu; + if ((partial<=DECDPUNMAX) /* result fits in unit */ + && (lhs->digits>=DECDPUN || /* .. and no digits-count change */ + partial<(Int)powers[lhs->digits])) { /* .. 
*/ + if (res!=lhs) uprv_decNumberCopy(res, lhs); /* not in place */ + *res->lsu=(Unit)partial; /* [copy could have overwritten RHS] */ + break; + } + /* else drop out for careful add */ + } + else { /* signs differ */ + partial-=*rhs->lsu; + if (partial>0) { /* no borrow needed, and non-0 result */ + if (res!=lhs) uprv_decNumberCopy(res, lhs); /* not in place */ + *res->lsu=(Unit)partial; + /* this could have reduced digits [but result>0] */ + res->digits=decGetDigits(res->lsu, D2U(res->digits)); + break; + } + /* else drop out for careful subtract */ + } + } + + /* Now align (pad) the lhs or rhs so they can be added or */ + /* subtracted, as necessary. If one number is much larger than */ + /* the other (that is, if in plain form there is a least one */ + /* digit between the lowest digit of one and the highest of the */ + /* other) padding with up to DIGITS-1 trailing zeros may be */ + /* needed; then apply rounding (as exotic rounding modes may be */ + /* affected by the residue). */ + rhsshift=0; /* rhs shift to left (padding) in Units */ + bits=lhs->bits; /* assume sign is that of LHS */ + mult=1; /* likely multiplier */ + + /* [if padding==0 the operands are aligned; no padding is needed] */ + if (padding!=0) { + /* some padding needed; always pad the RHS, as any required */ + /* padding can then be effected by a simple combination of */ + /* shifts and a multiply */ + Flag swapped=0; + if (padding<0) { /* LHS needs the padding */ + const decNumber *t; + padding=-padding; /* will be +ve */ + bits=(uByte)(rhs->bits^negate); /* assumed sign is now that of RHS */ + t=lhs; lhs=rhs; rhs=t; + swapped=1; + } + + /* If, after pad, rhs would be longer than lhs by digits+1 or */ + /* more then lhs cannot affect the answer, except as a residue, */ + /* so only need to pad up to a length of DIGITS+1. */ + if (rhs->digits+padding > lhs->digits+reqdigits+1) { + /* The RHS is sufficient */ + /* for residue use the relative sign indication... 
*/ + Int shift=reqdigits-rhs->digits; /* left shift needed */ + residue=1; /* residue for rounding */ + if (diffsign) residue=-residue; /* signs differ */ + /* copy, shortening if necessary */ + decCopyFit(res, rhs, set, &residue, status); + /* if it was already shorter, then need to pad with zeros */ + if (shift>0) { + res->digits=decShiftToMost(res->lsu, res->digits, shift); + res->exponent-=shift; /* adjust the exponent. */ + } + /* flip the result sign if unswapped and rhs was negated */ + if (!swapped) res->bits^=negate; + decFinish(res, set, &residue, status); /* done */ + break;} + + /* LHS digits may affect result */ + rhsshift=D2U(padding+1)-1; /* this much by Unit shift .. */ + mult=powers[padding-(rhsshift*DECDPUN)]; /* .. this by multiplication */ + } /* padding needed */ + + if (diffsign) mult=-mult; /* signs differ */ + + /* determine the longer operand */ + maxdigits=rhs->digits+padding; /* virtual length of RHS */ + if (lhs->digits>maxdigits) maxdigits=lhs->digits; + + /* Decide on the result buffer to use; if possible place directly */ + /* into result. */ + acc=res->lsu; /* assume add direct to result */ + /* If destructive overlap, or the number is too long, or a carry or */ + /* borrow to DIGITS+1 might be possible, a buffer must be used. 
*/ + /* [Might be worth more sophisticated tests when maxdigits==reqdigits] */ + if ((maxdigits>=reqdigits) /* is, or could be, too large */ + || (res==rhs && rhsshift>0)) { /* destructive overlap */ + /* buffer needed, choose it; units for maxdigits digits will be */ + /* needed, +1 Unit for carry or borrow */ + Int need=D2U(maxdigits)+1; + acc=accbuff; /* assume use local buffer */ + if (need*sizeof(Unit)>sizeof(accbuff)) { + /* printf("malloc add %ld %ld\n", need, sizeof(accbuff)); */ + allocacc=(Unit *)malloc(need*sizeof(Unit)); + if (allocacc==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + acc=allocacc; + } + } + + res->bits=(uByte)(bits&DECNEG); /* it's now safe to overwrite.. */ + res->exponent=lhs->exponent; /* .. operands (even if aliased) */ + + #if DECTRACE + decDumpAr('A', lhs->lsu, D2U(lhs->digits)); + decDumpAr('B', rhs->lsu, D2U(rhs->digits)); + printf(" :h: %ld %ld\n", rhsshift, mult); + #endif + + /* add [A+B*m] or subtract [A+B*(-m)] */ + U_ASSERT(rhs->digits > 0); + U_ASSERT(lhs->digits > 0); + res->digits=decUnitAddSub(lhs->lsu, D2U(lhs->digits), + rhs->lsu, D2U(rhs->digits), + rhsshift, acc, mult) + *DECDPUN; /* [units -> digits] */ + if (res->digits<0) { /* borrowed... */ + res->digits=-res->digits; + res->bits^=DECNEG; /* flip the sign */ + } + #if DECTRACE + decDumpAr('+', acc, D2U(res->digits)); + #endif + + /* If a buffer was used the result must be copied back, possibly */ + /* shortening. (If no buffer was used then the result must have */ + /* fit, so can't need rounding and residue must be 0.) */ + residue=0; /* clear accumulator */ + if (acc!=res->lsu) { + #if DECSUBSET + if (set->extended) { /* round from first significant digit */ + #endif + /* remove leading zeros that were added due to rounding up to */ + /* integral Units -- before the test for rounding. 
*/ + if (res->digits>reqdigits) + res->digits=decGetDigits(acc, D2U(res->digits)); + decSetCoeff(res, set, acc, res->digits, &residue, status); + #if DECSUBSET + } + else { /* subset arithmetic rounds from original significant digit */ + /* May have an underestimate. This only occurs when both */ + /* numbers fit in DECDPUN digits and are padding with a */ + /* negative multiple (-10, -100...) and the top digit(s) become */ + /* 0. (This only matters when using X3.274 rules where the */ + /* leading zero could be included in the rounding.) */ + if (res->digitsdigits))=0; /* ensure leading 0 is there */ + res->digits=maxdigits; + } + else { + /* remove leading zeros that added due to rounding up to */ + /* integral Units (but only those in excess of the original */ + /* maxdigits length, unless extended) before test for rounding. */ + if (res->digits>reqdigits) { + res->digits=decGetDigits(acc, D2U(res->digits)); + if (res->digitsdigits=maxdigits; + } + } + decSetCoeff(res, set, acc, res->digits, &residue, status); + /* Now apply rounding if needed before removing leading zeros. */ + /* This is safe because subnormals are not a possibility */ + if (residue!=0) { + decApplyRound(res, set, residue, status); + residue=0; /* did what needed to be done */ + } + } /* subset */ + #endif + } /* used buffer */ + + /* strip leading zeros [these were left on in case of subset subtract] */ + res->digits=decGetDigits(res->lsu, D2U(res->digits)); + + /* apply checks and rounding */ + decFinish(res, set, &residue, status); + + /* "When the sum of two operands with opposite signs is exactly */ + /* zero, the sign of that sum shall be '+' in all rounding modes */ + /* except round toward -Infinity, in which mode that sign shall be */ + /* '-'." [Subset zeros also never have '-', set by decFinish.] 
*/ + if (ISZERO(res) && diffsign + #if DECSUBSET + && set->extended + #endif + && (*status&DEC_Inexact)==0) { + if (set->round==DEC_ROUND_FLOOR) res->bits|=DECNEG; /* sign - */ + else res->bits&=~DECNEG; /* sign + */ + } + } while(0); /* end protected */ + + if (allocacc!=NULL) free(allocacc); /* drop any storage used */ + #if DECSUBSET + if (allocrhs!=NULL) free(allocrhs); /* .. */ + if (alloclhs!=NULL) free(alloclhs); /* .. */ + #endif + return res; + } /* decAddOp */ + +/* ------------------------------------------------------------------ */ +/* decDivideOp -- division operation */ +/* */ +/* This routine performs the calculations for all four division */ +/* operators (divide, divideInteger, remainder, remainderNear). */ +/* */ +/* C=A op B */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X/X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* op is DIVIDE, DIVIDEINT, REMAINDER, or REMNEAR respectively. */ +/* status is the usual accumulator */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* ------------------------------------------------------------------ */ +/* The underlying algorithm of this routine is the same as in the */ +/* 1981 S/370 implementation, that is, non-restoring long division */ +/* with bi-unit (rather than bi-digit) estimation for each unit */ +/* multiplier. In this pseudocode overview, complications for the */ +/* Remainder operators and division residues for exact rounding are */ +/* omitted for clarity. 
*/ +/* */ +/* Prepare operands and handle special values */ +/* Test for x/0 and then 0/x */ +/* Exp =Exp1 - Exp2 */ +/* Exp =Exp +len(var1) -len(var2) */ +/* Sign=Sign1 * Sign2 */ +/* Pad accumulator (Var1) to double-length with 0's (pad1) */ +/* Pad Var2 to same length as Var1 */ +/* msu2pair/plus=1st 2 or 1 units of var2, +1 to allow for round */ +/* have=0 */ +/* Do until (have=digits+1 OR residue=0) */ +/* if exp<0 then if integer divide/residue then leave */ +/* this_unit=0 */ +/* Do forever */ +/* compare numbers */ +/* if <0 then leave inner_loop */ +/* if =0 then (* quick exit without subtract *) do */ +/* this_unit=this_unit+1; output this_unit */ +/* leave outer_loop; end */ +/* Compare lengths of numbers (mantissae): */ +/* If same then tops2=msu2pair -- {units 1&2 of var2} */ +/* else tops2=msu2plus -- {0, unit 1 of var2} */ +/* tops1=first_unit_of_Var1*10**DECDPUN +second_unit_of_var1 */ +/* mult=tops1/tops2 -- Good and safe guess at divisor */ +/* if mult=0 then mult=1 */ +/* this_unit=this_unit+mult */ +/* subtract */ +/* end inner_loop */ +/* if have\=0 | this_unit\=0 then do */ +/* output this_unit */ +/* have=have+1; end */ +/* var2=var2/10 */ +/* exp=exp-1 */ +/* end outer_loop */ +/* exp=exp+1 -- set the proper exponent */ +/* if have=0 then generate answer=0 */ +/* Return (Result is defined by Var1) */ +/* */ +/* ------------------------------------------------------------------ */ +/* Two working buffers are needed during the division; one (digits+ */ +/* 1) to accumulate the result, and the other (up to 2*digits+1) for */ +/* long subtractions. These are acc and var1 respectively. */ +/* var1 is a copy of the lhs coefficient, var2 is the rhs coefficient.*/ +/* The static buffers may be larger than might be expected to allow */ +/* for calls from higher-level funtions (notable exp). 
*/ +/* ------------------------------------------------------------------ */ +static decNumber * decDivideOp(decNumber *res, + const decNumber *lhs, const decNumber *rhs, + decContext *set, Flag op, uInt *status) { + #if DECSUBSET + decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ + decNumber *allocrhs=NULL; /* .., rhs */ + #endif + Unit accbuff[SD2U(DECBUFFER+DECDPUN+10)]; /* local buffer */ + Unit *acc=accbuff; /* -> accumulator array for result */ + Unit *allocacc=NULL; /* -> allocated buffer, iff allocated */ + Unit *accnext; /* -> where next digit will go */ + Int acclength; /* length of acc needed [Units] */ + Int accunits; /* count of units accumulated */ + Int accdigits; /* count of digits accumulated */ + + Unit varbuff[SD2U(DECBUFFER*2+DECDPUN)]; /* buffer for var1 */ + Unit *var1=varbuff; /* -> var1 array for long subtraction */ + Unit *varalloc=NULL; /* -> allocated buffer, iff used */ + Unit *msu1; /* -> msu of var1 */ + + const Unit *var2; /* -> var2 array */ + const Unit *msu2; /* -> msu of var2 */ + Int msu2plus; /* msu2 plus one [does not vary] */ + eInt msu2pair; /* msu2 pair plus one [does not vary] */ + + Int var1units, var2units; /* actual lengths */ + Int var2ulen; /* logical length (units) */ + Int var1initpad=0; /* var1 initial padding (digits) */ + Int maxdigits; /* longest LHS or required acc length */ + Int mult; /* multiplier for subtraction */ + Unit thisunit; /* current unit being accumulated */ + Int residue; /* for rounding */ + Int reqdigits=set->digits; /* requested DIGITS */ + Int exponent; /* working exponent */ + Int maxexponent=0; /* DIVIDE maximum exponent if unrounded */ + uByte bits; /* working sign */ + Unit *target; /* work */ + const Unit *source; /* .. */ + uInt const *pow; /* .. */ + Int shift, cut; /* .. 
*/ + #if DECSUBSET + Int dropped; /* work */ + #endif + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operands and set lostDigits status, as needed */ + if (lhs->digits>reqdigits) { + alloclhs=decRoundOperand(lhs, set, status); + if (alloclhs==NULL) break; + lhs=alloclhs; + } + if (rhs->digits>reqdigits) { + allocrhs=decRoundOperand(rhs, set, status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + bits=(lhs->bits^rhs->bits)&DECNEG; /* assumed sign for divisions */ + + /* handle infinities and NaNs */ + if (SPECIALARGS) { /* a special bit set */ + if (SPECIALARGS & (DECSNAN | DECNAN)) { /* one or two NaNs */ + decNaNs(res, lhs, rhs, set, status); + break; + } + /* one or two infinities */ + if (decNumberIsInfinite(lhs)) { /* LHS (dividend) is infinite */ + if (decNumberIsInfinite(rhs) || /* two infinities are invalid .. */ + op & (REMAINDER | REMNEAR)) { /* as is remainder of infinity */ + *status|=DEC_Invalid_operation; + break; + } + /* [Note that infinity/0 raises no exceptions] */ + uprv_decNumberZero(res); + res->bits=bits|DECINF; /* set +/- infinity */ + break; + } + else { /* RHS (divisor) is infinite */ + residue=0; + if (op&(REMAINDER|REMNEAR)) { + /* result is [finished clone of] lhs */ + decCopyFit(res, lhs, set, &residue, status); + } + else { /* a division */ + uprv_decNumberZero(res); + res->bits=bits; /* set +/- zero */ + /* for DIVIDEINT the exponent is always 0. 
For DIVIDE, result */ + /* is a 0 with infinitely negative exponent, clamped to minimum */ + if (op&DIVIDE) { + res->exponent=set->emin-set->digits+1; + *status|=DEC_Clamped; + } + } + decFinish(res, set, &residue, status); + break; + } + } + + /* handle 0 rhs (x/0) */ + if (ISZERO(rhs)) { /* x/0 is always exceptional */ + if (ISZERO(lhs)) { + uprv_decNumberZero(res); /* [after lhs test] */ + *status|=DEC_Division_undefined;/* 0/0 will become NaN */ + } + else { + uprv_decNumberZero(res); + if (op&(REMAINDER|REMNEAR)) *status|=DEC_Invalid_operation; + else { + *status|=DEC_Division_by_zero; /* x/0 */ + res->bits=bits|DECINF; /* .. is +/- Infinity */ + } + } + break;} + + /* handle 0 lhs (0/x) */ + if (ISZERO(lhs)) { /* 0/x [x!=0] */ + #if DECSUBSET + if (!set->extended) uprv_decNumberZero(res); + else { + #endif + if (op&DIVIDE) { + residue=0; + exponent=lhs->exponent-rhs->exponent; /* ideal exponent */ + uprv_decNumberCopy(res, lhs); /* [zeros always fit] */ + res->bits=bits; /* sign as computed */ + res->exponent=exponent; /* exponent, too */ + decFinalize(res, set, &residue, status); /* check exponent */ + } + else if (op&DIVIDEINT) { + uprv_decNumberZero(res); /* integer 0 */ + res->bits=bits; /* sign as computed */ + } + else { /* a remainder */ + exponent=rhs->exponent; /* [save in case overwrite] */ + uprv_decNumberCopy(res, lhs); /* [zeros always fit] */ + if (exponentexponent) res->exponent=exponent; /* use lower */ + } + #if DECSUBSET + } + #endif + break;} + + /* Precalculate exponent. This starts off adjusted (and hence fits */ + /* in 31 bits) and becomes the usual unadjusted exponent as the */ + /* division proceeds. The order of evaluation is important, here, */ + /* to avoid wrap. 
*/ + exponent=(lhs->exponent+lhs->digits)-(rhs->exponent+rhs->digits); + + /* If the working exponent is -ve, then some quick exits are */ + /* possible because the quotient is known to be <1 */ + /* [for REMNEAR, it needs to be < -1, as -0.5 could need work] */ + if (exponent<0 && !(op==DIVIDE)) { + if (op&DIVIDEINT) { + uprv_decNumberZero(res); /* integer part is 0 */ + #if DECSUBSET + if (set->extended) + #endif + res->bits=bits; /* set +/- zero */ + break;} + /* fastpath remainders so long as the lhs has the smaller */ + /* (or equal) exponent */ + if (lhs->exponent<=rhs->exponent) { + if (op&REMAINDER || exponent<-1) { + /* It is REMAINDER or safe REMNEAR; result is [finished */ + /* clone of] lhs (r = x - 0*y) */ + residue=0; + decCopyFit(res, lhs, set, &residue, status); + decFinish(res, set, &residue, status); + break; + } + /* [unsafe REMNEAR drops through] */ + } + } /* fastpaths */ + + /* Long (slow) division is needed; roll up the sleeves... */ + + /* The accumulator will hold the quotient of the division. */ + /* If it needs to be too long for stack storage, then allocate. */ + acclength=D2U(reqdigits+DECDPUN); /* in Units */ + if (acclength*sizeof(Unit)>sizeof(accbuff)) { + /* printf("malloc dvacc %ld units\n", acclength); */ + allocacc=(Unit *)malloc(acclength*sizeof(Unit)); + if (allocacc==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + acc=allocacc; /* use the allocated space */ + } + + /* var1 is the padded LHS ready for subtractions. */ + /* If it needs to be too long for stack storage, then allocate. 
*/ + /* The maximum units needed for var1 (long subtraction) is: */ + /* Enough for */ + /* (rhs->digits+reqdigits-1) -- to allow full slide to right */ + /* or (lhs->digits) -- to allow for long lhs */ + /* whichever is larger */ + /* +1 -- for rounding of slide to right */ + /* +1 -- for leading 0s */ + /* +1 -- for pre-adjust if a remainder or DIVIDEINT */ + /* [Note: unused units do not participate in decUnitAddSub data] */ + maxdigits=rhs->digits+reqdigits-1; + if (lhs->digits>maxdigits) maxdigits=lhs->digits; + var1units=D2U(maxdigits)+2; + /* allocate a guard unit above msu1 for REMAINDERNEAR */ + if (!(op&DIVIDE)) var1units++; + if ((var1units+1)*sizeof(Unit)>sizeof(varbuff)) { + /* printf("malloc dvvar %ld units\n", var1units+1); */ + varalloc=(Unit *)malloc((var1units+1)*sizeof(Unit)); + if (varalloc==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + var1=varalloc; /* use the allocated space */ + } + + /* Extend the lhs and rhs to full long subtraction length. The lhs */ + /* is truly extended into the var1 buffer, with 0 padding, so a */ + /* subtract in place is always possible. The rhs (var2) has */ + /* virtual padding (implemented by decUnitAddSub). */ + /* One guard unit was allocated above msu1 for rem=rem+rem in */ + /* REMAINDERNEAR. */ + msu1=var1+var1units-1; /* msu of var1 */ + source=lhs->lsu+D2U(lhs->digits)-1; /* msu of input array */ + for (target=msu1; source>=lhs->lsu; source--, target--) *target=*source; + for (; target>=var1; target--) *target=0; + + /* rhs (var2) is left-aligned with var1 at the start */ + var2ulen=var1units; /* rhs logical length (units) */ + var2units=D2U(rhs->digits); /* rhs actual length (units) */ + var2=rhs->lsu; /* -> rhs array */ + msu2=var2+var2units-1; /* -> msu of var2 [never changes] */ + /* now set up the variables which will be used for estimating the */ + /* multiplication factor. 
If these variables are not exact, add */ + /* 1 to make sure that the multiplier is never overestimated. */ + msu2plus=*msu2; /* it's value .. */ + if (var2units>1) msu2plus++; /* .. +1 if any more */ + msu2pair=(eInt)*msu2*(DECDPUNMAX+1);/* top two pair .. */ + if (var2units>1) { /* .. [else treat 2nd as 0] */ + msu2pair+=*(msu2-1); /* .. */ + if (var2units>2) msu2pair++; /* .. +1 if any more */ + } + + /* The calculation is working in units, which may have leading zeros, */ + /* but the exponent was calculated on the assumption that they are */ + /* both left-aligned. Adjust the exponent to compensate: add the */ + /* number of leading zeros in var1 msu and subtract those in var2 msu. */ + /* [This is actually done by counting the digits and negating, as */ + /* lead1=DECDPUN-digits1, and similarly for lead2.] */ + for (pow=&powers[1]; *msu1>=*pow; pow++) exponent--; + for (pow=&powers[1]; *msu2>=*pow; pow++) exponent++; + + /* Now, if doing an integer divide or remainder, ensure that */ + /* the result will be Unit-aligned. To do this, shift the var1 */ + /* accumulator towards least if need be. (It's much easier to */ + /* do this now than to reassemble the residue afterwards, if */ + /* doing a remainder.) Also ensure the exponent is not negative. */ + if (!(op&DIVIDE)) { + Unit *u; /* work */ + /* save the initial 'false' padding of var1, in digits */ + var1initpad=(var1units-D2U(lhs->digits))*DECDPUN; + /* Determine the shift to do. */ + if (exponent<0) cut=-exponent; + else cut=DECDPUN-exponent%DECDPUN; + decShiftToLeast(var1, var1units, cut); + exponent+=cut; /* maintain numerical value */ + var1initpad-=cut; /* .. 
and reduce padding */ + /* clean any most-significant units which were just emptied */ + for (u=msu1; cut>=DECDPUN; cut-=DECDPUN, u--) *u=0; + } /* align */ + else { /* is DIVIDE */ + maxexponent=lhs->exponent-rhs->exponent; /* save */ + /* optimization: if the first iteration will just produce 0, */ + /* preadjust to skip it [valid for DIVIDE only] */ + if (*msu1<*msu2) { + var2ulen--; /* shift down */ + exponent-=DECDPUN; /* update the exponent */ + } + } + + /* ---- start the long-division loops ------------------------------ */ + accunits=0; /* no units accumulated yet */ + accdigits=0; /* .. or digits */ + accnext=acc+acclength-1; /* -> msu of acc [NB: allows digits+1] */ + for (;;) { /* outer forever loop */ + thisunit=0; /* current unit assumed 0 */ + /* find the next unit */ + for (;;) { /* inner forever loop */ + /* strip leading zero units [from either pre-adjust or from */ + /* subtract last time around]. Leave at least one unit. */ + for (; *msu1==0 && msu1>var1; msu1--) var1units--; + + if (var1units msu */ + for (pv1=msu1; ; pv1--, pv2--) { + /* v1=*pv1 -- always OK */ + v2=0; /* assume in padding */ + if (pv2>=var2) v2=*pv2; /* in range */ + if (*pv1!=v2) break; /* no longer the same */ + if (pv1==var1) break; /* done; leave pv1 as is */ + } + /* here when all inspected or a difference seen */ + if (*pv1v2. Prepare for real subtraction; the lengths are equal */ + /* Estimate the multiplier (there's always a msu1-1)... */ + /* Bring in two units of var2 to provide a good estimate. */ + mult=(Int)(((eInt)*msu1*(DECDPUNMAX+1)+*(msu1-1))/msu2pair); + } /* lengths the same */ + else { /* var1units > var2ulen, so subtraction is safe */ + /* The var2 msu is one unit towards the lsu of the var1 msu, */ + /* so only one unit for var2 can be used. 
*/ + mult=(Int)(((eInt)*msu1*(DECDPUNMAX+1)+*(msu1-1))/msu2plus); + } + if (mult==0) mult=1; /* must always be at least 1 */ + /* subtraction needed; var1 is > var2 */ + thisunit=(Unit)(thisunit+mult); /* accumulate */ + /* subtract var1-var2, into var1; only the overlap needs */ + /* processing, as this is an in-place calculation */ + shift=var2ulen-var2units; + #if DECTRACE + decDumpAr('1', &var1[shift], var1units-shift); + decDumpAr('2', var2, var2units); + printf("m=%ld\n", -mult); + #endif + decUnitAddSub(&var1[shift], var1units-shift, + var2, var2units, 0, + &var1[shift], -mult); + #if DECTRACE + decDumpAr('#', &var1[shift], var1units-shift); + #endif + /* var1 now probably has leading zeros; these are removed at the */ + /* top of the inner loop. */ + } /* inner loop */ + + /* The next unit has been calculated in full; unless it's a */ + /* leading zero, add to acc */ + if (accunits!=0 || thisunit!=0) { /* is first or non-zero */ + *accnext=thisunit; /* store in accumulator */ + /* account exactly for the new digits */ + if (accunits==0) { + accdigits++; /* at least one */ + for (pow=&powers[1]; thisunit>=*pow; pow++) accdigits++; + } + else accdigits+=DECDPUN; + accunits++; /* update count */ + accnext--; /* ready for next */ + if (accdigits>reqdigits) break; /* have enough digits */ + } + + /* if the residue is zero, the operation is done (unless divide */ + /* or divideInteger and still not enough digits yet) */ + if (*var1==0 && var1units==1) { /* residue is 0 */ + if (op&(REMAINDER|REMNEAR)) break; + if ((op&DIVIDE) && (exponent<=maxexponent)) break; + /* [drop through if divideInteger] */ + } + /* also done enough if calculating remainder or integer */ + /* divide and just did the last ('units') unit */ + if (exponent==0 && !(op&DIVIDE)) break; + + /* to get here, var1 is less than var2, so divide var2 by the per- */ + /* Unit power of ten and go for the next digit */ + var2ulen--; /* shift down */ + exponent-=DECDPUN; /* update the exponent */ + } /* 
outer loop */ + + /* ---- division is complete --------------------------------------- */ + /* here: acc has at least reqdigits+1 of good results (or fewer */ + /* if early stop), starting at accnext+1 (its lsu) */ + /* var1 has any residue at the stopping point */ + /* accunits is the number of digits collected in acc */ + if (accunits==0) { /* acc is 0 */ + accunits=1; /* show have a unit .. */ + accdigits=1; /* .. */ + *accnext=0; /* .. whose value is 0 */ + } + else accnext++; /* back to last placed */ + /* accnext now -> lowest unit of result */ + + residue=0; /* assume no residue */ + if (op&DIVIDE) { + /* record the presence of any residue, for rounding */ + if (*var1!=0 || var1units>1) residue=1; + else { /* no residue */ + /* Had an exact division; clean up spurious trailing 0s. */ + /* There will be at most DECDPUN-1, from the final multiply, */ + /* and then only if the result is non-0 (and even) and the */ + /* exponent is 'loose'. */ + #if DECDPUN>1 + Unit lsu=*accnext; + if (!(lsu&0x01) && (lsu!=0)) { + /* count the trailing zeros */ + Int drop=0; + for (;; drop++) { /* [will terminate because lsu!=0] */ + if (exponent>=maxexponent) break; /* don't chop real 0s */ + #if DECDPUN<=4 + if ((lsu-QUOT10(lsu, drop+1) + *powers[drop+1])!=0) break; /* found non-0 digit */ + #else + if (lsu%powers[drop+1]!=0) break; /* found non-0 digit */ + #endif + exponent++; + } + if (drop>0) { + accunits=decShiftToLeast(accnext, accunits, drop); + accdigits=decGetDigits(accnext, accunits); + accunits=D2U(accdigits); + /* [exponent was adjusted in the loop] */ + } + } /* neither odd nor 0 */ + #endif + } /* exact divide */ + } /* divide */ + else /* op!=DIVIDE */ { + /* check for coefficient overflow */ + if (accdigits+exponent>reqdigits) { + *status|=DEC_Division_impossible; + break; + } + if (op & (REMAINDER|REMNEAR)) { + /* [Here, the exponent will be 0, because var1 was adjusted */ + /* appropriately.] 
*/ + Int postshift; /* work */ + Flag wasodd=0; /* integer was odd */ + Unit *quotlsu; /* for save */ + Int quotdigits; /* .. */ + + bits=lhs->bits; /* remainder sign is always as lhs */ + + /* Fastpath when residue is truly 0 is worthwhile [and */ + /* simplifies the code below] */ + if (*var1==0 && var1units==1) { /* residue is 0 */ + Int exp=lhs->exponent; /* save min(exponents) */ + if (rhs->exponentexponent; + uprv_decNumberZero(res); /* 0 coefficient */ + #if DECSUBSET + if (set->extended) + #endif + res->exponent=exp; /* .. with proper exponent */ + res->bits=(uByte)(bits&DECNEG); /* [cleaned] */ + decFinish(res, set, &residue, status); /* might clamp */ + break; + } + /* note if the quotient was odd */ + if (*accnext & 0x01) wasodd=1; /* acc is odd */ + quotlsu=accnext; /* save in case need to reinspect */ + quotdigits=accdigits; /* .. */ + + /* treat the residue, in var1, as the value to return, via acc */ + /* calculate the unused zero digits. This is the smaller of: */ + /* var1 initial padding (saved above) */ + /* var2 residual padding, which happens to be given by: */ + postshift=var1initpad+exponent-lhs->exponent+rhs->exponent; + /* [the 'exponent' term accounts for the shifts during divide] */ + if (var1initpadexponent; /* exponent is smaller of lhs & rhs */ + if (rhs->exponentexponent; + + /* Now correct the result if doing remainderNear; if it */ + /* (looking just at coefficients) is > rhs/2, or == rhs/2 and */ + /* the integer was odd then the result should be rem-rhs. */ + if (op&REMNEAR) { + Int compare, tarunits; /* work */ + Unit *up; /* .. 
*/ + /* calculate remainder*2 into the var1 buffer (which has */ + /* 'headroom' of an extra unit and hence enough space) */ + /* [a dedicated 'double' loop would be faster, here] */ + tarunits=decUnitAddSub(accnext, accunits, accnext, accunits, + 0, accnext, 1); + /* decDumpAr('r', accnext, tarunits); */ + + /* Here, accnext (var1) holds tarunits Units with twice the */ + /* remainder's coefficient, which must now be compared to the */ + /* RHS. The remainder's exponent may be smaller than the RHS's. */ + compare=decUnitCompare(accnext, tarunits, rhs->lsu, D2U(rhs->digits), + rhs->exponent-exponent); + if (compare==BADINT) { /* deep trouble */ + *status|=DEC_Insufficient_storage; + break;} + + /* now restore the remainder by dividing by two; the lsu */ + /* is known to be even. */ + for (up=accnext; up0 || (compare==0 && wasodd)) { /* adjustment needed */ + Int exp, expunits, exprem; /* work */ + /* This is effectively causing round-up of the quotient, */ + /* so if it was the rare case where it was full and all */ + /* nines, it would overflow and hence division-impossible */ + /* should be raised */ + Flag allnines=0; /* 1 if quotient all nines */ + if (quotdigits==reqdigits) { /* could be borderline */ + for (up=quotlsu; ; up++) { + if (quotdigits>DECDPUN) { + if (*up!=DECDPUNMAX) break;/* non-nines */ + } + else { /* this is the last Unit */ + if (*up==powers[quotdigits]-1) allnines=1; + break; + } + quotdigits-=DECDPUN; /* checked those digits */ + } /* up */ + } /* borderline check */ + if (allnines) { + *status|=DEC_Division_impossible; + break;} + + /* rem-rhs is needed; the sign will invert. Again, var1 */ + /* can safely be used for the working Units array. */ + exp=rhs->exponent-exponent; /* RHS padding needed */ + /* Calculate units and remainder from exponent. 
*/ + expunits=exp/DECDPUN; + exprem=exp%DECDPUN; + /* subtract [A+B*(-m)]; the result will always be negative */ + accunits=-decUnitAddSub(accnext, accunits, + rhs->lsu, D2U(rhs->digits), + expunits, accnext, -(Int)powers[exprem]); + accdigits=decGetDigits(accnext, accunits); /* count digits exactly */ + accunits=D2U(accdigits); /* and recalculate the units for copy */ + /* [exponent is as for original remainder] */ + bits^=DECNEG; /* flip the sign */ + } + } /* REMNEAR */ + } /* REMAINDER or REMNEAR */ + } /* not DIVIDE */ + + /* Set exponent and bits */ + res->exponent=exponent; + res->bits=(uByte)(bits&DECNEG); /* [cleaned] */ + + /* Now the coefficient. */ + decSetCoeff(res, set, accnext, accdigits, &residue, status); + + decFinish(res, set, &residue, status); /* final cleanup */ + + #if DECSUBSET + /* If a divide then strip trailing zeros if subset [after round] */ + if (!set->extended && (op==DIVIDE)) decTrim(res, set, 0, 1, &dropped); + #endif + } while(0); /* end protected */ + + if (varalloc!=NULL) free(varalloc); /* drop any storage used */ + if (allocacc!=NULL) free(allocacc); /* .. */ + #if DECSUBSET + if (allocrhs!=NULL) free(allocrhs); /* .. */ + if (alloclhs!=NULL) free(alloclhs); /* .. */ + #endif + return res; + } /* decDivideOp */ + +/* ------------------------------------------------------------------ */ +/* decMultiplyOp -- multiplication operation */ +/* */ +/* This routine performs the multiplication C=A x B. */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X*X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* status is the usual accumulator */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* ------------------------------------------------------------------ */ +/* 'Classic' multiplication is used rather than Karatsuba, as the */ +/* latter would give only a minor improvement for the short numbers */ +/* expected to be handled most (and uses much more memory). 
*/ +/* */ +/* There are two major paths here: the general-purpose ('old code') */ +/* path which handles all DECDPUN values, and a fastpath version */ +/* which is used if 64-bit ints are available, DECDPUN<=4, and more */ +/* than two calls to decUnitAddSub would be made. */ +/* */ +/* The fastpath version lumps units together into 8-digit or 9-digit */ +/* chunks, and also uses a lazy carry strategy to minimise expensive */ +/* 64-bit divisions. The chunks are then broken apart again into */ +/* units for continuing processing. Despite this overhead, the */ +/* fastpath can speed up some 16-digit operations by 10x (and much */ +/* more for higher-precision calculations). */ +/* */ +/* A buffer always has to be used for the accumulator; in the */ +/* fastpath, buffers are also always needed for the chunked copies of */ +/* of the operand coefficients. */ +/* Static buffers are larger than needed just for multiply, to allow */ +/* for calls from other operations (notably exp). */ +/* ------------------------------------------------------------------ */ +#define FASTMUL (DECUSE64 && DECDPUN<5) +static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set, + uInt *status) { + Int accunits; /* Units of accumulator in use */ + Int exponent; /* work */ + Int residue=0; /* rounding residue */ + uByte bits; /* result sign */ + Unit *acc; /* -> accumulator Unit array */ + Int needbytes; /* size calculator */ + void *allocacc=NULL; /* -> allocated accumulator, iff allocated */ + Unit accbuff[SD2U(DECBUFFER*4+1)]; /* buffer (+1 for DECBUFFER==0, */ + /* *4 for calls from other operations) */ + const Unit *mer, *mermsup; /* work */ + Int madlength; /* Units in multiplicand */ + Int shift; /* Units to shift multiplicand by */ + + #if FASTMUL + /* if DECDPUN is 1 or 3 work in base 10**9, otherwise */ + /* (DECDPUN is 2 or 4) then work in base 10**8 */ + #if DECDPUN & 1 /* odd */ + #define FASTBASE 1000000000 /* base */ + #define 
FASTDIGS 9 /* digits in base */ + #define FASTLAZY 18 /* carry resolution point [1->18] */ + #else + #define FASTBASE 100000000 + #define FASTDIGS 8 + #define FASTLAZY 1844 /* carry resolution point [1->1844] */ + #endif + /* three buffers are used, two for chunked copies of the operands */ + /* (base 10**8 or base 10**9) and one base 2**64 accumulator with */ + /* lazy carry evaluation */ + uInt zlhibuff[(DECBUFFER*2+1)/8+1]; /* buffer (+1 for DECBUFFER==0) */ + uInt *zlhi=zlhibuff; /* -> lhs array */ + uInt *alloclhi=NULL; /* -> allocated buffer, iff allocated */ + uInt zrhibuff[(DECBUFFER*2+1)/8+1]; /* buffer (+1 for DECBUFFER==0) */ + uInt *zrhi=zrhibuff; /* -> rhs array */ + uInt *allocrhi=NULL; /* -> allocated buffer, iff allocated */ + uLong zaccbuff[(DECBUFFER*2+1)/4+2]; /* buffer (+1 for DECBUFFER==0) */ + /* [allocacc is shared for both paths, as only one will run] */ + uLong *zacc=zaccbuff; /* -> accumulator array for exact result */ + #if DECDPUN==1 + Int zoff; /* accumulator offset */ + #endif + uInt *lip, *rip; /* item pointers */ + uInt *lmsi, *rmsi; /* most significant items */ + Int ilhs, irhs, iacc; /* item counts in the arrays */ + Int lazy; /* lazy carry counter */ + uLong lcarry; /* uLong carry */ + uInt carry; /* carry (NB not uLong) */ + Int count; /* work */ + const Unit *cup; /* .. */ + Unit *up; /* .. */ + uLong *lp; /* .. */ + Int p; /* .. 
*/ + #endif + + #if DECSUBSET + decNumber *alloclhs=NULL; /* -> allocated buffer, iff allocated */ + decNumber *allocrhs=NULL; /* -> allocated buffer, iff allocated */ + #endif + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + /* precalculate result sign */ + bits=(uByte)((lhs->bits^rhs->bits)&DECNEG); + + /* handle infinities and NaNs */ + if (SPECIALARGS) { /* a special bit set */ + if (SPECIALARGS & (DECSNAN | DECNAN)) { /* one or two NaNs */ + decNaNs(res, lhs, rhs, set, status); + return res;} + /* one or two infinities; Infinity * 0 is invalid */ + if (((lhs->bits & DECINF)==0 && ISZERO(lhs)) + ||((rhs->bits & DECINF)==0 && ISZERO(rhs))) { + *status|=DEC_Invalid_operation; + return res;} + uprv_decNumberZero(res); + res->bits=bits|DECINF; /* infinity */ + return res;} + + /* For best speed, as in DMSRCN [the original Rexx numerics */ + /* module], use the shorter number as the multiplier (rhs) and */ + /* the longer as the multiplicand (lhs) to minimise the number of */ + /* adds (partial products) */ + if (lhs->digitsdigits) { /* swap... */ + const decNumber *hold=lhs; + lhs=rhs; + rhs=hold; + } + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operands and set lostDigits status, as needed */ + if (lhs->digits>set->digits) { + alloclhs=decRoundOperand(lhs, set, status); + if (alloclhs==NULL) break; + lhs=alloclhs; + } + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + #if FASTMUL /* fastpath can be used */ + /* use the fast path if there are enough digits in the shorter */ + /* operand to make the setup and takedown worthwhile */ + #define NEEDTWO (DECDPUN*2) /* within two decUnitAddSub calls */ + if (rhs->digits>NEEDTWO) { /* use fastpath... 
*/ + /* calculate the number of elements in each array */ + ilhs=(lhs->digits+FASTDIGS-1)/FASTDIGS; /* [ceiling] */ + irhs=(rhs->digits+FASTDIGS-1)/FASTDIGS; /* .. */ + iacc=ilhs+irhs; + + /* allocate buffers if required, as usual */ + needbytes=ilhs*sizeof(uInt); + if (needbytes>(Int)sizeof(zlhibuff)) { + alloclhi=(uInt *)malloc(needbytes); + zlhi=alloclhi;} + needbytes=irhs*sizeof(uInt); + if (needbytes>(Int)sizeof(zrhibuff)) { + allocrhi=(uInt *)malloc(needbytes); + zrhi=allocrhi;} + + /* Allocating the accumulator space needs a special case when */ + /* DECDPUN=1 because when converting the accumulator to Units */ + /* after the multiplication each 8-byte item becomes 9 1-byte */ + /* units. Therefore iacc extra bytes are needed at the front */ + /* (rounded up to a multiple of 8 bytes), and the uLong */ + /* accumulator starts offset the appropriate number of units */ + /* to the right to avoid overwrite during the unchunking. */ + + /* Make sure no signed int overflow below. This is always true */ + /* if the given numbers have less digits than DEC_MAX_DIGITS. 
*/ + U_ASSERT((uint32_t)iacc <= INT32_MAX/sizeof(uLong)); + needbytes=iacc*sizeof(uLong); + #if DECDPUN==1 + zoff=(iacc+7)/8; /* items to offset by */ + needbytes+=zoff*8; + #endif + if (needbytes>(Int)sizeof(zaccbuff)) { + allocacc=(uLong *)malloc(needbytes); + zacc=(uLong *)allocacc;} + if (zlhi==NULL||zrhi==NULL||zacc==NULL) { + *status|=DEC_Insufficient_storage; + break;} + + acc=(Unit *)zacc; /* -> target Unit array */ + #if DECDPUN==1 + zacc+=zoff; /* start uLong accumulator to right */ + #endif + + /* assemble the chunked copies of the left and right sides */ + for (count=lhs->digits, cup=lhs->lsu, lip=zlhi; count>0; lip++) + for (p=0, *lip=0; p0; + p+=DECDPUN, cup++, count-=DECDPUN) + *lip+=*cup*powers[p]; + lmsi=lip-1; /* save -> msi */ + for (count=rhs->digits, cup=rhs->lsu, rip=zrhi; count>0; rip++) + for (p=0, *rip=0; p0; + p+=DECDPUN, cup++, count-=DECDPUN) + *rip+=*cup*powers[p]; + rmsi=rip-1; /* save -> msi */ + + /* zero the accumulator */ + for (lp=zacc; lp0 && rip!=rmsi) continue; + lazy=FASTLAZY; /* reset delay count */ + /* spin up the accumulator resolving overflows */ + for (lp=zacc; lp assume buffer for accumulator */ + needbytes=(D2U(lhs->digits)+D2U(rhs->digits))*sizeof(Unit); + if (needbytes>(Int)sizeof(accbuff)) { + allocacc=(Unit *)malloc(needbytes); + if (allocacc==NULL) {*status|=DEC_Insufficient_storage; break;} + acc=(Unit *)allocacc; /* use the allocated space */ + } + + /* Now the main long multiplication loop */ + /* Unlike the equivalent in the IBM Java implementation, there */ + /* is no advantage in calculating from msu to lsu. So, do it */ + /* by the book, as it were. */ + /* Each iteration calculates ACC=ACC+MULTAND*MULT */ + accunits=1; /* accumulator starts at '0' */ + *acc=0; /* .. 
(lsu=0) */ + shift=0; /* no multiplicand shift at first */ + madlength=D2U(lhs->digits); /* this won't change */ + mermsup=rhs->lsu+D2U(rhs->digits); /* -> msu+1 of multiplier */ + + for (mer=rhs->lsu; merlsu, madlength, 0, + &acc[shift], *mer) + + shift; + else { /* extend acc with a 0; it will be used shortly */ + *(acc+accunits)=0; /* [this avoids length of <=0 later] */ + accunits++; + } + /* multiply multiplicand by 10**DECDPUN for next Unit to left */ + shift++; /* add this for 'logical length' */ + } /* n */ + #if FASTMUL + } /* unchunked units */ + #endif + /* common end-path */ + #if DECTRACE + decDumpAr('*', acc, accunits); /* Show exact result */ + #endif + + /* acc now contains the exact result of the multiplication, */ + /* possibly with a leading zero unit; build the decNumber from */ + /* it, noting if any residue */ + res->bits=bits; /* set sign */ + res->digits=decGetDigits(acc, accunits); /* count digits exactly */ + + /* There can be a 31-bit wrap in calculating the exponent. */ + /* This can only happen if both input exponents are negative and */ + /* both their magnitudes are large. If there was a wrap, set a */ + /* safe very negative exponent, from which decFinalize() will */ + /* raise a hard underflow shortly. */ + exponent=lhs->exponent+rhs->exponent; /* calculate exponent */ + if (lhs->exponent<0 && rhs->exponent<0 && exponent>0) + exponent=-2*DECNUMMAXE; /* force underflow */ + res->exponent=exponent; /* OK to overwrite now */ + + + /* Set the coefficient. If any rounding, residue records */ + decSetCoeff(res, set, acc, res->digits, &residue, status); + decFinish(res, set, &residue, status); /* final cleanup */ + } while(0); /* end protected */ + + if (allocacc!=NULL) free(allocacc); /* drop any storage used */ + #if DECSUBSET + if (allocrhs!=NULL) free(allocrhs); /* .. */ + if (alloclhs!=NULL) free(alloclhs); /* .. */ + #endif + #if FASTMUL + if (allocrhi!=NULL) free(allocrhi); /* .. */ + if (alloclhi!=NULL) free(alloclhi); /* .. 
*/ + #endif + return res; + } /* decMultiplyOp */ + +/* ------------------------------------------------------------------ */ +/* decExpOp -- effect exponentiation */ +/* */ +/* This computes C = exp(A) */ +/* */ +/* res is C, the result. C may be A */ +/* rhs is A */ +/* set is the context; note that rounding mode has no effect */ +/* */ +/* C must have space for set->digits digits. status is updated but */ +/* not set. */ +/* */ +/* Restrictions: */ +/* */ +/* digits, emax, and -emin in the context must be less than */ +/* 2*DEC_MAX_MATH (1999998), and the rhs must be within these */ +/* bounds or a zero. This is an internal routine, so these */ +/* restrictions are contractual and not enforced. */ +/* */ +/* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will */ +/* almost always be correctly rounded, but may be up to 1 ulp in */ +/* error in rare cases. */ +/* */ +/* Finite results will always be full precision and Inexact, except */ +/* when A is a zero or -Infinity (giving 1 or 0 respectively). */ +/* ------------------------------------------------------------------ */ +/* This approach used here is similar to the algorithm described in */ +/* */ +/* Variable Precision Exponential Function, T. E. Hull and */ +/* A. Abrham, ACM Transactions on Mathematical Software, Vol 12 #2, */ +/* pp79-91, ACM, June 1986. */ +/* */ +/* with the main difference being that the iterations in the series */ +/* evaluation are terminated dynamically (which does not require the */ +/* extra variable-precision variables which are expensive in this */ +/* context). */ +/* */ +/* The error analysis in Hull & Abrham's paper applies except for the */ +/* round-off error accumulation during the series evaluation. This */ +/* code does not precalculate the number of iterations and so cannot */ +/* use Horner's scheme. 
Instead, the accumulation is done at double- */ +/* precision, which ensures that the additions of the terms are exact */ +/* and do not accumulate round-off (and any round-off errors in the */ +/* terms themselves move 'to the right' faster than they can */ +/* accumulate). This code also extends the calculation by allowing, */ +/* in the spirit of other decNumber operators, the input to be more */ +/* precise than the result (the precision used is based on the more */ +/* precise of the input or requested result). */ +/* */ +/* Implementation notes: */ +/* */ +/* 1. This is separated out as decExpOp so it can be called from */ +/* other Mathematical functions (notably Ln) with a wider range */ +/* than normal. In particular, it can handle the slightly wider */ +/* (double) range needed by Ln (which has to be able to calculate */ +/* exp(-x) where x can be the tiniest number (Ntiny)). */ +/* */ +/* 2. Normalizing x to be <=0.1 (instead of <=1) reduces loop */ +/* iterations by approximately a third with additional (although */ +/* diminishing) returns as the range is reduced to even smaller */ +/* fractions. However, h (the power of 10 used to correct the */ +/* result at the end, see below) must be kept <=8 as otherwise */ +/* the final result cannot be computed. Hence the leverage is a */ +/* sliding value (8-h), where potentially the range is reduced */ +/* more for smaller values. */ +/* */ +/* The leverage that can be applied in this way is severely */ +/* limited by the cost of the raise-to-the-power at the end, */ +/* which dominates when the number of iterations is small (less */ +/* than ten) or when rhs is short. As an example, the adjustment */ +/* x**10,000,000 needs 31 multiplications, all but one full-width. */ +/* */ +/* 3. The restrictions (especially precision) could be raised with */ +/* care, but the full decNumber range seems very hard within the */ +/* 32-bit limits. */ +/* */ +/* 4. 
The working precisions for the static buffers are twice the */ +/* obvious size to allow for calls from decNumberPower. */ +/* ------------------------------------------------------------------ */ +decNumber * decExpOp(decNumber *res, const decNumber *rhs, + decContext *set, uInt *status) { + uInt ignore=0; /* working status */ + Int h; /* adjusted exponent for 0.xxxx */ + Int p; /* working precision */ + Int residue; /* rounding residue */ + uInt needbytes; /* for space calculations */ + const decNumber *x=rhs; /* (may point to safe copy later) */ + decContext aset, tset, dset; /* working contexts */ + Int comp; /* work */ + + /* the argument is often copied to normalize it, so (unusually) it */ + /* is treated like other buffers, using DECBUFFER, +1 in case */ + /* DECBUFFER is 0 */ + decNumber bufr[D2N(DECBUFFER*2+1)]; + decNumber *allocrhs=NULL; /* non-NULL if rhs buffer allocated */ + + /* the working precision will be no more than set->digits+8+1 */ + /* so for on-stack buffers DECBUFFER+9 is used, +1 in case DECBUFFER */ + /* is 0 (and twice that for the accumulator) */ + + /* buffer for t, term (working precision plus) */ + decNumber buft[D2N(DECBUFFER*2+9+1)]; + decNumber *allocbuft=NULL; /* -> allocated buft, iff allocated */ + decNumber *t=buft; /* term */ + /* buffer for a, accumulator (working precision * 2), at least 9 */ + decNumber bufa[D2N(DECBUFFER*4+18+1)]; + decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ + decNumber *a=bufa; /* accumulator */ + /* decNumber for the divisor term; this needs at most 9 digits */ + /* and so can be fixed size [16 so can use standard context] */ + decNumber bufd[D2N(16)]; + decNumber *d=bufd; /* divisor */ + decNumber numone; /* constant 1 */ + + #if DECCHECK + Int iterations=0; /* for later sanity check */ + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + if (SPECIALARG) { /* handle infinities and NaNs */ + if 
(decNumberIsInfinite(rhs)) { /* an infinity */ + if (decNumberIsNegative(rhs)) /* -Infinity -> +0 */ + uprv_decNumberZero(res); + else uprv_decNumberCopy(res, rhs); /* +Infinity -> self */ + } + else decNaNs(res, rhs, NULL, set, status); /* a NaN */ + break;} + + if (ISZERO(rhs)) { /* zeros -> exact 1 */ + uprv_decNumberZero(res); /* make clean 1 */ + *res->lsu=1; /* .. */ + break;} /* [no status to set] */ + + /* e**x when 0 < x < 0.66 is < 1+3x/2, hence can fast-path */ + /* positive and negative tiny cases which will result in inexact */ + /* 1. This also allows the later add-accumulate to always be */ + /* exact (because its length will never be more than twice the */ + /* working precision). */ + /* The comparator (tiny) needs just one digit, so use the */ + /* decNumber d for it (reused as the divisor, etc., below); its */ + /* exponent is such that if x is positive it will have */ + /* set->digits-1 zeros between the decimal point and the digit, */ + /* which is 4, and if x is negative one more zero there as the */ + /* more precise result will be of the form 0.9999999 rather than */ + /* 1.0000001. Hence, tiny will be 0.0000004 if digits=7 and x>0 */ + /* or 0.00000004 if digits=7 and x<0. If RHS not larger than */ + /* this then the result will be 1.000000 */ + uprv_decNumberZero(d); /* clean */ + *d->lsu=4; /* set 4 .. */ + d->exponent=-set->digits; /* * 10**(-d) */ + if (decNumberIsNegative(rhs)) d->exponent--; /* negative case */ + comp=decCompare(d, rhs, 1); /* signless compare */ + if (comp==BADINT) { + *status|=DEC_Insufficient_storage; + break;} + if (comp>=0) { /* rhs < d */ + Int shift=set->digits-1; + uprv_decNumberZero(res); /* set 1 */ + *res->lsu=1; /* .. */ + res->digits=decShiftToMost(res->lsu, 1, shift); + res->exponent=-shift; /* make 1.0000... */ + *status|=DEC_Inexact | DEC_Rounded; /* .. 
inexactly */ + break;} /* tiny */ + + /* set up the context to be used for calculating a, as this is */ + /* used on both paths below */ + uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); + /* accumulator bounds are as requested (could underflow) */ + aset.emax=set->emax; /* usual bounds */ + aset.emin=set->emin; /* .. */ + aset.clamp=0; /* and no concrete format */ + + /* calculate the adjusted (Hull & Abrham) exponent (where the */ + /* decimal point is just to the left of the coefficient msd) */ + h=rhs->exponent+rhs->digits; + /* if h>8 then 10**h cannot be calculated safely; however, when */ + /* h=8 then exp(|rhs|) will be at least exp(1E+7) which is at */ + /* least 6.59E+4342944, so (due to the restriction on Emax/Emin) */ + /* overflow (or underflow to 0) is guaranteed -- so this case can */ + /* be handled by simply forcing the appropriate excess */ + if (h>8) { /* overflow/underflow */ + /* set up here so Power call below will over or underflow to */ + /* zero; set accumulator to either 2 or 0.02 */ + /* [stack buffer for a is always big enough for this] */ + uprv_decNumberZero(a); + *a->lsu=2; /* not 1 but < exp(1) */ + if (decNumberIsNegative(rhs)) a->exponent=-2; /* make 0.02 */ + h=8; /* clamp so 10**h computable */ + p=9; /* set a working precision */ + } + else { /* h<=8 */ + Int maxlever=(rhs->digits>8?1:0); + /* [could/should increase this for precisions >40 or so, too] */ + + /* if h is 8, cannot normalize to a lower upper limit because */ + /* the final result will not be computable (see notes above), */ + /* but leverage can be applied whenever h is less than 8. */ + /* Apply as much as possible, up to a MAXLEVER digits, which */ + /* sets the tradeoff against the cost of the later a**(10**h). */ + /* As h is increased, the working precision below also */ + /* increases to compensate for the "constant digits at the */ + /* front" effect. 
*/ + Int lever=MINI(8-h, maxlever); /* leverage attainable */ + Int use=-rhs->digits-lever; /* exponent to use for RHS */ + h+=lever; /* apply leverage selected */ + if (h<0) { /* clamp */ + use+=h; /* [may end up subnormal] */ + h=0; + } + /* Take a copy of RHS if it needs normalization (true whenever x>=1) */ + if (rhs->exponent!=use) { + decNumber *newrhs=bufr; /* assume will fit on stack */ + needbytes=sizeof(decNumber)+(D2U(rhs->digits)-1)*sizeof(Unit); + if (needbytes>sizeof(bufr)) { /* need malloc space */ + allocrhs=(decNumber *)malloc(needbytes); + if (allocrhs==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + newrhs=allocrhs; /* use the allocated space */ + } + uprv_decNumberCopy(newrhs, rhs); /* copy to safe space */ + newrhs->exponent=use; /* normalize; now <1 */ + x=newrhs; /* ready for use */ + /* decNumberShow(x); */ + } + + /* Now use the usual power series to evaluate exp(x). The */ + /* series starts as 1 + x + x^2/2 ... so prime ready for the */ + /* third term by setting the term variable t=x, the accumulator */ + /* a=1, and the divisor d=2. */ + + /* First determine the working precision. From Hull & Abrham */ + /* this is set->digits+h+2. However, if x is 'over-precise' we */ + /* need to allow for all its digits to potentially participate */ + /* (consider an x where all the excess digits are 9s) so in */ + /* this case use x->digits+h+2 */ + p=MAXI(x->digits, set->digits)+h+2; /* [h<=8] */ + + /* a and t are variable precision, and depend on p, so space */ + /* must be allocated for them if necessary */ + + /* the accumulator needs to be able to hold 2p digits so that */ + /* the additions on the second and subsequent iterations are */ + /* sufficiently exact. 
*/ + needbytes=sizeof(decNumber)+(D2U(p*2)-1)*sizeof(Unit); + if (needbytes>sizeof(bufa)) { /* need malloc space */ + allocbufa=(decNumber *)malloc(needbytes); + if (allocbufa==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + a=allocbufa; /* use the allocated space */ + } + /* the term needs to be able to hold p digits (which is */ + /* guaranteed to be larger than x->digits, so the initial copy */ + /* is safe); it may also be used for the raise-to-power */ + /* calculation below, which needs an extra two digits */ + needbytes=sizeof(decNumber)+(D2U(p+2)-1)*sizeof(Unit); + if (needbytes>sizeof(buft)) { /* need malloc space */ + allocbuft=(decNumber *)malloc(needbytes); + if (allocbuft==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + t=allocbuft; /* use the allocated space */ + } + + uprv_decNumberCopy(t, x); /* term=x */ + uprv_decNumberZero(a); *a->lsu=1; /* accumulator=1 */ + uprv_decNumberZero(d); *d->lsu=2; /* divisor=2 */ + uprv_decNumberZero(&numone); *numone.lsu=1; /* constant 1 for increment */ + + /* set up the contexts for calculating a, t, and d */ + uprv_decContextDefault(&tset, DEC_INIT_DECIMAL64); + dset=tset; + /* accumulator bounds are set above, set precision now */ + aset.digits=p*2; /* double */ + /* term bounds avoid any underflow or overflow */ + tset.digits=p; + tset.emin=DEC_MIN_EMIN; /* [emax is plenty] */ + /* [dset.digits=16, etc., are sufficient] */ + + /* finally ready to roll */ + for (;;) { + #if DECCHECK + iterations++; + #endif + /* only the status from the accumulation is interesting */ + /* [but it should remain unchanged after first add] */ + decAddOp(a, a, t, &aset, 0, status); /* a=a+t */ + decMultiplyOp(t, t, x, &tset, &ignore); /* t=t*x */ + decDivideOp(t, t, d, &tset, DIVIDE, &ignore); /* t=t/d */ + /* the iteration ends when the term cannot affect the result, */ + /* if rounded to p digits, which is when its value is smaller */ + /* than the accumulator by 
p+1 digits. There must also be */ + /* full precision in a. */ + if (((a->digits+a->exponent)>=(t->digits+t->exponent+p+1)) + && (a->digits>=p)) break; + decAddOp(d, d, &numone, &dset, 0, &ignore); /* d=d+1 */ + } /* iterate */ + + #if DECCHECK + /* just a sanity check; comment out test to show always */ + if (iterations>p+3) + printf("Exp iterations=%ld, status=%08lx, p=%ld, d=%ld\n", + (LI)iterations, (LI)*status, (LI)p, (LI)x->digits); + #endif + } /* h<=8 */ + + /* apply postconditioning: a=a**(10**h) -- this is calculated */ + /* at a slightly higher precision than Hull & Abrham suggest */ + if (h>0) { + Int seenbit=0; /* set once a 1-bit is seen */ + Int i; /* counter */ + Int n=powers[h]; /* always positive */ + aset.digits=p+2; /* sufficient precision */ + /* avoid the overhead and many extra digits of decNumberPower */ + /* as all that is needed is the short 'multipliers' loop; here */ + /* accumulate the answer into t */ + uprv_decNumberZero(t); *t->lsu=1; /* acc=1 */ + for (i=1;;i++){ /* for each bit [top bit ignored] */ + /* abandon if have had overflow or terminal underflow */ + if (*status & (DEC_Overflow|DEC_Underflow)) { /* interesting? */ + if (*status&DEC_Overflow || ISZERO(t)) break;} + n=n<<1; /* move next bit to testable position */ + if (n<0) { /* top bit is set */ + seenbit=1; /* OK, have a significant bit */ + decMultiplyOp(t, t, a, &aset, status); /* acc=acc*x */ + } + if (i==31) break; /* that was the last bit */ + if (!seenbit) continue; /* no need to square 1 */ + decMultiplyOp(t, t, t, &aset, status); /* acc=acc*acc [square] */ + } /*i*/ /* 32 bits */ + /* decNumberShow(t); */ + a=t; /* and carry on using t instead of a */ + } + + /* Copy and round the result to res */ + residue=1; /* indicate dirt to right .. */ + if (ISZERO(a)) residue=0; /* .. 
unless underflowed to 0 */ + aset.digits=set->digits; /* [use default rounding] */ + decCopyFit(res, a, &aset, &residue, status); /* copy & shorten */ + decFinish(res, set, &residue, status); /* cleanup/set flags */ + } while(0); /* end protected */ + + if (allocrhs !=NULL) free(allocrhs); /* drop any storage used */ + if (allocbufa!=NULL) free(allocbufa); /* .. */ + if (allocbuft!=NULL) free(allocbuft); /* .. */ + /* [status is handled by caller] */ + return res; + } /* decExpOp */ + +/* ------------------------------------------------------------------ */ +/* Initial-estimate natural logarithm table */ +/* */ +/* LNnn -- 90-entry 16-bit table for values from .10 through .99. */ +/* The result is a 4-digit encode of the coefficient (c=the */ +/* top 14 bits encoding 0-9999) and a 2-digit encode of the */ +/* exponent (e=the bottom 2 bits encoding 0-3) */ +/* */ +/* The resulting value is given by: */ +/* */ +/* v = -c * 10**(-e-3) */ +/* */ +/* where e and c are extracted from entry k = LNnn[x-10] */ +/* where x is truncated (NB) into the range 10 through 99, */ +/* and then c = k>>2 and e = k&3. 
*/
+/* ------------------------------------------------------------------ */
+/* Layout: ten entries per row, so each row covers one leading digit */
+/* (row 0 is x=10..19, row 8 is x=90..99) and the entry for x is */
+/* LNnn[x-10]. Decoding example using the formula above: the first */
+/* entry k=9016 gives c=k>>2=2254 and e=k&3=0, hence v=-2.254. */
+static const uShort LNnn[90]={
+   9016,  8652,  8316,  8008,  7724,  7456,  7208,  6972,  6748,  6540,  /* .10-.19 */
+   6340,  6148,  5968,  5792,  5628,  5464,  5312,  5164,  5020,  4884,  /* .20-.29 */
+   4748,  4620,  4496,  4376,  4256,  4144,  4032, 39233, 38181, 37157,  /* .30-.39 */
+  36157, 35181, 34229, 33297, 32389, 31501, 30629, 29777, 28945, 28129,  /* .40-.49 */
+  27329, 26545, 25777, 25021, 24281, 23553, 22837, 22137, 21445, 20769,  /* .50-.59 */
+  20101, 19445, 18801, 18165, 17541, 16925, 16321, 15721, 15133, 14553,  /* .60-.69 */
+  13985, 13421, 12865, 12317, 11777, 11241, 10717, 10197,  9685,  9177,  /* .70-.79 */
+   8677,  8185,  7697,  7213,  6737,  6269,  5801,  5341,  4889,  4437,  /* .80-.89 */
+  39930, 35534, 31186, 26886, 22630, 18418, 14254, 10130,  6046, 20055}; /* .90-.99 */
+
+/* ------------------------------------------------------------------ */
+/* decLnOp -- effect natural logarithm */
+/* */
+/* This computes C = ln(A) */
+/* */
+/* res is C, the result. C may be A */
+/* rhs is A */
+/* set is the context; note that rounding mode has no effect */
+/* */
+/* C must have space for set->digits digits. */
+/* */
+/* Notable cases: */
+/* A<0 -> Invalid */
+/* A=0 -> -Infinity (Exact) */
+/* A=+Infinity -> +Infinity (Exact) */
+/* A=1 exactly -> 0 (Exact) */
+/* */
+/* Restrictions (as for Exp): */
+/* */
+/* digits, emax, and -emin in the context must be less than */
+/* DEC_MAX_MATH+11 (1000010), and the rhs must be within these */
+/* bounds or a zero. This is an internal routine, so these */
+/* restrictions are contractual and not enforced. */
+/* */
+/* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will */
+/* almost always be correctly rounded, but may be up to 1 ulp in */
+/* error in rare cases. */
+/* ------------------------------------------------------------------ */
+/* The result is calculated using Newton's method, with each */
+/* iteration calculating a' = a + x * exp(-a) - 1. See, for example, */
+/* Epperson 1989. */
+/* */
+/* The iteration ends when the adjustment x*exp(-a)-1 is tiny enough. 
*/ +/* This has to be calculated at the sum of the precision of x and the */ +/* working precision. */ +/* */ +/* Implementation notes: */ +/* */ +/* 1. This is separated out as decLnOp so it can be called from */ +/* other Mathematical functions (e.g., Log 10) with a wider range */ +/* than normal. In particular, it can handle the slightly wider */ +/* (+9+2) range needed by a power function. */ +/* */ +/* 2. This function is about 10x slower than exp, as */ +/* it typically needs 4-6 iterations for short numbers, and the */ +/* extra precision needed adds a squaring effect, twice. */ +/* */ +/* 3. Fastpaths are included for ln(10) and ln(2), up to length 40, */ +/* as these are common requests. ln(10) is used by log10(x). */ +/* */ +/* 4. An iteration might be saved by widening the LNnn table, and */ +/* would certainly save at least one if it were made ten times */ +/* bigger, too (for truncated fractions 0.100 through 0.999). */ +/* However, for most practical evaluations, at least four or five */ +/* iterations will be needed -- so this would only speed up by */ +/* 20-25% and that probably does not justify increasing the table */ +/* size. */ +/* */ +/* 5. The static buffers are larger than might be expected to allow */ +/* for calls from decNumberPower.
*/ +/* ------------------------------------------------------------------ */ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif +decNumber * decLnOp(decNumber *res, const decNumber *rhs, + decContext *set, uInt *status) { + uInt ignore=0; /* working status accumulator */ + uInt needbytes; /* for space calculations */ + Int residue; /* rounding residue */ + Int r; /* rhs=f*10**r [see below] */ + Int p; /* working precision */ + Int pp; /* precision for iteration */ + Int t; /* work */ + + /* buffers for a (accumulator, typically precision+2) and b */ + /* (adjustment calculator, same size) */ + decNumber bufa[D2N(DECBUFFER+12)]; + decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */ + decNumber *a=bufa; /* accumulator/work */ + decNumber bufb[D2N(DECBUFFER*2+2)]; + decNumber *allocbufb=NULL; /* -> allocated bufa, iff allocated */ + decNumber *b=bufb; /* adjustment/work */ + + decNumber numone; /* constant 1 */ + decNumber cmp; /* work */ + decContext aset, bset; /* working contexts */ + + #if DECCHECK + Int iterations=0; /* for later sanity check */ + if (decCheckOperands(res, DECUNUSED, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + if (SPECIALARG) { /* handle infinities and NaNs */ + if (decNumberIsInfinite(rhs)) { /* an infinity */ + if (decNumberIsNegative(rhs)) /* -Infinity -> error */ + *status|=DEC_Invalid_operation; + else uprv_decNumberCopy(res, rhs); /* +Infinity -> self */ + } + else decNaNs(res, rhs, NULL, set, status); /* a NaN */ + break;} + + if (ISZERO(rhs)) { /* +/- zeros -> -Infinity */ + uprv_decNumberZero(res); /* make clean */ + res->bits=DECINF|DECNEG; /* set - infinity */ + break;} /* [no status to set] */ + + /* Non-zero negatives are bad... 
*/ + if (decNumberIsNegative(rhs)) { /* -x -> error */ + *status|=DEC_Invalid_operation; + break;} + + /* Here, rhs is positive, finite, and in range */ + + /* lookaside fastpath code for ln(2) and ln(10) at common lengths */ + if (rhs->exponent==0 && set->digits<=40) { + #if DECDPUN==1 + if (rhs->lsu[0]==0 && rhs->lsu[1]==1 && rhs->digits==2) { /* ln(10) */ + #else + if (rhs->lsu[0]==10 && rhs->digits==2) { /* ln(10) */ + #endif + aset=*set; aset.round=DEC_ROUND_HALF_EVEN; + #define LN10 "2.302585092994045684017991454684364207601" + uprv_decNumberFromString(res, LN10, &aset); + *status|=(DEC_Inexact | DEC_Rounded); /* is inexact */ + break;} + if (rhs->lsu[0]==2 && rhs->digits==1) { /* ln(2) */ + aset=*set; aset.round=DEC_ROUND_HALF_EVEN; + #define LN2 "0.6931471805599453094172321214581765680755" + uprv_decNumberFromString(res, LN2, &aset); + *status|=(DEC_Inexact | DEC_Rounded); + break;} + } /* integer and short */ + + /* Determine the working precision. This is normally the */ + /* requested precision + 2, with a minimum of 9. However, if */ + /* the rhs is 'over-precise' then allow for all its digits to */ + /* potentially participate (consider an rhs where all the excess */ + /* digits are 9s) so in this case use rhs->digits+2. */ + p=MAXI(rhs->digits, MAXI(set->digits, 7))+2; + + /* Allocate space for the accumulator and the high-precision */ + /* adjustment calculator, if necessary. The accumulator must */ + /* be able to hold p digits, and the adjustment up to */ + /* rhs->digits+p digits. They are also made big enough for 16 */ + /* digits so that they can be used for calculating the initial */ + /* estimate. 
*/ + needbytes=sizeof(decNumber)+(D2U(MAXI(p,16))-1)*sizeof(Unit); + if (needbytes>sizeof(bufa)) { /* need malloc space */ + allocbufa=(decNumber *)malloc(needbytes); + if (allocbufa==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + a=allocbufa; /* use the allocated space */ + } + pp=p+rhs->digits; + needbytes=sizeof(decNumber)+(D2U(MAXI(pp,16))-1)*sizeof(Unit); + if (needbytes>sizeof(bufb)) { /* need malloc space */ + allocbufb=(decNumber *)malloc(needbytes); + if (allocbufb==NULL) { /* hopeless -- abandon */ + *status|=DEC_Insufficient_storage; + break;} + b=allocbufb; /* use the allocated space */ + } + + /* Prepare an initial estimate in acc. Calculate this by */ + /* considering the coefficient of x to be a normalized fraction, */ + /* f, with the decimal point at far left and multiplied by */ + /* 10**r. Then, rhs=f*10**r and 0.1<=f<1, and */ + /* ln(x) = ln(f) + ln(10)*r */ + /* Get the initial estimate for ln(f) from a small lookup */ + /* table (see above) indexed by the first two digits of f, */ + /* truncated. */ + + uprv_decContextDefault(&aset, DEC_INIT_DECIMAL64); /* 16-digit extended */ + r=rhs->exponent+rhs->digits; /* 'normalised' exponent */ + uprv_decNumberFromInt32(a, r); /* a=r */ + uprv_decNumberFromInt32(b, 2302585); /* b=ln(10) (2.302585) */ + b->exponent=-6; /* .. 
*/ + decMultiplyOp(a, a, b, &aset, &ignore); /* a=a*b */ + /* now get top two digits of rhs into b by simple truncate and */ + /* force to integer */ + residue=0; /* (no residue) */ + aset.digits=2; aset.round=DEC_ROUND_DOWN; + decCopyFit(b, rhs, &aset, &residue, &ignore); /* copy & shorten */ + b->exponent=0; /* make integer */ + t=decGetInt(b); /* [cannot fail] */ + if (t<10) t=X10(t); /* adjust single-digit b */ + t=LNnn[t-10]; /* look up ln(b) */ + uprv_decNumberFromInt32(b, t>>2); /* b=ln(b) coefficient */ + b->exponent=-(t&3)-3; /* set exponent */ + b->bits=DECNEG; /* ln(0.10)->ln(0.99) always -ve */ + aset.digits=16; aset.round=DEC_ROUND_HALF_EVEN; /* restore */ + decAddOp(a, a, b, &aset, 0, &ignore); /* acc=a+b */ + /* the initial estimate is now in a, with up to 4 digits correct. */ + /* When rhs is at or near Nmax the estimate will be low, so we */ + /* will approach it from below, avoiding overflow when calling exp. */ + + uprv_decNumberZero(&numone); *numone.lsu=1; /* constant 1 for adjustment */ + + /* accumulator bounds are as requested (could underflow, but */ + /* cannot overflow) */ + aset.emax=set->emax; + aset.emin=set->emin; + aset.clamp=0; /* no concrete format */ + /* set up a context to be used for the multiply and subtract */ + bset=aset; + bset.emax=DEC_MAX_MATH*2; /* use double bounds for the */ + bset.emin=-DEC_MAX_MATH*2; /* adjustment calculation */ + /* [see decExpOp call below] */ + /* for each iteration double the number of digits to calculate, */ + /* up to a maximum of p */ + pp=9; /* initial precision */ + /* [initially 9 as then the sequence starts 7+2, 16+2, and */ + /* 34+2, which is ideal for standard-sized numbers] */ + aset.digits=pp; /* working context */ + bset.digits=pp+rhs->digits; /* wider context */ + for (;;) { /* iterate */ + #if DECCHECK + iterations++; + if (iterations>24) break; /* consider 9 * 2**24 */ + #endif + /* calculate the adjustment (exp(-a)*x-1) into b. 
This is a */ + /* catastrophic subtraction but it really is the difference */ + /* from 1 that is of interest. */ + /* Use the internal entry point to Exp as it allows the double */ + /* range for calculating exp(-a) when a is the tiniest subnormal. */ + a->bits^=DECNEG; /* make -a */ + decExpOp(b, a, &bset, &ignore); /* b=exp(-a) */ + a->bits^=DECNEG; /* restore sign of a */ + /* now multiply by rhs and subtract 1, at the wider precision */ + decMultiplyOp(b, b, rhs, &bset, &ignore); /* b=b*rhs */ + decAddOp(b, b, &numone, &bset, DECNEG, &ignore); /* b=b-1 */ + + /* the iteration ends when the adjustment cannot affect the */ + /* result by >=0.5 ulp (at the requested digits), which */ + /* is when its value is smaller than the accumulator by */ + /* set->digits+1 digits (or it is zero) -- this is a looser */ + /* requirement than for Exp because all that happens to the */ + /* accumulator after this is the final rounding (but note that */ + /* there must also be full precision in a, or a=0). */ + + if (decNumberIsZero(b) || + (a->digits+a->exponent)>=(b->digits+b->exponent+set->digits+1)) { + if (a->digits==p) break; + if (decNumberIsZero(a)) { + decCompareOp(&cmp, rhs, &numone, &aset, COMPARE, &ignore); /* rhs=1 ? */ + if (cmp.lsu[0]==0) a->exponent=0; /* yes, exact 0 */ + else *status|=(DEC_Inexact | DEC_Rounded); /* no, inexact */ + break; + } + /* force padding if adjustment has gone to 0 before full length */ + if (decNumberIsZero(b)) b->exponent=a->exponent-p; + } + + /* not done yet ... 
*/ + decAddOp(a, a, b, &aset, 0, &ignore); /* a=a+b for next estimate */ + if (pp==p) continue; /* precision is at maximum */ + /* lengthen the next calculation */ + pp=pp*2; /* double precision */ + if (pp>p) pp=p; /* clamp to maximum */ + aset.digits=pp; /* working context */ + bset.digits=pp+rhs->digits; /* wider context */ + } /* Newton's iteration */ + + #if DECCHECK + /* just a sanity check; remove the test to show always */ + if (iterations>24) + printf("Ln iterations=%ld, status=%08lx, p=%ld, d=%ld\n", + (LI)iterations, (LI)*status, (LI)p, (LI)rhs->digits); + #endif + + /* Copy and round the result to res */ + residue=1; /* indicate dirt to right */ + if (ISZERO(a)) residue=0; /* .. unless underflowed to 0 */ + aset.digits=set->digits; /* [use default rounding] */ + decCopyFit(res, a, &aset, &residue, status); /* copy & shorten */ + decFinish(res, set, &residue, status); /* cleanup/set flags */ + } while(0); /* end protected */ + + if (allocbufa!=NULL) free(allocbufa); /* drop any storage used */ + if (allocbufb!=NULL) free(allocbufb); /* .. */ + /* [status is handled by caller] */ + return res; + } /* decLnOp */ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 406 +#pragma GCC diagnostic pop +#endif + +/* ------------------------------------------------------------------ */ +/* decQuantizeOp -- force exponent to requested value */ +/* */ +/* This computes C = op(A, B), where op adjusts the coefficient */ +/* of C (by rounding or shifting) such that the exponent (-scale) */ +/* of C has the value B or matches the exponent of B. */ +/* The numerical value of C will equal A, except for the effects of */ +/* any rounding that occurred. */ +/* */ +/* res is C, the result. 
C may be A or B */ +/* lhs is A, the number to adjust */ +/* rhs is B, the requested exponent */ +/* set is the context */ +/* quant is 1 for quantize or 0 for rescale */ +/* status is the status accumulator (this can be called without */ +/* risk of control loss) */ +/* */ +/* C must have space for set->digits digits. */ +/* */ +/* Unless there is an error or the result is infinite, the exponent */ +/* after the operation is guaranteed to be that requested. */ +/* ------------------------------------------------------------------ */ +static decNumber * decQuantizeOp(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set, + Flag quant, uInt *status) { + #if DECSUBSET + decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ + decNumber *allocrhs=NULL; /* .., rhs */ + #endif + const decNumber *inrhs=rhs; /* save original rhs */ + Int reqdigits=set->digits; /* requested DIGITS */ + Int reqexp; /* requested exponent [-scale] */ + Int residue=0; /* rounding residue */ + Int etiny=set->emin-(reqdigits-1); + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operands and set lostDigits status, as needed */ + if (lhs->digits>reqdigits) { + alloclhs=decRoundOperand(lhs, set, status); + if (alloclhs==NULL) break; + lhs=alloclhs; + } + if (rhs->digits>reqdigits) { /* [this only checks lostDigits] */ + allocrhs=decRoundOperand(rhs, set, status); + if (allocrhs==NULL) break; + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + /* Handle special values */ + if (SPECIALARGS) { + /* NaNs get usual processing */ + if (SPECIALARGS & (DECSNAN | DECNAN)) + decNaNs(res, lhs, rhs, set, status); + /* one infinity but not both is bad */ + else if ((lhs->bits ^ rhs->bits) & DECINF) + *status|=DEC_Invalid_operation; + /* both infinity: return lhs */ + else uprv_decNumberCopy(res, lhs); /* [nop if 
in place] */ + break; + } + + /* set requested exponent */ + if (quant) reqexp=inrhs->exponent; /* quantize -- match exponents */ + else { /* rescale -- use value of rhs */ + /* Original rhs must be an integer that fits and is in range, */ + /* which could be from -1999999997 to +999999999, thanks to */ + /* subnormals */ + reqexp=decGetInt(inrhs); /* [cannot fail] */ + } + + #if DECSUBSET + if (!set->extended) etiny=set->emin; /* no subnormals */ + #endif + + if (reqexp==BADINT /* bad (rescale only) or .. */ + || reqexp==BIGODD || reqexp==BIGEVEN /* very big (ditto) or .. */ + || (reqexpset->emax)) { /* > emax */ + *status|=DEC_Invalid_operation; + break;} + + /* the RHS has been processed, so it can be overwritten now if necessary */ + if (ISZERO(lhs)) { /* zero coefficient unchanged */ + uprv_decNumberCopy(res, lhs); /* [nop if in place] */ + res->exponent=reqexp; /* .. just set exponent */ + #if DECSUBSET + if (!set->extended) res->bits=0; /* subset specification; no -0 */ + #endif + } + else { /* non-zero lhs */ + Int adjust=reqexp-lhs->exponent; /* digit adjustment needed */ + /* if adjusted coefficient will definitely not fit, give up now */ + if ((lhs->digits-adjust)>reqdigits) { + *status|=DEC_Invalid_operation; + break; + } + + if (adjust>0) { /* increasing exponent */ + /* this will decrease the length of the coefficient by adjust */ + /* digits, and must round as it does so */ + decContext workset; /* work */ + workset=*set; /* clone rounding, etc. */ + workset.digits=lhs->digits-adjust; /* set requested length */ + /* [note that the latter can be <1, here] */ + decCopyFit(res, lhs, &workset, &residue, status); /* fit to result */ + decApplyRound(res, &workset, residue, status); /* .. and round */ + residue=0; /* [used] */ + /* If just rounded a 999s case, exponent will be off by one; */ + /* adjust back (after checking space), if so. 
*/ + if (res->exponent>reqexp) { + /* re-check needed, e.g., for quantize(0.9999, 0.001) under */ + /* set->digits==3 */ + if (res->digits==reqdigits) { /* cannot shift by 1 */ + *status&=~(DEC_Inexact | DEC_Rounded); /* [clean these] */ + *status|=DEC_Invalid_operation; + break; + } + res->digits=decShiftToMost(res->lsu, res->digits, 1); /* shift */ + res->exponent--; /* (re)adjust the exponent. */ + } + #if DECSUBSET + if (ISZERO(res) && !set->extended) res->bits=0; /* subset; no -0 */ + #endif + } /* increase */ + else /* adjust<=0 */ { /* decreasing or = exponent */ + /* this will increase the length of the coefficient by -adjust */ + /* digits, by adding zero or more trailing zeros; this is */ + /* already checked for fit, above */ + uprv_decNumberCopy(res, lhs); /* [it will fit] */ + /* if padding needed (adjust<0), add it now... */ + if (adjust<0) { + res->digits=decShiftToMost(res->lsu, res->digits, -adjust); + res->exponent+=adjust; /* adjust the exponent */ + } + } /* decrease */ + } /* non-zero */ + + /* Check for overflow [do not use Finalize in this case, as an */ + /* overflow here is a "don't fit" situation] */ + if (res->exponent>set->emax-res->digits+1) { /* too big */ + *status|=DEC_Invalid_operation; + break; + } + else { + decFinalize(res, set, &residue, status); /* set subnormal flags */ + *status&=~DEC_Underflow; /* suppress Underflow [as per 754] */ + } + } while(0); /* end protected */ + + #if DECSUBSET + if (allocrhs!=NULL) free(allocrhs); /* drop any storage used */ + if (alloclhs!=NULL) free(alloclhs); /* .. */ + #endif + return res; + } /* decQuantizeOp */ + +/* ------------------------------------------------------------------ */ +/* decCompareOp -- compare, min, or max two Numbers */ +/* */ +/* This computes C = A ? 
B and carries out one of seven operations: */ +/* COMPARE -- returns the signum (as a number) giving the */ +/* result of a comparison unless one or both */ +/* operands is a NaN (in which case a NaN results) */ +/* COMPSIG -- as COMPARE except that a quiet NaN raises */ +/* Invalid operation. */ +/* COMPMAX -- returns the larger of the operands, using the */ +/* 754 maxnum operation */ +/* COMPMAXMAG -- ditto, comparing absolute values */ +/* COMPMIN -- the 754 minnum operation */ +/* COMPMINMAG -- ditto, comparing absolute values */ +/* COMPTOTAL -- returns the signum (as a number) giving the */ +/* result of a comparison using 754 total ordering */ +/* */ +/* res is C, the result. C may be A and/or B (e.g., X=X?X) */ +/* lhs is A */ +/* rhs is B */ +/* set is the context */ +/* op is the operation flag */ +/* status is the usual accumulator */ +/* */ +/* C must have space for one digit for COMPARE or set->digits for */ +/* COMPMAX, COMPMIN, COMPMAXMAG, or COMPMINMAG. */ +/* ------------------------------------------------------------------ */ +/* The emphasis here is on speed for common cases, and avoiding */ +/* coefficient comparison if possible.
*/ +/* ------------------------------------------------------------------ */ +static decNumber * decCompareOp(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set, + Flag op, uInt *status) { + #if DECSUBSET + decNumber *alloclhs=NULL; /* non-NULL if rounded lhs allocated */ + decNumber *allocrhs=NULL; /* .., rhs */ + #endif + Int result=0; /* default result value */ + uByte merged; /* work */ + + #if DECCHECK + if (decCheckOperands(res, lhs, rhs, set)) return res; + #endif + + do { /* protect allocated storage */ + #if DECSUBSET + if (!set->extended) { + /* reduce operands and set lostDigits status, as needed */ + if (lhs->digits>set->digits) { + alloclhs=decRoundOperand(lhs, set, status); + if (alloclhs==NULL) {result=BADINT; break;} + lhs=alloclhs; + } + if (rhs->digits>set->digits) { + allocrhs=decRoundOperand(rhs, set, status); + if (allocrhs==NULL) {result=BADINT; break;} + rhs=allocrhs; + } + } + #endif + /* [following code does not require input rounding] */ + + /* If total ordering then handle differing signs 'up front' */ + if (op==COMPTOTAL) { /* total ordering */ + if (decNumberIsNegative(lhs) && !decNumberIsNegative(rhs)) { + result=-1; + break; + } + if (!decNumberIsNegative(lhs) && decNumberIsNegative(rhs)) { + result=+1; + break; + } + } + + /* handle NaNs specially; let infinities drop through */ + /* This assumes sNaN (even just one) leads to NaN. 
*/ + merged=(lhs->bits | rhs->bits) & (DECSNAN | DECNAN); + if (merged) { /* a NaN bit set */ + if (op==COMPARE); /* result will be NaN */ + else if (op==COMPSIG) /* treat qNaN as sNaN */ + *status|=DEC_Invalid_operation | DEC_sNaN; + else if (op==COMPTOTAL) { /* total ordering, always finite */ + /* signs are known to be the same; compute the ordering here */ + /* as if the signs are both positive, then invert for negatives */ + if (!decNumberIsNaN(lhs)) result=-1; + else if (!decNumberIsNaN(rhs)) result=+1; + /* here if both NaNs */ + else if (decNumberIsSNaN(lhs) && decNumberIsQNaN(rhs)) result=-1; + else if (decNumberIsQNaN(lhs) && decNumberIsSNaN(rhs)) result=+1; + else { /* both NaN or both sNaN */ + /* now it just depends on the payload */ + result=decUnitCompare(lhs->lsu, D2U(lhs->digits), + rhs->lsu, D2U(rhs->digits), 0); + /* [Error not possible, as these are 'aligned'] */ + } /* both same NaNs */ + if (decNumberIsNegative(lhs)) result=-result; + break; + } /* total order */ + + else if (merged & DECSNAN); /* sNaN -> qNaN */ + else { /* here if MIN or MAX and one or two quiet NaNs */ + /* min or max -- 754 rules ignore single NaN */ + if (!decNumberIsNaN(lhs) || !decNumberIsNaN(rhs)) { + /* just one NaN; force choice to be the non-NaN operand */ + op=COMPMAX; + if (lhs->bits & DECNAN) result=-1; /* pick rhs */ + else result=+1; /* pick lhs */ + break; + } + } /* max or min */ + op=COMPNAN; /* use special path */ + decNaNs(res, lhs, rhs, set, status); /* propagate NaN */ + break; + } + /* have numbers */ + if (op==COMPMAXMAG || op==COMPMINMAG) result=decCompare(lhs, rhs, 1); + else result=decCompare(lhs, rhs, 0); /* sign matters */ + } while(0); /* end protected */ + + if (result==BADINT) *status|=DEC_Insufficient_storage; /* rare */ + else { + if (op==COMPARE || op==COMPSIG ||op==COMPTOTAL) { /* returning signum */ + if (op==COMPTOTAL && result==0) { + /* operands are numerically equal or same NaN (and same sign, */ + /* tested first); if identical, leave 
result 0 */ + if (lhs->exponent!=rhs->exponent) { + if (lhs->exponentexponent) result=-1; + else result=+1; + if (decNumberIsNegative(lhs)) result=-result; + } /* lexp!=rexp */ + } /* total-order by exponent */ + uprv_decNumberZero(res); /* [always a valid result] */ + if (result!=0) { /* must be -1 or +1 */ + *res->lsu=1; + if (result<0) res->bits=DECNEG; + } + } + else if (op==COMPNAN); /* special, drop through */ + else { /* MAX or MIN, non-NaN result */ + Int residue=0; /* rounding accumulator */ + /* choose the operand for the result */ + const decNumber *choice; + if (result==0) { /* operands are numerically equal */ + /* choose according to sign then exponent (see 754) */ + uByte slhs=(lhs->bits & DECNEG); + uByte srhs=(rhs->bits & DECNEG); + #if DECSUBSET + if (!set->extended) { /* subset: force left-hand */ + op=COMPMAX; + result=+1; + } + else + #endif + if (slhs!=srhs) { /* signs differ */ + if (slhs) result=-1; /* rhs is max */ + else result=+1; /* lhs is max */ + } + else if (slhs && srhs) { /* both negative */ + if (lhs->exponentexponent) result=+1; + else result=-1; + /* [if equal, use lhs, technically identical] */ + } + else { /* both positive */ + if (lhs->exponent>rhs->exponent) result=+1; + else result=-1; + /* [ditto] */ + } + } /* numerically equal */ + /* here result will be non-0; reverse if looking for MIN */ + if (op==COMPMIN || op==COMPMINMAG) result=-result; + choice=(result>0 ? lhs : rhs); /* choose */ + /* copy chosen to result, rounding if need be */ + decCopyFit(res, choice, set, &residue, status); + decFinish(res, set, &residue, status); + } + } + #if DECSUBSET + if (allocrhs!=NULL) free(allocrhs); /* free any storage used */ + if (alloclhs!=NULL) free(alloclhs); /* .. */ + #endif + return res; + } /* decCompareOp */ + +/* ------------------------------------------------------------------ */ +/* decCompare -- compare two decNumbers by numerical value */ +/* */ +/* This routine compares A ? B without altering them. 
*/
/* */
/* Arg1 is A, a decNumber which is not a NaN */
/* Arg2 is B, a decNumber which is not a NaN */
/* Arg3 is 1 for a sign-independent compare, 0 otherwise */
/* */
/* returns -1, 0, or 1 for A<B, A==B, or A>B, or BADINT if failure */
/* (the only possible failure is an allocation error) */
/* ------------------------------------------------------------------ */
static Int decCompare(const decNumber *lhs, const decNumber *rhs,
                      Flag abs_c) {
  const decNumber *lo=lhs;         /* operand with the smaller exponent */
  const decNumber *hi=rhs;         /* .. and the other one */
  Int sense;                       /* signum(lhs), then sense of answer */
  Int order;                       /* coefficient comparison */

  sense=ISZERO(lhs) ? 0 : 1;
  if (abs_c) {                     /* magnitudes only */
    if (ISZERO(rhs)) return sense; /* lhs wins, or both are 0 */
    if (sense==0) return -1;       /* lhs is 0 and rhs is not */
    /* both non-zero, sense is 1 */
  }
  else {                           /* signs matter */
    Int rsign;                     /* signum(rhs) */
    if (sense!=0 && decNumberIsNegative(lhs)) sense=-1;
    if (ISZERO(rhs)) rsign=0;
    else rsign=decNumberIsNegative(rhs) ? -1 : 1;
    if (sense>rsign) return +1;
    if (sense<rsign) return -1;
    if (sense==0) return 0;        /* both are 0 */
  }

  /* same signum on both sides, and neither is zero */
  if ((lhs->bits | rhs->bits) & DECINF) {
    if (!decNumberIsInfinite(rhs)) return sense;   /* only lhs infinite */
    return decNumberIsInfinite(lhs) ? 0 : -sense;  /* both / only rhs */
  }

  /* Coefficients have to be compared.  decUnitCompare is given the */
  /* operand with the smaller exponent first, so swap the roles (and */
  /* the sense of the answer) when that is rhs. */
  if (lhs->exponent>rhs->exponent) {
    lo=rhs;
    hi=lhs;
    sense=-sense;
  }
  order=decUnitCompare(lo->lsu, D2U(lo->digits),
                       hi->lsu, D2U(hi->digits),
                       hi->exponent-lo->exponent);
  if (order==BADINT) return order; /* allocation failure passes through */
  return order*sense;
} /* decCompare */

/* ------------------------------------------------------------------ */
/* decUnitCompare -- compare two >=0
integers in Unit arrays */ +/* */ +/* This routine compares A ? B*10**E where A and B are unit arrays */ +/* A is a plain integer */ +/* B has an exponent of E (which must be non-negative) */ +/* */ +/* Arg1 is A first Unit (lsu) */ +/* Arg2 is A length in Units */ +/* Arg3 is B first Unit (lsu) */ +/* Arg4 is B length in Units */ +/* Arg5 is E (0 if the units are aligned) */ +/* */ +/* returns -1, 0, or 1 for AB, or BADINT if failure */ +/* (the only possible failure is an allocation error, which can */ +/* only occur if E!=0) */ +/* ------------------------------------------------------------------ */ +static Int decUnitCompare(const Unit *a, Int alength, + const Unit *b, Int blength, Int exp) { + Unit *acc; /* accumulator for result */ + Unit accbuff[SD2U(DECBUFFER*2+1)]; /* local buffer */ + Unit *allocacc=NULL; /* -> allocated acc buffer, iff allocated */ + Int accunits, need; /* units in use or needed for acc */ + const Unit *l, *r, *u; /* work */ + Int expunits, exprem, result; /* .. */ + + if (exp==0) { /* aligned; fastpath */ + if (alength>blength) return 1; + if (alength=a; l--, r--) { + if (*l>*r) return 1; + if (*l<*r) return -1; + } + return 0; /* all units match */ + } /* aligned */ + + /* Unaligned. If one is >1 unit longer than the other, padded */ + /* approximately, then can return easily */ + if (alength>blength+(Int)D2U(exp)) return 1; + if (alength+1sizeof(accbuff)) { + allocacc=(Unit *)malloc(need*sizeof(Unit)); + if (allocacc==NULL) return BADINT; /* hopeless -- abandon */ + acc=allocacc; + } + /* Calculate units and remainder from exponent. 
*/ + expunits=exp/DECDPUN; + exprem=exp%DECDPUN; + /* subtract [A+B*(-m)] */ + accunits=decUnitAddSub(a, alength, b, blength, expunits, acc, + -(Int)powers[exprem]); + /* [UnitAddSub result may have leading zeros, even on zero] */ + if (accunits<0) result=-1; /* negative result */ + else { /* non-negative result */ + /* check units of the result before freeing any storage */ + for (u=acc; u=0 integers in Unit arrays */ +/* */ +/* This routine performs the calculation: */ +/* */ +/* C=A+(B*M) */ +/* */ +/* Where M is in the range -DECDPUNMAX through +DECDPUNMAX. */ +/* */ +/* A may be shorter or longer than B. */ +/* */ +/* Leading zeros are not removed after a calculation. The result is */ +/* either the same length as the longer of A and B (adding any */ +/* shift), or one Unit longer than that (if a Unit carry occurred). */ +/* */ +/* A and B content are not altered unless C is also A or B. */ +/* C may be the same array as A or B, but only if no zero padding is */ +/* requested (that is, C may be B only if bshift==0). */ +/* C is filled from the lsu; only those units necessary to complete */ +/* the calculation are referenced. */ +/* */ +/* Arg1 is A first Unit (lsu) */ +/* Arg2 is A length in Units */ +/* Arg3 is B first Unit (lsu) */ +/* Arg4 is B length in Units */ +/* Arg5 is B shift in Units (>=0; pads with 0 units if positive) */ +/* Arg6 is C first Unit (lsu) */ +/* Arg7 is M, the multiplier */ +/* */ +/* returns the count of Units written to C, which will be non-zero */ +/* and negated if the result is negative. That is, the sign of the */ +/* returned Int is the sign of the result (positive for zero) and */ +/* the absolute value of the Int is the count of Units. */ +/* */ +/* It is the caller's responsibility to make sure that C size is */ +/* safe, allowing space if necessary for a one-Unit carry. */ +/* */ +/* This routine is severely performance-critical; *any* change here */ +/* must be measured (timed) to assure no performance degradation. 
*/ +/* In particular, trickery here tends to be counter-productive, as */ +/* increased complexity of code hurts register optimizations on */ +/* register-poor architectures. Avoiding divisions is nearly */ +/* always a Good Idea, however. */ +/* */ +/* Special thanks to Rick McGuire (IBM Cambridge, MA) and Dave Clark */ +/* (IBM Warwick, UK) for some of the ideas used in this routine. */ +/* ------------------------------------------------------------------ */ +static Int decUnitAddSub(const Unit *a, Int alength, + const Unit *b, Int blength, Int bshift, + Unit *c, Int m) { + const Unit *alsu=a; /* A lsu [need to remember it] */ + Unit *clsu=c; /* C ditto */ + Unit *minC; /* low water mark for C */ + Unit *maxC; /* high water mark for C */ + eInt carry=0; /* carry integer (could be Long) */ + Int add; /* work */ + #if DECDPUN<=4 /* myriadal, millenary, etc. */ + Int est; /* estimated quotient */ + #endif + + #if DECTRACE + if (alength<1 || blength<1) + printf("decUnitAddSub: alen blen m %ld %ld [%ld]\n", alength, blength, m); + #endif + + maxC=c+alength; /* A is usually the longer */ + minC=c+blength; /* .. and B the shorter */ + if (bshift!=0) { /* B is shifted; low As copy across */ + minC+=bshift; + /* if in place [common], skip copy unless there's a gap [rare] */ + if (a==c && bshift<=alength) { + c+=bshift; + a+=bshift; + } + else for (; cmaxC) { /* swap */ + Unit *hold=minC; + minC=maxC; + maxC=hold; + } + + /* For speed, do the addition as two loops; the first where both A */ + /* and B contribute, and the second (if necessary) where only one or */ + /* other of the numbers contribute. */ + /* Carry handling is the same (i.e., duplicated) in each case. 
*/ + for (; c=0) { + est=(((ueInt)carry>>11)*53687)>>18; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ + carry=est; /* likely quotient [89%] */ + if (*c>11)*53687)>>18; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); + carry=est-(DECDPUNMAX+1); /* correctly negative */ + if (*c=0) { + est=(((ueInt)carry>>3)*16777)>>21; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ + carry=est; /* likely quotient [99%] */ + if (*c>3)*16777)>>21; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); + carry=est-(DECDPUNMAX+1); /* correctly negative */ + if (*c=0) { + est=QUOT10(carry, DECDPUN); + *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ + carry=est; /* quotient */ + continue; + } + /* negative case */ + carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ + est=QUOT10(carry, DECDPUN); + *c=(Unit)(carry-est*(DECDPUNMAX+1)); + carry=est-(DECDPUNMAX+1); /* correctly negative */ + #else + /* remainder operator is undefined if negative, so must test */ + if ((ueInt)carry<(DECDPUNMAX+1)*2) { /* fastpath carry +1 */ + *c=(Unit)(carry-(DECDPUNMAX+1)); /* [helps additions] */ + carry=1; + continue; + } + if (carry>=0) { + *c=(Unit)(carry%(DECDPUNMAX+1)); + carry=carry/(DECDPUNMAX+1); + continue; + } + /* negative case */ + carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ + *c=(Unit)(carry%(DECDPUNMAX+1)); + carry=carry/(DECDPUNMAX+1)-(DECDPUNMAX+1); + #endif + } /* c */ + + /* now may have one or other to complete */ + /* [pretest to avoid loop setup/shutdown] */ + if (cDECDPUNMAX */ + #if DECDPUN==4 /* use divide-by-multiply */ + if (carry>=0) { + est=(((ueInt)carry>>11)*53687)>>18; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ + carry=est; /* likely quotient [79.7%] */ + if (*c>11)*53687)>>18; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); + carry=est-(DECDPUNMAX+1); /* correctly negative */ + if (*c=0) { + est=(((ueInt)carry>>3)*16777)>>21; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ + carry=est; /* likely quotient [99%] */ + if 
(*c>3)*16777)>>21; + *c=(Unit)(carry-est*(DECDPUNMAX+1)); + carry=est-(DECDPUNMAX+1); /* correctly negative */ + if (*c=0) { + est=QUOT10(carry, DECDPUN); + *c=(Unit)(carry-est*(DECDPUNMAX+1)); /* remainder */ + carry=est; /* quotient */ + continue; + } + /* negative case */ + carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ + est=QUOT10(carry, DECDPUN); + *c=(Unit)(carry-est*(DECDPUNMAX+1)); + carry=est-(DECDPUNMAX+1); /* correctly negative */ + #else + if ((ueInt)carry<(DECDPUNMAX+1)*2){ /* fastpath carry 1 */ + *c=(Unit)(carry-(DECDPUNMAX+1)); + carry=1; + continue; + } + /* remainder operator is undefined if negative, so must test */ + if (carry>=0) { + *c=(Unit)(carry%(DECDPUNMAX+1)); + carry=carry/(DECDPUNMAX+1); + continue; + } + /* negative case */ + carry=carry+(eInt)(DECDPUNMAX+1)*(DECDPUNMAX+1); /* make positive */ + *c=(Unit)(carry%(DECDPUNMAX+1)); + carry=carry/(DECDPUNMAX+1)-(DECDPUNMAX+1); + #endif + } /* c */ + + /* OK, all A and B processed; might still have carry or borrow */ + /* return number of Units in the result, negated if a borrow */ + if (carry==0) return c-clsu; /* no carry, so no more to do */ + if (carry>0) { /* positive carry */ + *c=(Unit)carry; /* place as new unit */ + c++; /* .. */ + return c-clsu; + } + /* -ve carry: it's a borrow; complement needed */ + add=1; /* temporary carry... */ + for (c=clsu; c current Unit */ + + #if DECCHECK + if (decCheckOperands(dn, DECUNUSED, DECUNUSED, DECUNCONT)) return dn; + #endif + + *dropped=0; /* assume no zeros dropped */ + if ((dn->bits & DECSPECIAL) /* fast exit if special .. */ + || (*dn->lsu & 0x01)) return dn; /* .. or odd */ + if (ISZERO(dn)) { /* .. 
or 0 */ + dn->exponent=0; /* (sign is preserved) */ + return dn; + } + + /* have a finite number which is even */ + exp=dn->exponent; + cut=1; /* digit (1-DECDPUN) in Unit */ + up=dn->lsu; /* -> current Unit */ + for (d=0; ddigits-1; d++) { /* [don't strip the final digit] */ + /* slice by powers */ + #if DECDPUN<=4 + uInt quot=QUOT10(*up, cut); + if ((*up-quot*powers[cut])!=0) break; /* found non-0 digit */ + #else + if (*up%powers[cut]!=0) break; /* found non-0 digit */ + #endif + /* have a trailing 0 */ + if (!all) { /* trimming */ + /* [if exp>0 then all trailing 0s are significant for trim] */ + if (exp<=0) { /* if digit might be significant */ + if (exp==0) break; /* then quit */ + exp++; /* next digit might be significant */ + } + } + cut++; /* next power */ + if (cut>DECDPUN) { /* need new Unit */ + up++; + cut=1; + } + } /* d */ + if (d==0) return dn; /* none to drop */ + + /* may need to limit drop if clamping */ + if (set->clamp && !noclamp) { + Int maxd=set->emax-set->digits+1-dn->exponent; + if (maxd<=0) return dn; /* nothing possible */ + if (d>maxd) d=maxd; + } + + /* effect the drop */ + decShiftToLeast(dn->lsu, D2U(dn->digits), d); + dn->exponent+=d; /* maintain numerical value */ + dn->digits-=d; /* new length */ + *dropped=d; /* report the count */ + return dn; + } /* decTrim */ + +/* ------------------------------------------------------------------ */ +/* decReverse -- reverse a Unit array in place */ +/* */ +/* ulo is the start of the array */ +/* uhi is the end of the array (highest Unit to include) */ +/* */ +/* The units ulo through uhi are reversed in place (if the number */ +/* of units is odd, the middle one is untouched). Note that the */ +/* digit(s) in each unit are unaffected. 
*/ +/* ------------------------------------------------------------------ */ +static void decReverse(Unit *ulo, Unit *uhi) { + Unit temp; /* NOTE(review): the loop header below breaks off after 'ulo' and resumes inside what looks like the shift-move code of decShiftToMost; the text in between (the rest of decReverse, and the header comment, signature and declarations of decShiftToMost) appears to have been lost in extraction -- restore from upstream before use */ + for (; ulo=uar; source--, target--) *target=*source; + } + else { + first=uar+D2U(digits+shift)-1; /* where msu of source will end up */ + for (; source>=uar; source--, target--) { + /* split the source Unit and accumulate remainder for next */ + #if DECDPUN<=4 + uInt quot=QUOT10(*source, cut); + uInt rem=*source-quot*powers[cut]; + next+=quot; + #else + uInt rem=*source%powers[cut]; + next+=*source/powers[cut]; + #endif + if (target<=first) *target=(Unit)next; /* write to target iff valid */ + next=rem*powers[DECDPUN-cut]; /* save remainder for next Unit */ + } + } /* shift-move */ + + /* propagate any partial unit to one below and clear the rest */ + for (; target>=uar; target--) { + *target=(Unit)next; + next=0; /* only the first Unit written here receives the carried part */ + } + return digits+shift; + } /* decShiftToMost */ + +/* ------------------------------------------------------------------ */ +/* decShiftToLeast -- shift digits in array towards least significant */ +/* */ +/* uar is the array */ +/* units is length of the array, in units */ +/* shift is the number of digits to remove from the lsu end; it */ +/* must be zero or positive and <= units*DECDPUN. */ +/* */ +/* returns the new length of the integer in the array, in units */ +/* */ +/* Removed digits are discarded (lost). Units not required to hold */ +/* the final result are unchanged. 
*/ +/* ------------------------------------------------------------------ */ +static Int decShiftToLeast(Unit *uar, Int units, Int shift) { + Unit *target, *up; /* work */ + Int cut, count; /* work */ + Int quot, rem; /* for division */ + + if (shift==0) return units; /* [fastpath] nothing to do */ + if (shift==units*DECDPUN) { /* [fastpath] little to do */ + *uar=0; /* all digits cleared gives zero */ + return 1; /* leaves just the one */ + } + + target=uar; /* both paths */ + cut=MSUDIGITS(shift); + if (cut==DECDPUN) { /* unit-boundary case; easy */ + up=uar+D2U(shift); /* NOTE(review): the loop header below is truncated after 'up'; the remainder of decShiftToLeast, the '#if' that should pair with the '#endif' following decRoundOperand, and the start of the decRoundOperand header comment appear to have been lost in extraction -- restore from upstream before use */ + for (; updigits is > set->digits) */ +/* set is the relevant context */ +/* status is the status accumulator */ +/* */ +/* returns an allocated decNumber with the rounded result. */ +/* */ +/* lostDigits and other status may be set by this. */ +/* */ +/* Since the input is an operand, it must not be modified. */ +/* Instead, return an allocated decNumber, rounded as required. */ +/* It is the caller's responsibility to free the allocated storage. */ +/* */ +/* If no storage is available then the result cannot be used, so NULL */ +/* is returned. */ +/* ------------------------------------------------------------------ */ +static decNumber *decRoundOperand(const decNumber *dn, decContext *set, + uInt *status) { + decNumber *res; /* result structure */ + uInt newstatus=0; /* status from round */ + Int residue=0; /* rounding accumulator */ + + /* Allocate storage for the returned decNumber, big enough for the */ + /* length specified by the context */ + res=(decNumber *)malloc(sizeof(decNumber) + +(D2U(set->digits)-1)*sizeof(Unit)); + if (res==NULL) { + *status|=DEC_Insufficient_storage; + return NULL; + } + decCopyFit(res, dn, set, &residue, &newstatus); /* copy, truncating the coefficient if needed */ + decApplyRound(res, set, residue, &newstatus); /* then apply the rounding left pending in residue */ + + /* If that set Inexact then "lost digits" is raised... 
*/ + if (newstatus & DEC_Inexact) newstatus|=DEC_Lost_digits; + *status|=newstatus; /* merge into the caller's status only at the end */ + return res; + } /* decRoundOperand */ +#endif + +/* ------------------------------------------------------------------ */ +/* decCopyFit -- copy a number, truncating the coefficient if needed */ +/* */ +/* dest is the target decNumber */ +/* src is the source decNumber */ +/* set is the context [used for length (digits) and rounding mode] */ +/* residue is the residue accumulator */ +/* status contains the current status to be updated */ +/* */ +/* (dest==src is allowed and will be a no-op if it fits) */ +/* All fields are updated as required. */ +/* ------------------------------------------------------------------ */ +static void decCopyFit(decNumber *dest, const decNumber *src, + decContext *set, Int *residue, uInt *status) { /* sign/flags and exponent are copied as-is; decSetCoeff then copies the coefficient */ + dest->bits=src->bits; + dest->exponent=src->exponent; + decSetCoeff(dest, set, src->lsu, src->digits, residue, status); + } /* decCopyFit */ + +/* ------------------------------------------------------------------ */ +/* decSetCoeff -- set the coefficient of a number */ +/* */ +/* dn is the number whose coefficient array is to be set. */ +/* It must have space for set->digits digits */ +/* set is the context [for size] */ +/* lsu -> lsu of the source coefficient [may be dn->lsu] */ +/* len is digits in the source coefficient [may be dn->digits] */ +/* residue is the residue accumulator. This has values as in */ +/* decApplyRound, and will be unchanged unless the */ +/* target size is less than len. In this case, the */ +/* coefficient is truncated and the residue is updated to */ +/* reflect the previous residue and the dropped digits. */ +/* status is the status accumulator, as usual */ +/* */ +/* The coefficient may already be in the number, or it can be an */ +/* external intermediate array. If it is in the number, lsu must == */ +/* dn->lsu and len must == dn->digits. 
*/ +/* */ +/* Note that the coefficient length (len) may be < set->digits, and */ +/* in this case this merely copies the coefficient (or is a no-op */ +/* if dn->lsu==lsu). */ +/* */ +/* Note also that (only internally, from decQuantizeOp and */ +/* decSetSubnormal) the value of set->digits may be less than one, */ +/* indicating a round to left. This routine handles that case */ +/* correctly; caller ensures space. */ +/* */ +/* dn->digits, dn->lsu (as required), and dn->exponent are */ +/* updated as necessary. dn->bits (sign) is unchanged. */ +/* */ +/* DEC_Rounded status is set if any digits are discarded. */ +/* DEC_Inexact status is set if any non-zero digits are discarded, or */ +/* incoming residue was non-0 (implies rounded) */ +/* ------------------------------------------------------------------ */ +/* mapping array: maps 0-9 to canonical residues, so that a residue */ +/* can be adjusted in the range [-1, +1] and achieve correct rounding */ +/* 0 1 2 3 4 5 6 7 8 9 */ +static const uByte resmap[10]={0, 3, 3, 3, 3, 5, 7, 7, 7, 7}; +static void decSetCoeff(decNumber *dn, decContext *set, const Unit *lsu, + Int len, Int *residue, uInt *status) { + Int discard; /* number of digits to discard */ + uInt cut; /* cut point in Unit */ + const Unit *up; /* work */ + Unit *target; /* .. */ + Int count; /* .. */ + #if DECDPUN<=4 + uInt temp; /* .. */ + #endif + + discard=len-set->digits; /* digits to discard */ + if (discard<=0) { /* no digits are being discarded */ + if (dn->lsu!=lsu) { /* copy needed */ + /* copy the coefficient array to the result number; no shift needed */ + count=len; /* avoids D2U */ + up=lsu; + for (target=dn->lsu; count>0; target++, up++, count-=DECDPUN) + *target=*up; + dn->digits=len; /* set the new length */ + } + /* dn->exponent and residue are unchanged, record any inexactitude */ + if (*residue!=0) *status|=(DEC_Inexact | DEC_Rounded); + return; + } + + /* some digits must be discarded ... 
*/ + dn->exponent+=discard; /* maintain numerical value */ + *status|=DEC_Rounded; /* accumulate Rounded status */ + if (*residue>1) *residue=1; /* previous residue now to right, so reduce */ + + if (discard>len) { /* everything, +1, is being discarded */ + /* guard digit is 0 */ + /* residue is all the number [NB could be all 0s] */ + if (*residue<=0) { /* not already positive */ + count=len; /* avoids D2U */ + for (up=lsu; count>0; up++, count-=DECDPUN) if (*up!=0) { /* found non-0 */ + *residue=1; + break; /* no need to check any others */ + } + } + if (*residue!=0) *status|=DEC_Inexact; /* record inexactitude */ + *dn->lsu=0; /* coefficient will now be 0 */ + dn->digits=1; /* .. */ + return; + } /* total discard */ + + /* partial discard [most common case] */ + /* here, at least the first (most significant) discarded digit exists */ + + /* spin up the number, noting residue during the spin, until get to */ + /* the Unit with the first discarded digit. When reach it, extract */ + /* it and remember its position */ + count=0; + for (up=lsu;; up++) { + count+=DECDPUN; + if (count>=discard) break; /* full ones all checked */ + if (*up!=0) *residue=1; + } /* up */ + + /* here up -> Unit with first discarded digit */ + cut=discard-(count-DECDPUN)-1; + if (cut==DECDPUN-1) { /* unit-boundary case (fast) */ + Unit half=(Unit)powers[DECDPUN]>>1; + /* set residue directly */ + if (*up>=half) { + if (*up>half) *residue=7; + else *residue+=5; /* add sticky bit */ + } + else { /* digits<=0) { /* special for Quantize/Subnormal :-( */ /* NOTE(review): text appears to be missing just before 'digits<=0' above (the original comment on this else branch, whatever code followed it, and the start of a test on set->digits, judging by the parallel test in the non-boundary branch further down); restore from upstream before use */ + *dn->lsu=0; /* .. result is 0 */ + dn->digits=1; /* .. 
*/ + } + else { /* shift to least */ + count=set->digits; /* now digits to end up with */ + dn->digits=count; /* set the new length */ + up++; /* move to next */ + /* on unit boundary, so shift-down copy loop is simple */ + for (target=dn->lsu; count>0; target++, up++, count-=DECDPUN) + *target=*up; + } + } /* unit-boundary case */ + + else { /* discard digit is in low digit(s), and not top digit */ + uInt discard1; /* first discarded digit */ + uInt quot, rem; /* for divisions */ + if (cut==0) quot=*up; /* is at bottom of unit */ + else /* cut>0 */ { /* it's not at bottom of unit */ + #if DECDPUN<=4 + U_ASSERT(/* cut >= 0 &&*/ cut <= 4); + quot=QUOT10(*up, cut); + rem=*up-quot*powers[cut]; + #else + rem=*up%powers[cut]; + quot=*up/powers[cut]; + #endif + if (rem!=0) *residue=1; + } + /* discard digit is now at bottom of quot */ + #if DECDPUN<=4 + temp=(quot*6554)>>16; /* fast /10 */ + /* Vowels algorithm here not a win (9 instructions) */ + discard1=quot-X10(temp); + quot=temp; + #else + discard1=quot%10; + quot=quot/10; + #endif + /* here, discard1 is the guard digit, and residue is everything */ + /* else [use mapping array to accumulate residue safely] */ + *residue+=resmap[discard1]; + cut++; /* update cut */ + /* here: up -> Unit of the array with bottom digit */ + /* cut is the division point for each Unit */ + /* quot holds the uncut high-order digits for the current unit */ + if (set->digits<=0) { /* special for Quantize/Subnormal :-( */ + *dn->lsu=0; /* .. result is 0 */ + dn->digits=1; /* .. 
*/ + } + else { /* shift to least needed */ + count=set->digits; /* now digits to end up with */ + dn->digits=count; /* set the new length */ + /* shift-copy the coefficient array to the result number */ + for (target=dn->lsu; ; target++) { + *target=(Unit)quot; /* kept high-order digits of the current source Unit */ + count-=(DECDPUN-cut); + if (count<=0) break; + up++; + quot=*up; + #if DECDPUN<=4 + quot=QUOT10(quot, cut); + rem=*up-quot*powers[cut]; + #else + rem=quot%powers[cut]; + quot=quot/powers[cut]; + #endif + *target=(Unit)(*target+rem*powers[DECDPUN-cut]); /* low digits of the next source Unit fill the top of this target Unit */ + count-=cut; + if (count<=0) break; + } /* shift-copy loop */ + } /* shift to least */ + } /* not unit boundary */ + + if (*residue!=0) *status|=DEC_Inexact; /* record inexactitude */ + return; + } /* decSetCoeff */ + +/* ------------------------------------------------------------------ */ +/* decApplyRound -- apply pending rounding to a number */ +/* */ +/* dn is the number, with space for set->digits digits */ +/* set is the context [for size and rounding mode] */ +/* residue indicates pending rounding, being any accumulated */ +/* guard and sticky information. It may be: */ +/* 6-9: rounding digit is >5 */ +/* 5: rounding digit is exactly half-way */ +/* 1-4: rounding digit is <5 and >0 */ +/* 0: the coefficient is exact */ +/* -1: as 1, but the hidden digits are subtractive, that */ +/* is, of the opposite sign to dn. In this case the */ +/* coefficient must be non-0. This case occurs when */ +/* subtracting a small number (which can be reduced to */ +/* a sticky bit); see decAddOp. */ +/* status is the status accumulator, as usual */ +/* */ +/* This routine applies rounding while keeping the length of the */ +/* coefficient constant. 
The exponent and status are unchanged */ +/* except if: */ +/* */ +/* -- the coefficient was increased and is all nines (in which */ +/* case Overflow could occur, and is handled directly here so */ +/* the caller does not need to re-test for overflow) */ +/* */ +/* -- the coefficient was decreased and becomes all nines (in which */ +/* case Underflow could occur, and is also handled directly). */ +/* */ +/* All fields in dn are updated as required. */ +/* */ +/* ------------------------------------------------------------------ */ +static void decApplyRound(decNumber *dn, decContext *set, Int residue, + uInt *status) { + Int bump; /* 1 if coefficient needs to be incremented */ + /* -1 if coefficient needs to be decremented */ + + if (residue==0) return; /* nothing to apply */ + + bump=0; /* assume a smooth ride */ + + /* now decide whether, and how, to round, depending on mode */ + switch (set->round) { + case DEC_ROUND_05UP: { /* round zero or five up (for reround) */ + /* This is the same as DEC_ROUND_DOWN unless there is a */ + /* positive residue and the lsd of dn is 0 or 5, in which case */ + /* it is bumped; when residue is <0, the number is therefore */ + /* bumped down unless the final digit was 1 or 6 (in which */ + /* case it is bumped down and then up -- a no-op) */ + Int lsd5=*dn->lsu%5; /* get lsd and quintate */ + if (residue<0 && lsd5!=1) bump=-1; + else if (residue>0 && lsd5==0) bump=1; + /* [bump==1 could be applied directly; use common path for clarity] */ + break;} /* r-05 */ + + case DEC_ROUND_DOWN: { + /* no change, except if negative residue */ + if (residue<0) bump=-1; + break;} /* r-d */ + + case DEC_ROUND_HALF_DOWN: { + if (residue>5) bump=1; /* an exact half (residue 5) is left alone */ + break;} /* r-h-d */ + + case DEC_ROUND_HALF_EVEN: { + if (residue>5) bump=1; /* >0.5 goes up */ + else if (residue==5) { /* exactly 0.5000... 
*/ + /* 0.5 goes up iff [new] lsd is odd */ + if (*dn->lsu & 0x01) bump=1; + } + break;} /* r-h-e */ + + case DEC_ROUND_HALF_UP: { + if (residue>=5) bump=1; + break;} /* r-h-u */ + + case DEC_ROUND_UP: { + if (residue>0) bump=1; + break;} /* r-u */ + + case DEC_ROUND_CEILING: { + /* same as _UP for positive numbers, and as _DOWN for negatives */ + /* [negative residue cannot occur on 0] */ + if (decNumberIsNegative(dn)) { + if (residue<0) bump=-1; + } + else { + if (residue>0) bump=1; + } + break;} /* r-c */ + + case DEC_ROUND_FLOOR: { + /* same as _UP for negative numbers, and as _DOWN for positive */ + /* [negative residue cannot occur on 0] */ + if (!decNumberIsNegative(dn)) { + if (residue<0) bump=-1; + } + else { + if (residue>0) bump=1; + } + break;} /* r-f */ + + default: { /* e.g., DEC_ROUND_MAX */ + *status|=DEC_Invalid_context; + #if DECTRACE || (DECCHECK && DECVERB) + printf("Unknown rounding mode: %d\n", set->round); + #endif + break;} /* bump stays 0, so the number is returned unrounded */ + } /* switch */ + + /* now bump the number, up or down, if need be */ + if (bump==0) return; /* no action required */ + + /* Simply use decUnitAddSub unless bumping up and the number is */ + /* all nines. In this special case set to 100... explicitly */ + /* and adjust the exponent by one (as otherwise could overflow */ + /* the array) */ + /* Similarly handle all-nines result if bumping down. */ + if (bump>0) { + Unit *up; /* work */ + uInt count=dn->digits; /* digits to be checked */ + for (up=dn->lsu; ; up++) { + if (count<=DECDPUN) { + /* this is the last Unit (the msu) */ + if (*up!=powers[count]-1) break; /* not still 9s */ + /* here if it, too, is all nines */ + *up=(Unit)powers[count-1]; /* here 999 -> 100 etc. */ + for (up=up-1; up>=dn->lsu; up--) *up=0; /* others all to 0 */ + dn->exponent++; /* and bump exponent */ + /* [which, very rarely, could cause Overflow...] 
*/ + if ((dn->exponent+dn->digits)>set->emax+1) { + decSetOverflow(dn, set, status); + } + return; /* done */ + } + /* a full unit to check, with more to come */ + if (*up!=DECDPUNMAX) break; /* not still 9s */ + count-=DECDPUN; + } /* up */ + } /* bump>0 */ + else { /* -1 */ + /* here checking for a pre-bump of 1000... (leading 1, all */ + /* other digits zero) */ + Unit *up, *sup; /* work */ + uInt count=dn->digits; /* digits to be checked */ + for (up=dn->lsu; ; up++) { + if (count<=DECDPUN) { + /* this is the last Unit (the msu) */ + if (*up!=powers[count-1]) break; /* not 100.. */ + /* here if have the 1000... case */ + sup=up; /* save msu pointer */ + *up=(Unit)powers[count]-1; /* here 100 in msu -> 999 */ + /* others all to all-nines, too */ + for (up=up-1; up>=dn->lsu; up--) *up=(Unit)powers[DECDPUN]-1; + dn->exponent--; /* and reduce the exponent to match */ + + /* iff the number was at the subnormal boundary (exponent=etiny) */ + /* then the exponent is now out of range, so it will in fact get */ + /* clamped to etiny and the final 9 dropped. */ + /* printf(">> emin=%d exp=%d sdig=%d\n", set->emin, */ + /* dn->exponent, set->digits); */ + if (dn->exponent+1==set->emin-set->digits+1) { + if (count==1 && dn->digits==1) *sup=0; /* here 9 -> 0[.9] */ + else { + *sup=(Unit)powers[count-1]-1; /* here 999.. in msu -> 99.. */ + dn->digits--; + } + dn->exponent++; /* undo the decrement above: exponent is back at etiny */ + *status|=DEC_Underflow | DEC_Subnormal | DEC_Inexact | DEC_Rounded; + } + return; /* done */ + } + + /* a full unit to check, with more to come */ + if (*up!=0) break; /* not still 0s */ + count-=DECDPUN; + } /* up */ + + } /* bump<0 */ + + /* Actual bump needed. Do it. 
*/ + decUnitAddSub(dn->lsu, D2U(dn->digits), uarrone, 1, 0, dn->lsu, bump); /* source and result are both dn->lsu, so the bump is applied in place */ + } /* decApplyRound */ + +#if DECSUBSET +/* ------------------------------------------------------------------ */ +/* decFinish -- finish processing a number */ +/* */ +/* dn is the number */ +/* set is the context */ +/* residue is the rounding accumulator (as in decApplyRound) */ +/* status is the status accumulator */ +/* */ +/* This finishes off the current number by: */ +/* 1. If not extended: */ +/* a. Converting a zero result to clean '0' */ +/* b. Reducing positive exponents to 0, if would fit in digits */ +/* 2. Checking for overflow and subnormals (always) */ +/* Note this is just Finalize when no subset arithmetic. */ +/* All fields are updated as required. */ +/* ------------------------------------------------------------------ */ +static void decFinish(decNumber *dn, decContext *set, Int *residue, + uInt *status) { + if (!set->extended) { + if ISZERO(dn) { /* value is zero */ + dn->exponent=0; /* clean exponent .. */ + dn->bits=0; /* .. and sign */ + return; /* no error possible */ + } + if (dn->exponent>=0) { /* non-negative exponent */ + /* >0; reduce to integer if possible */ + if (set->digits >= (dn->exponent+dn->digits)) { + dn->digits=decShiftToMost(dn->lsu, dn->digits, dn->exponent); /* append 'exponent' zero digits */ + dn->exponent=0; + } + } + } /* !extended */ + + decFinalize(dn, set, residue, status); /* step 2: done whether or not extended */ + } /* decFinish */ +#endif + +/* ------------------------------------------------------------------ */ +/* decFinalize -- final check, clamp, and round of a number */ +/* */ +/* dn is the number */ +/* set is the context */ +/* residue is the rounding accumulator (as in decApplyRound) */ +/* status is the status accumulator */ +/* */ +/* This finishes off the current number by checking for subnormal */ +/* results, applying any pending rounding, checking for overflow, */ +/* and applying any clamping. */ +/* Underflow and overflow conditions are raised as appropriate. 
*/ +/* All fields are updated as required. */ +/* ------------------------------------------------------------------ */ +static void decFinalize(decNumber *dn, decContext *set, Int *residue, + uInt *status) { + Int shift; /* shift needed if clamping */ + Int tinyexp=set->emin-dn->digits+1; /* precalculate subnormal boundary */ + + /* Must be careful, here, when checking the exponent as the */ + /* adjusted exponent could overflow 31 bits [because it may already */ + /* be up to twice the expected]. */ + + /* First test for subnormal. This must be done before any final */ + /* round as the result could be rounded to Nmin or 0. */ + if (dn->exponent<=tinyexp) { /* prefilter */ + Int comp; + decNumber nmin; + /* A very nasty case here is dn == Nmin and residue<0 */ /* NOTE(review): the next statement is damaged: the text between 'dn->exponent' and 'emin;' appears to have been lost in extraction, so the comparison made here and the initialisation of nmin are not visible -- restore from upstream before use */ + if (dn->exponentemin; + comp=decCompare(dn, &nmin, 1); /* (signless compare) */ + if (comp==BADINT) { /* oops */ + *status|=DEC_Insufficient_storage; /* abandon... */ + return; + } + if (*residue<0 && comp==0) { /* neg residue and dn==Nmin */ + decApplyRound(dn, set, *residue, status); /* might force down */ + decSetSubnormal(dn, set, residue, status); + return; + } + } + + /* now apply any pending round (this could raise overflow). 
*/ + if (*residue!=0) decApplyRound(dn, set, *residue, status); + + /* Check for overflow [redundant in the 'rare' case] or clamp */ + if (dn->exponent<=set->emax-set->digits+1) return; /* neither needed */ + + + /* here when might have an overflow or clamp to do */ + if (dn->exponent>set->emax-dn->digits+1) { /* too big */ + decSetOverflow(dn, set, status); + return; + } + /* here when the result is normal but in clamp range */ + if (!set->clamp) return; + + /* here when need to apply the IEEE exponent clamp (fold-down) */ + shift=dn->exponent-(set->emax-set->digits+1); /* amount by which the exponent exceeds the clamp limit */ + + /* shift coefficient (if non-zero) */ + if (!ISZERO(dn)) { + dn->digits=decShiftToMost(dn->lsu, dn->digits, shift); + } + dn->exponent-=shift; /* adjust the exponent to match */ + *status|=DEC_Clamped; /* and record the dirty deed */ + return; + } /* decFinalize */ + +/* ------------------------------------------------------------------ */ +/* decSetOverflow -- set number to proper overflow value */ +/* */ +/* dn is the number (used for sign [only] and result) */ +/* set is the context [used for the rounding mode, etc.] */ +/* status contains the current status to be updated */ +/* */ +/* This sets the sign of a number and sets its value to either */ +/* Infinity or the maximum finite value, depending on the sign of */ +/* dn and the rounding mode, following IEEE 754 rules. 
*/
+/* ------------------------------------------------------------------ */
+static void decSetOverflow(decNumber *dn, decContext *set, uInt *status) {
+  const uByte signbit=dn->bits&DECNEG;  /* sign only, read before dn is zeroed */
+  Flag wantmax;                    /* 1: largest finite value; 0: Infinity */
+
+  /* A zero cannot overflow in magnitude; at most its exponent is   */
+  /* brought down to the limit, and that is reported as Clamped.    */
+  if (ISZERO(dn)) {
+    Int limit=set->emax;           /* largest exponent allowed */
+    if (set->clamp) limit-=set->digits-1;    /* tighter when clamping */
+    if (dn->exponent>limit) {
+      dn->exponent=limit;
+      *status|=DEC_Clamped;
+    }
+    return;
+  }
+
+  uprv_decNumberZero(dn);
+
+  /* Rounding mode and sign choose between Nmax and Infinity.       */
+  switch (set->round) {
+    case DEC_ROUND_DOWN:           /* these two modes never give Infinity */
+    case DEC_ROUND_05UP:
+      wantmax=1;
+      break;
+    case DEC_ROUND_CEILING:        /* Infinity only if non-negative */
+      wantmax=(signbit ? 1 : 0);
+      break;
+    case DEC_ROUND_FLOOR:          /* Infinity only if negative */
+      wantmax=(signbit ? 0 : 1);
+      break;
+    default:                       /* every other mode gives Infinity */
+      wantmax=0;
+      break;
+  }
+
+  if (wantmax) {
+    decSetMaxValue(dn, set);
+    dn->bits=signbit;              /* put the saved sign back */
+  }
+  else {
+    dn->bits=signbit|DECINF;       /* +/-Infinity */
+  }
+  *status|=DEC_Overflow | DEC_Inexact | DEC_Rounded;
+} /* decSetOverflow */
+
+/* ------------------------------------------------------------------ */
+/* decSetMaxValue -- set number to +Nmax (maximum normal value)       */
+/*                                                                    */
+/*   dn is the number to set                                          */
+/*   set is the context [used for digits and emax]                    */
+/*                                                                    */
+/* This sets the number to the maximum positive value.                
*/ +/* ------------------------------------------------------------------ */ +static void decSetMaxValue(decNumber *dn, decContext *set) { + Unit *up; /* work */ + Int count=set->digits; /* nines to add */ + dn->digits=count; + /* fill in all nines to set maximum value */ + for (up=dn->lsu; ; up++) { + if (count>DECDPUN) *up=DECDPUNMAX; /* unit full o'nines */ + else { /* this is the msu */ + *up=(Unit)(powers[count]-1); + break; + } + count-=DECDPUN; /* filled those digits */ + } /* up */ + dn->bits=0; /* + sign */ + dn->exponent=set->emax-set->digits+1; /* so that exponent+digits-1 equals emax */ + } /* decSetMaxValue */ + +/* ------------------------------------------------------------------ */ +/* decSetSubnormal -- process value whose exponent is extended) { + uprv_decNumberZero(dn); + /* always full overflow */ /* NOTE(review): the comment just above has swallowed damaged text: the rest of the decSetSubnormal header, the signature, the local declarations and the start of a conditional section testing an 'extended' flag appear to have been lost in extraction -- restore from upstream before use. The status set on this path is Underflow, so 'overflow' in that residue looks like a slip for 'underflow' -- confirm */ + *status|=DEC_Underflow | DEC_Subnormal | DEC_Inexact | DEC_Rounded; + return; + } + #endif + + /* Full arithmetic -- allow subnormals, rounded to minimum exponent */ + /* (Etiny) if needed */ + etiny=set->emin-(set->digits-1); /* smallest allowed exponent */ + + if ISZERO(dn) { /* value is zero */ + /* residue can never be non-zero here */ + #if DECCHECK + if (*residue!=0) { + printf("++ Subnormal 0 residue %ld\n", (LI)*residue); + *status|=DEC_Invalid_operation; + } + #endif /* NOTE(review): the next statement is damaged ('dn->exponentexponent'): the comparison against etiny appears to have been lost in extraction -- restore from upstream before use */ + if (dn->exponentexponent=etiny; + *status|=DEC_Clamped; + } + return; + } + + *status|=DEC_Subnormal; /* have a non-zero subnormal */ + adjust=etiny-dn->exponent; /* calculate digits to remove */ + if (adjust<=0) { /* not out of range; unrounded */ + /* residue can never be non-zero here, except in the Nmin-residue */ + /* case (which is a subnormal result), so can take fast-path here */ + /* it may already be inexact (from setting the coefficient) */ + if (*status&DEC_Inexact) *status|=DEC_Underflow; + return; + } + + /* adjust>0, so need to rescale the result so exponent becomes Etiny */ + /* [this code is similar to that in rescale] */ + workset=*set; /* clone rounding, etc. 
*/ + workset.digits=dn->digits-adjust; /* set requested length */ + workset.emin-=adjust; /* and adjust emin to match */ + /* [note that the latter can be <1, here, similar to Rescale case] */ + decSetCoeff(dn, &workset, dn->lsu, dn->digits, residue, status); /* in place: source and target are both dn */ + decApplyRound(dn, &workset, *residue, status); + + /* Use 754 default rule: Underflow is set iff Inexact */ + /* [independent of whether trapped] */ + if (*status&DEC_Inexact) *status|=DEC_Underflow; + + /* if rounded up a 999s case, exponent will be off by one; adjust */ + /* back if so [it will fit, because it was shortened earlier] */ + if (dn->exponent>etiny) { + dn->digits=decShiftToMost(dn->lsu, dn->digits, 1); + dn->exponent--; /* (re)adjust the exponent. */ + } + + /* if rounded to zero, it is by definition clamped... */ + if (ISZERO(dn)) *status|=DEC_Clamped; + } /* decSetSubnormal */ + +/* ------------------------------------------------------------------ */ +/* decCheckMath - check entry conditions for a math function */ +/* */ +/* This checks the context and the operand */ +/* */ +/* rhs is the operand to check */ +/* set is the context to check */ +/* status is unchanged if both are good */ +/* */ +/* returns non-zero if status is changed, 0 otherwise */ +/* */ +/* Restrictions enforced: */ +/* */ +/* digits, emax, and -emin in the context must not exceed */ +/* DEC_MAX_MATH (999999), and rhs must be within these bounds if */ +/* non-zero. Invalid_context is set in the status if the context */ +/* violates a restriction, Invalid_operation if the operand does. 
*/
+/* ------------------------------------------------------------------ */
+static uInt decCheckMath(const decNumber *rhs, decContext *set,
+                         uInt *status) {
+  const uInt before=*status;       /* status on entry, for the return test */
+
+  /* The context is vetted first; the operand is only examined when  */
+  /* the context has passed.                                         */
+  if (set->digits>DEC_MAX_MATH
+      || set->emax>DEC_MAX_MATH
+      || -set->emin>DEC_MAX_MATH) {
+    *status|=DEC_Invalid_context;
+  }
+  else {
+    /* range test on the operand's length and on exponent+digits;    */
+    /* a zero operand is accepted whatever its exponent              */
+    Flag outside=(rhs->digits>DEC_MAX_MATH
+               || rhs->exponent+rhs->digits>DEC_MAX_MATH+1
+               || rhs->exponent+rhs->digits<2*(1-DEC_MAX_MATH));
+    if (outside && !ISZERO(rhs)) *status|=DEC_Invalid_operation;
+  }
+
+  return (*status!=before);        /* non-zero iff a flag was newly set */
+} /* decCheckMath */
+
+/* ------------------------------------------------------------------ */
+/* decGetInt -- get integer from a number                             */
+/*                                                                    */
+/*   dn is the number [which will not be altered]                     */
+/*                                                                    */
+/*   returns one of:                                                  */
+/*     BADINT if there is a non-zero fraction                         */
+/*     the converted integer                                          */
+/*     BIGEVEN if the integer is even and magnitude > 2*10**9         */
+/*     BIGODD  if the integer is odd  and magnitude > 2*10**9         */
+/*                                                                    */
+/* This checks and gets a whole number from the input decNumber.      */
+/* The sign can be determined from dn by the caller when BIGEVEN or   */
+/* BIGODD is returned.                                                
*/ +/* ------------------------------------------------------------------ */ +static Int decGetInt(const decNumber *dn) { + Int theInt; /* result accumulator */ + const Unit *up; /* work */ + Int got; /* digits (real or not) processed */ + Int ilength=dn->digits+dn->exponent; /* integral length */ + Flag neg=decNumberIsNegative(dn); /* 1 if -ve */ + + /* The number must be an integer that fits in 10 digits */ + /* Assert, here, that 10 is enough for any rescale Etiny */ + #if DEC_MAX_EMAX > 999999999 + #error GetInt may need updating [for Emax] + #endif + #if DEC_MIN_EMIN < -999999999 + #error GetInt may need updating [for Emin] + #endif + if (ISZERO(dn)) return 0; /* zeros are OK, with any exponent */ + + up=dn->lsu; /* ready for lsu */ + theInt=0; /* ready to accumulate */ + if (dn->exponent>=0) { /* relatively easy */ + /* no fractional part [usual]; allow for positive exponent */ + got=dn->exponent; + } + else { /* -ve exponent; some fractional part to check and discard */ + Int count=-dn->exponent; /* digits to discard */ + /* spin up whole units until reach the Unit with the unit digit */ + for (; count>=DECDPUN; up++) { + if (*up!=0) return BADINT; /* non-zero Unit to discard */ + count-=DECDPUN; + } + if (count==0) got=0; /* [a multiple of DECDPUN] */ + else { /* [not multiple of DECDPUN] */ + Int rem; /* work */ + /* slice off fraction digits and check for non-zero */ + #if DECDPUN<=4 + theInt=QUOT10(*up, count); + rem=*up-theInt*powers[count]; + #else + rem=*up%powers[count]; /* slice off discards */ + theInt=*up/powers[count]; + #endif + if (rem!=0) return BADINT; /* non-zero fraction */ + /* it looks good */ + got=DECDPUN-count; /* number of digits so far */ + up++; /* ready for next */ + } + } + /* now it's known there's no fractional part */ + + /* tricky code now, to accumulate up to 9.3 digits */ + if (got==0) {theInt=*up; got+=DECDPUN; up++;} /* ensure lsu is there */ + + if (ilength<11) { + Int save=theInt; + /* collect any remaining unit(s) */ /* NOTE(review): the for-loop that follows is damaged: the text between 'got' and '1999999997' (loop bound and body, and the start of the range check) appears to have been lost in extraction -- restore from upstream before use */ + 
for (; got1999999997) ilength=11; + else if (!neg && theInt>999999999) ilength=11; + if (ilength==11) theInt=save; /* restore correct low bit */ + } + } + + if (ilength>10) { /* too big */ + if (theInt&1) return BIGODD; /* bottom bit 1 */ + return BIGEVEN; /* bottom bit 0 */ + } + + if (neg) theInt=-theInt; /* apply sign */ + return theInt; + } /* decGetInt */ + +/* ------------------------------------------------------------------ */ +/* decDecap -- decapitate the coefficient of a number */ +/* */ +/* dn is the number to be decapitated */ +/* drop is the number of digits to be removed from the left of dn; */ +/* this must be <= dn->digits (if equal, the coefficient is */ +/* set to 0) */ +/* */ +/* Returns dn; dn->digits will be <= the initial digits less drop */ +/* (after removing drop digits there may be leading zero digits */ +/* which will also be removed). Only dn->lsu and dn->digits change. */ +/* ------------------------------------------------------------------ */ +static decNumber *decDecap(decNumber *dn, Int drop) { + Unit *msu; /* -> target cut point */ + Int cut; /* work */ + if (drop>=dn->digits) { /* losing the whole thing */ + #if DECCHECK + if (drop>dn->digits) + printf("decDecap called with drop>digits [%ld>%ld]\n", + (LI)drop, (LI)dn->digits); + #endif + dn->lsu[0]=0; + dn->digits=1; + return dn; + } + msu=dn->lsu+D2U(dn->digits-drop)-1; /* -> likely msu */ + cut=MSUDIGITS(dn->digits-drop); /* digits to be in use in msu */ + if (cut!=DECDPUN) *msu%=powers[cut]; /* clear left digits */ + /* that may have left leading zero digits, so do a proper count... 
*/ + dn->digits=decGetDigits(dn->lsu, msu-dn->lsu+1); /* second argument is the count of Units up to and including msu */ + return dn; + } /* decDecap */ + +/* ------------------------------------------------------------------ */ +/* decBiStr -- compare string with pairwise options */ +/* */ +/* targ is the string to compare */ +/* str1 is one of the strings to compare against (length may be 0) */ +/* str2 is the other; it must be the same length as str1 */ +/* */ +/* returns 1 if the strings compare equal (that is, targ is the same */ +/* length as str1 and str2, and each character of targ is in either */ +/* str1 or str2 in the corresponding position), or 0 otherwise */ +/* */ +/* This is used for generic caseless compare, including the awkward */ +/* case of the Turkish dotted and dotless Is. Use as (for example): */ +/* if (decBiStr(test, "mike", "MIKE")) ... */ +/* ------------------------------------------------------------------ */ +static Flag decBiStr(const char *targ, const char *str1, const char *str2) { + for (;;targ++, str1++, str2++) { + if (*targ!=*str1 && *targ!=*str2) return 0; /* also rejects a length mismatch: a terminator on one side cannot match a character on the other */ + /* *targ has a match in one (or both, if terminator) */ + if (*targ=='\0') break; + } /* forever */ + return 1; + } /* decBiStr */ + +/* ------------------------------------------------------------------ */ +/* decNaNs -- handle NaN operand or operands */ +/* */ +/* res is the result number */ +/* lhs is the first operand */ +/* rhs is the second operand, or NULL if none */ +/* context is used to limit payload length */ +/* status contains the current status */ +/* returns res in case convenient */ +/* */ +/* Called when one or both operands is a NaN, and propagates the */ +/* appropriate result to res. When an sNaN is found, it is changed */ +/* to a qNaN and Invalid operation is set. 
*/ +/* ------------------------------------------------------------------ */ +static decNumber * decNaNs(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set, + uInt *status) { + /* This decision tree ends up with LHS being the source pointer, */ + /* and status updated if need be */ + if (lhs->bits & DECSNAN) + *status|=DEC_Invalid_operation | DEC_sNaN; + else if (rhs==NULL); + else if (rhs->bits & DECSNAN) { + lhs=rhs; + *status|=DEC_Invalid_operation | DEC_sNaN; + } + else if (lhs->bits & DECNAN); + else lhs=rhs; + + /* propagate the payload */ + if (lhs->digits<=set->digits) uprv_decNumberCopy(res, lhs); /* easy */ + else { /* too long */ + const Unit *ul; + Unit *ur, *uresp1; + /* copy safe number of units, then decapitate */ + res->bits=lhs->bits; /* need sign etc. */ + uresp1=res->lsu+D2U(set->digits); + for (ur=res->lsu, ul=lhs->lsu; urdigits=D2U(set->digits)*DECDPUN; + /* maybe still too long */ + if (res->digits>set->digits) decDecap(res, res->digits-set->digits); + } + + res->bits&=~DECSNAN; /* convert any sNaN to NaN, while */ + res->bits|=DECNAN; /* .. preserving sign */ + res->exponent=0; /* clean exponent */ + /* [coefficient was copied/decapitated] */ + return res; + } /* decNaNs */ + +/* ------------------------------------------------------------------ */ +/* decStatus -- apply non-zero status */ +/* */ +/* dn is the number to set if error */ +/* status contains the current status (not yet in context) */ +/* set is the context */ +/* */ +/* If the status is an error status, the number is set to a NaN, */ +/* unless the error was an overflow, divide-by-zero, or underflow, */ +/* in which case the number will have already been set. */ +/* */ +/* The context status is then updated with the new status. Note that */ +/* this may raise a signal, so control may never return from this */ +/* routine (hence resources must be recovered before it is called). 
*/ +/* ------------------------------------------------------------------ */ +static void decStatus(decNumber *dn, uInt status, decContext *set) { + if (status & DEC_NaNs) { /* error status -> NaN */ + /* if cause was an sNaN, clear and propagate [NaN is already set up] */ + if (status & DEC_sNaN) status&=~DEC_sNaN; + else { + uprv_decNumberZero(dn); /* other error: clean throughout */ + dn->bits=DECNAN; /* and make a quiet NaN */ + } + } + uprv_decContextSetStatus(set, status); /* [may not return] */ + return; + } /* decStatus */ + +/* ------------------------------------------------------------------ */ +/* decGetDigits -- count digits in a Units array */ +/* */ +/* uar is the Unit array holding the number (this is often an */ +/* accumulator of some sort) */ +/* len is the length of the array in units [>=1] */ +/* */ +/* returns the number of (significant) digits in the array */ +/* */ +/* All leading zeros are excluded, except the last if the array has */ +/* only zero Units. */ +/* ------------------------------------------------------------------ */ +/* This may be called twice during some operations. */ +static Int decGetDigits(Unit *uar, Int len) { + Unit *up=uar+(len-1); /* -> msu */ + Int digits=(len-1)*DECDPUN+1; /* possible digits excluding msu */ + #if DECDPUN>4 + uInt const *pow; /* work */ + #endif + /* (at least 1 in final msu) */ + #if DECCHECK + if (len<1) printf("decGetDigits called with len<1 [%ld]\n", (LI)len); + #endif + + for (; up>=uar; up--) { + if (*up==0) { /* unit is all 0s */ + if (digits==1) break; /* a zero has one digit */ + digits-=DECDPUN; /* adjust for 0 unit */ + continue;} + /* found the first (most significant) non-zero Unit */ + #if DECDPUN>1 /* not done yet */ + if (*up<10) break; /* is 1-9 */ + digits++; + #if DECDPUN>2 /* not done yet */ + if (*up<100) break; /* is 10-99 */ + digits++; + #if DECDPUN>3 /* not done yet */ + if (*up<1000) break; /* is 100-999 */ + digits++; + #if DECDPUN>4 /* count the rest ... 
*/ + for (pow=&powers[4]; *up>=*pow; pow++) digits++; + #endif + #endif + #endif + #endif + break; + } /* up */ + return digits; + } /* decGetDigits */ + +#if DECTRACE | DECCHECK +/* ------------------------------------------------------------------ */ +/* decNumberShow -- display a number [debug aid] */ +/* dn is the number to show */ +/* */ +/* Shows: sign, exponent, coefficient (msu first), digits */ +/* or: sign, special-value */ +/* ------------------------------------------------------------------ */ +/* this is public so other modules can use it */ +void uprv_decNumberShow(const decNumber *dn) { + const Unit *up; /* work */ + uInt u, d; /* .. */ + Int cut; /* .. */ + char isign='+'; /* main sign */ + if (dn==NULL) { + printf("NULL\n"); + return;} + if (decNumberIsNegative(dn)) isign='-'; + printf(" >> %c ", isign); + if (dn->bits&DECSPECIAL) { /* Is a special value */ + if (decNumberIsInfinite(dn)) printf("Infinity"); + else { /* a NaN */ + if (dn->bits&DECSNAN) printf("sNaN"); /* signalling NaN */ + else printf("NaN"); + } + /* if coefficient and exponent are 0, no more to do */ + if (dn->exponent==0 && dn->digits==1 && *dn->lsu==0) { + printf("\n"); + return;} + /* drop through to report other information */ + printf(" "); + } + + /* now carefully display the coefficient */ + up=dn->lsu+D2U(dn->digits)-1; /* msu */ + printf("%ld", (LI)*up); + for (up=up-1; up>=dn->lsu; up--) { + u=*up; + printf(":"); + for (cut=DECDPUN-1; cut>=0; cut--) { + d=u/powers[cut]; + u-=d*powers[cut]; + printf("%ld", (LI)d); + } /* cut */ + } /* up */ + if (dn->exponent!=0) { + char esign='+'; + if (dn->exponent<0) esign='-'; + printf(" E%c%ld", esign, (LI)abs(dn->exponent)); + } + printf(" [%ld]\n", (LI)dn->digits); + } /* decNumberShow */ +#endif + +#if DECTRACE || DECCHECK +/* ------------------------------------------------------------------ */ +/* decDumpAr -- display a unit array [debug/check aid] */ +/* name is a single-character tag name */ +/* ar is the array to display 
*/ +/* len is the length of the array in Units */ +/* ------------------------------------------------------------------ */ +static void decDumpAr(char name, const Unit *ar, Int len) { + Int i; + const char *spec; + #if DECDPUN==9 + spec="%09d "; + #elif DECDPUN==8 + spec="%08d "; + #elif DECDPUN==7 + spec="%07d "; + #elif DECDPUN==6 + spec="%06d "; + #elif DECDPUN==5 + spec="%05d "; + #elif DECDPUN==4 + spec="%04d "; + #elif DECDPUN==3 + spec="%03d "; + #elif DECDPUN==2 + spec="%02d "; + #else + spec="%d "; + #endif + printf(" :%c: ", name); + for (i=len-1; i>=0; i--) { + if (i==len-1) printf("%ld ", (LI)ar[i]); + else printf(spec, ar[i]); + } + printf("\n"); + return;} +#endif + +#if DECCHECK +/* ------------------------------------------------------------------ */ +/* decCheckOperands -- check operand(s) to a routine */ +/* res is the result structure (not checked; it will be set to */ +/* quiet NaN if error found (and it is not NULL)) */ +/* lhs is the first operand (may be DECUNRESU) */ +/* rhs is the second (may be DECUNUSED) */ +/* set is the context (may be DECUNCONT) */ +/* returns 0 if both operands, and the context are clean, or 1 */ +/* otherwise (in which case the context will show an error, */ +/* unless NULL). Note that res is not cleaned; caller should */ +/* handle this so res=NULL case is safe. */ +/* The caller is expected to abandon immediately if 1 is returned. 
*/ +/* ------------------------------------------------------------------ */ +static Flag decCheckOperands(decNumber *res, const decNumber *lhs, + const decNumber *rhs, decContext *set) { + Flag bad=0; + if (set==NULL) { /* oops; hopeless */ + #if DECTRACE || DECVERB + printf("Reference to context is NULL.\n"); + #endif + bad=1; + return 1;} + else if (set!=DECUNCONT + && (set->digits<1 || set->round>=DEC_ROUND_MAX)) { + bad=1; + #if DECTRACE || DECVERB + printf("Bad context [digits=%ld round=%ld].\n", + (LI)set->digits, (LI)set->round); + #endif + } + else { + if (res==NULL) { + bad=1; + #if DECTRACE + /* this one not DECVERB as standard tests include NULL */ + printf("Reference to result is NULL.\n"); + #endif + } + if (!bad && lhs!=DECUNUSED) bad=(decCheckNumber(lhs)); + if (!bad && rhs!=DECUNUSED) bad=(decCheckNumber(rhs)); + } + if (bad) { + if (set!=DECUNCONT) uprv_decContextSetStatus(set, DEC_Invalid_operation); + if (res!=DECUNRESU && res!=NULL) { + uprv_decNumberZero(res); + res->bits=DECNAN; /* qNaN */ + } + } + return bad; + } /* decCheckOperands */ + +/* ------------------------------------------------------------------ */ +/* decCheckNumber -- check a number */ +/* dn is the number to check */ +/* returns 0 if the number is clean, or 1 otherwise */ +/* */ +/* The number is considered valid if it could be a result from some */ +/* operation in some valid context. */ +/* ------------------------------------------------------------------ */ +static Flag decCheckNumber(const decNumber *dn) { + const Unit *up; /* work */ + uInt maxuint; /* .. */ + Int ae, d, digits; /* .. */ + Int emin, emax; /* .. 
*/ + + if (dn==NULL) { /* hopeless */ + #if DECTRACE + /* this one not DECVERB as standard tests include NULL */ + printf("Reference to decNumber is NULL.\n"); + #endif + return 1;} + + /* check special values */ + if (dn->bits & DECSPECIAL) { + if (dn->exponent!=0) { + #if DECTRACE || DECVERB + printf("Exponent %ld (not 0) for a special value [%02x].\n", + (LI)dn->exponent, dn->bits); + #endif + return 1;} + + /* 2003.09.08: NaNs may now have coefficients, so next tests Inf only */ + if (decNumberIsInfinite(dn)) { + if (dn->digits!=1) { + #if DECTRACE || DECVERB + printf("Digits %ld (not 1) for an infinity.\n", (LI)dn->digits); + #endif + return 1;} + if (*dn->lsu!=0) { + #if DECTRACE || DECVERB + printf("LSU %ld (not 0) for an infinity.\n", (LI)*dn->lsu); + #endif + decDumpAr('I', dn->lsu, D2U(dn->digits)); + return 1;} + } /* Inf */ + /* 2002.12.26: negative NaNs can now appear through proposed IEEE */ + /* concrete formats (decimal64, etc.). */ + return 0; + } + + /* check the coefficient */ + if (dn->digits<1 || dn->digits>DECNUMMAXP) { + #if DECTRACE || DECVERB + printf("Digits %ld in number.\n", (LI)dn->digits); + #endif + return 1;} + + d=dn->digits; + + for (up=dn->lsu; d>0; up++) { + if (d>DECDPUN) maxuint=DECDPUNMAX; + else { /* reached the msu */ + maxuint=powers[d]-1; + if (dn->digits>1 && *upmaxuint) { + #if DECTRACE || DECVERB + printf("Bad Unit [%08lx] in %ld-digit number at offset %ld [maxuint %ld].\n", + (LI)*up, (LI)dn->digits, (LI)(up-dn->lsu), (LI)maxuint); + #endif + return 1;} + d-=DECDPUN; + } + + /* check the exponent. Note that input operands can have exponents */ + /* which are out of the set->emin/set->emax and set->digits range */ + /* (just as they can have more digits than set->digits). 
*/ + ae=dn->exponent+dn->digits-1; /* adjusted exponent */ + emax=DECNUMMAXE; + emin=DECNUMMINE; + digits=DECNUMMAXP; + if (ae+emax) { + #if DECTRACE || DECVERB + printf("Adjusted exponent overflow [%ld].\n", (LI)ae); + uprv_decNumberShow(dn); + #endif + return 1;} + + return 0; /* it's OK */ + } /* decCheckNumber */ + +/* ------------------------------------------------------------------ */ +/* decCheckInexact -- check a normal finite inexact result has digits */ +/* dn is the number to check */ +/* set is the context (for status and precision) */ +/* sets Invalid operation, etc., if some digits are missing */ +/* [this check is not made for DECSUBSET compilation or when */ +/* subnormal is not set] */ +/* ------------------------------------------------------------------ */ +static void decCheckInexact(const decNumber *dn, decContext *set) { + #if !DECSUBSET && DECEXTFLAG + if ((set->status & (DEC_Inexact|DEC_Subnormal))==DEC_Inexact + && (set->digits!=dn->digits) && !(dn->bits & DECSPECIAL)) { + #if DECTRACE || DECVERB + printf("Insufficient digits [%ld] on normal Inexact result.\n", + (LI)dn->digits); + uprv_decNumberShow(dn); + #endif + uprv_decContextSetStatus(set, DEC_Invalid_operation); + } + #else + /* next is a noop for quiet compiler */ + if (dn!=NULL && dn->digits==0) set->status|=DEC_Invalid_operation; + #endif + return; + } /* decCheckInexact */ +#endif + +#if DECALLOC +#undef malloc +#undef free +/* ------------------------------------------------------------------ */ +/* decMalloc -- accountable allocation routine */ +/* n is the number of bytes to allocate */ +/* */ +/* Semantics is the same as the stdlib malloc routine, but bytes */ +/* allocated are accounted for globally, and corruption fences are */ +/* added before and after the 'actual' storage. 
*/ +/* ------------------------------------------------------------------ */ +/* This routine allocates storage with an extra twelve bytes; 8 are */ +/* at the start and hold: */ +/* 0-3 the original length requested */ +/* 4-7 buffer corruption detection fence (DECFENCE, x4) */ +/* The 4 bytes at the end also hold a corruption fence (DECFENCE, x4) */ +/* ------------------------------------------------------------------ */ +static void *decMalloc(size_t n) { + uInt size=n+12; /* true size */ + void *alloc; /* -> allocated storage */ + uByte *b, *b0; /* work */ + uInt uiwork; /* for macros */ + + alloc=malloc(size); /* -> allocated storage */ + if (alloc==NULL) return NULL; /* out of strorage */ + b0=(uByte *)alloc; /* as bytes */ + decAllocBytes+=n; /* account for storage */ + UBFROMUI(alloc, n); /* save n */ + /* printf(" alloc ++ dAB: %ld (%ld)\n", (LI)decAllocBytes, (LI)n); */ + for (b=b0+4; b play area */ + } /* decMalloc */ + +/* ------------------------------------------------------------------ */ +/* decFree -- accountable free routine */ +/* alloc is the storage to free */ +/* */ +/* Semantics is the same as the stdlib malloc routine, except that */ +/* the global storage accounting is updated and the fences are */ +/* checked to ensure that no routine has written 'out of bounds'. */ +/* ------------------------------------------------------------------ */ +/* This routine first checks that the fences have not been corrupted. */ +/* It then frees the storage using the 'truw' storage address (that */ +/* is, offset by 8). 
*/ +/* ------------------------------------------------------------------ */ +static void decFree(void *alloc) { + uInt n; /* original length */ + uByte *b, *b0; /* work */ + uInt uiwork; /* for macros */ + + if (alloc==NULL) return; /* allowed; it's a nop */ + b0=(uByte *)alloc; /* as bytes */ + b0-=8; /* -> true start of storage */ + n=UBTOUI(b0); /* lift length */ + for (b=b0+4; bshouldRollBack((double)number)) { + if (result->shouldRollBack(number)) { if (hi == 1) { // bad rule set, no prior rule to rollback to from this base return NULL; } @@ -829,18 +830,20 @@ int64_t util64_fromDouble(double d) { return result; } -int64_t util64_pow(int32_t r, uint32_t e) { - if (r == 0) { +int64_t util64_pow(int32_t base, uint16_t exponent) { + if (base == 0) { return 0; - } else if (e == 0) { - return 1; - } else { - int64_t n = r; - while (--e > 0) { - n *= r; + } + int64_t result = 1; + int64_t pow = base; + while (exponent > 0) { + if ((exponent & 1) == 1) { + result *= pow; } - return n; + pow *= pow; + exponent >>= 1; } + return result; } static const uint8_t asciiDigits[] = { diff --git a/deps/icu-small/source/i18n/nfrs.h b/deps/icu-small/source/i18n/nfrs.h index b06c2b2215..eafb1ca441 100644 --- a/deps/icu-small/source/i18n/nfrs.h +++ b/deps/icu-small/source/i18n/nfrs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: nfrs.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -88,7 +88,7 @@ private: int64_t util64_fromDouble(double d); // raise radix to the power exponent, only non-negative exponents -int64_t util64_pow(int32_t radix, uint32_t exponent); +int64_t util64_pow(int32_t radix, uint16_t exponent); // convert n to digit string in buffer, return length of string uint32_t util64_tou(int64_t n, UChar* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE); diff --git a/deps/icu-small/source/i18n/nfrule.cpp b/deps/icu-small/source/i18n/nfrule.cpp index 100a46490d..2c26aff2d1 100644 --- a/deps/icu-small/source/i18n/nfrule.cpp +++ b/deps/icu-small/source/i18n/nfrule.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: nfrule.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -30,6 +30,7 @@ #include "nfrlist.h" #include "nfsubs.h" #include "patternprops.h" +#include "putilimp.h" U_NAMESPACE_BEGIN @@ -715,6 +716,12 @@ NFRule::_appendRuleText(UnicodeString& result) const result.append(gSemicolon); } +int64_t NFRule::getDivisor() const +{ + return util64_pow(radix, exponent); +} + + //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- @@ -749,7 +756,7 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32 toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2)); } toInsertInto.insert(pos, - rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status)); + rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status)); if (pluralRuleStart > 0) { toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); } @@ -798,10 +805,10 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_ if (0 <= pluralVal && pluralVal < 1) { // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior. // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors. 
- pluralVal = uprv_round(pluralVal * uprv_pow(radix, exponent)); + pluralVal = uprv_round(pluralVal * util64_pow(radix, exponent)); } else { - pluralVal = pluralVal / uprv_pow(radix, exponent); + pluralVal = pluralVal / util64_pow(radix, exponent); } toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status)); if (pluralRuleStart > 0) { @@ -827,7 +834,7 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_ * this one in its list; false if it should use this rule */ UBool -NFRule::shouldRollBack(double number) const +NFRule::shouldRollBack(int64_t number) const { // we roll back if the rule contains a modulus substitution, // the number being formatted is an even multiple of the rule's @@ -847,7 +854,7 @@ NFRule::shouldRollBack(double number) const // multiple of 100. This is called the "rollback rule." if ((sub1 != NULL && sub1->isModulusSubstitution()) || (sub2 != NULL && sub2->isModulusSubstitution())) { int64_t re = util64_pow(radix, exponent); - return uprv_fmod(number, (double)re) == 0 && (baseValue % re) != 0; + return (number % re) == 0 && (baseValue % re) != 0; } return FALSE; } diff --git a/deps/icu-small/source/i18n/nfrule.h b/deps/icu-small/source/i18n/nfrule.h index 5424b968a1..21cdd24fbd 100644 --- a/deps/icu-small/source/i18n/nfrule.h +++ b/deps/icu-small/source/i18n/nfrule.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -17,7 +17,6 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/unistr.h" -#include "putilimp.h" U_NAMESPACE_BEGIN @@ -66,7 +65,7 @@ public: UChar getDecimalPoint() const { return decimalPoint; } - double getDivisor() const { return uprv_pow(radix, exponent); } + int64_t getDivisor() const; void doFormat(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; void doFormat(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; @@ -77,7 +76,7 @@ public: double upperBound, Formattable& result) const; - UBool shouldRollBack(double number) const; + UBool shouldRollBack(int64_t number) const; void _appendRuleText(UnicodeString& result) const; diff --git a/deps/icu-small/source/i18n/nfsubs.cpp b/deps/icu-small/source/i18n/nfsubs.cpp index 58039c8bac..6e7eabe350 100644 --- a/deps/icu-small/source/i18n/nfsubs.cpp +++ b/deps/icu-small/source/i18n/nfsubs.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: nfsubs.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -20,6 +20,7 @@ #include "nfsubs.h" #include "digitlst.h" +#include "fmtableimp.h" #if U_HAVE_RBNF @@ -68,27 +69,24 @@ public: SameValueSubstitution::~SameValueSubstitution() {} class MultiplierSubstitution : public NFSubstitution { - double divisor; - int64_t ldivisor; + int64_t divisor; public: MultiplierSubstitution(int32_t _pos, - double _divisor, + const NFRule *rule, const NFRuleSet* _ruleSet, const UnicodeString& description, UErrorCode& status) - : NFSubstitution(_pos, _ruleSet, description, status), divisor(_divisor) + : NFSubstitution(_pos, _ruleSet, description, status), divisor(rule->getDivisor()) { - ldivisor = util64_fromDouble(divisor); if (divisor == 0) { status = U_PARSE_ERROR; } } virtual ~MultiplierSubstitution(); - virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status) { - divisor = uprv_pow(radix, exponent); - ldivisor = util64_fromDouble(divisor); + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { + divisor = util64_pow(radix, exponent); if(divisor == 0) { status = U_PARSE_ERROR; @@ -98,14 +96,14 @@ public: virtual UBool operator==(const NFSubstitution& rhs) const; virtual int64_t transformNumber(int64_t number) const { - return number / ldivisor; + return number / divisor; } virtual double transformNumber(double number) const { if (getRuleSet()) { return uprv_floor(number / divisor); } else { - return number/divisor; + return number / divisor; } } @@ -125,21 +123,19 @@ public: MultiplierSubstitution::~MultiplierSubstitution() {} class ModulusSubstitution : public NFSubstitution { - double divisor; - int64_t ldivisor; + int64_t divisor; const NFRule* ruleToUse; public: ModulusSubstitution(int32_t pos, - double _divisor, + const NFRule* rule, const NFRule* rulePredecessor, const NFRuleSet* ruleSet, const 
UnicodeString& description, UErrorCode& status); virtual ~ModulusSubstitution(); - virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status) { - divisor = uprv_pow(radix, exponent); - ldivisor = util64_fromDouble(divisor); + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { + divisor = util64_pow(radix, exponent); if (divisor == 0) { status = U_PARSE_ERROR; @@ -151,7 +147,7 @@ public: virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - virtual int64_t transformNumber(int64_t number) const { return number % ldivisor; } + virtual int64_t transformNumber(int64_t number) const { return number % divisor; } virtual double transformNumber(double number) const { return uprv_fmod(number, divisor); } virtual UBool doParse(const UnicodeString& text, @@ -353,7 +349,7 @@ NFSubstitution::makeSubstitution(int32_t pos, // otherwise, return a MultiplierSubstitution else { - return new MultiplierSubstitution(pos, rule->getDivisor(), ruleSet, + return new MultiplierSubstitution(pos, rule, ruleSet, description, status); } @@ -383,7 +379,7 @@ NFSubstitution::makeSubstitution(int32_t pos, // otherwise, return a ModulusSubstitution else { - return new ModulusSubstitution(pos, rule->getDivisor(), predecessor, + return new ModulusSubstitution(pos, rule, predecessor, ruleSet, description, status); } @@ -491,7 +487,7 @@ NFSubstitution::~NFSubstitution() * @param exponent The exponent of the divisor */ void -NFSubstitution::setDivisor(int32_t /*radix*/, int32_t /*exponent*/, UErrorCode& /*status*/) { +NFSubstitution::setDivisor(int32_t /*radix*/, int16_t /*exponent*/, UErrorCode& /*status*/) { // a no-op for all substitutions except multiplier and modulus substitutions } @@ -572,23 +568,38 @@ void 
NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos, int32_t recursionCount, UErrorCode& status) const { if (ruleSet != NULL) { - // perform a transformation on the number that is dependent + // Perform a transformation on the number that is dependent // on the type of substitution this is, then just call its // rule set's format() method to format the result ruleSet->format(transformNumber(number), toInsertInto, _pos + this->pos, recursionCount, status); } else if (numberFormat != NULL) { - // or perform the transformation on the number (preserving - // the result's fractional part if the formatter it set - // to show it), then use that formatter's format() method - // to format the result - double numberToFormat = transformNumber((double)number); - if (numberFormat->getMaximumFractionDigits() == 0) { - numberToFormat = uprv_floor(numberToFormat); - } + if (number <= MAX_INT64_IN_DOUBLE) { + // or perform the transformation on the number (preserving + // the result's fractional part if the formatter it set + // to show it), then use that formatter's format() method + // to format the result + double numberToFormat = transformNumber((double)number); + if (numberFormat->getMaximumFractionDigits() == 0) { + numberToFormat = uprv_floor(numberToFormat); + } - UnicodeString temp; - numberFormat->format(numberToFormat, temp, status); - toInsertInto.insert(_pos + this->pos, temp); + UnicodeString temp; + numberFormat->format(numberToFormat, temp, status); + toInsertInto.insert(_pos + this->pos, temp); + } + else { + // We have gone beyond double precision. Something has to give. + // We're favoring accuracy of the large number over potential rules + // that round like a CompactDecimalFormat, which is not a common use case. 
+ // + // Perform a transformation on the number that is dependent + // on the type of substitution this is, then just call its + // rule set's format() method to format the result + int64_t numberToFormat = transformNumber(number); + UnicodeString temp; + numberFormat->format(numberToFormat, temp, status); + toInsertInto.insert(_pos + this->pos, temp); + } } } @@ -809,22 +820,20 @@ UBool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const * regular rule. */ ModulusSubstitution::ModulusSubstitution(int32_t _pos, - double _divisor, + const NFRule* rule, const NFRule* predecessor, const NFRuleSet* _ruleSet, const UnicodeString& description, UErrorCode& status) : NFSubstitution(_pos, _ruleSet, description, status) - , divisor(_divisor) + , divisor(rule->getDivisor()) , ruleToUse(NULL) { - ldivisor = util64_fromDouble(_divisor); - // the owning rule's divisor controls the behavior of this // substitution: rather than keeping a backpointer to the rule, // we keep a copy of the divisor - if (ldivisor == 0) { + if (divisor == 0) { status = U_PARSE_ERROR; } diff --git a/deps/icu-small/source/i18n/nfsubs.h b/deps/icu-small/source/i18n/nfsubs.h index 4fb0c06caf..e77f7ada8c 100644 --- a/deps/icu-small/source/i18n/nfsubs.h +++ b/deps/icu-small/source/i18n/nfsubs.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: nfsubs.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -91,7 +91,7 @@ public: * @param radix The radix of the divisor * @param exponent The exponent of the divisor */ - virtual void setDivisor(int32_t radix, int32_t exponent, UErrorCode& status); + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status); /** * Replaces result with the string describing the substitution. diff --git a/deps/icu-small/source/i18n/nortrans.cpp b/deps/icu-small/source/i18n/nortrans.cpp index da0206776c..589c82482e 100644 --- a/deps/icu-small/source/i18n/nortrans.cpp +++ b/deps/icu-small/source/i18n/nortrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/nortrans.h b/deps/icu-small/source/i18n/nortrans.h index 79d1be3b07..d309452f9a 100644 --- a/deps/icu-small/source/i18n/nortrans.h +++ b/deps/icu-small/source/i18n/nortrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/nultrans.cpp b/deps/icu-small/source/i18n/nultrans.cpp index 600873e373..62d1290ac7 100644 --- a/deps/icu-small/source/i18n/nultrans.cpp +++ b/deps/icu-small/source/i18n/nultrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/nultrans.h b/deps/icu-small/source/i18n/nultrans.h index a9856bde5b..a01b04e9ba 100644 --- a/deps/icu-small/source/i18n/nultrans.h +++ b/deps/icu-small/source/i18n/nultrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/numfmt.cpp b/deps/icu-small/source/i18n/numfmt.cpp index c00955a781..90c53ce390 100644 --- a/deps/icu-small/source/i18n/numfmt.cpp +++ b/deps/icu-small/source/i18n/numfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -529,7 +529,7 @@ UnicodeString& NumberFormat::format(const DigitList &number, FieldPositionIterator* posIter, UErrorCode& status) const { // DecimalFormat overrides this function, and handles DigitList based big decimals. - // Other subclasses (ChoiceFormat, RuleBasedNumberFormat) do not (yet) handle DigitLists, + // Other subclasses (ChoiceFormat) do not (yet) handle DigitLists, // so this default implementation falls back to formatting decimal numbers as doubles. if (U_FAILURE(status)) { return appendTo; @@ -547,7 +547,7 @@ NumberFormat::format(const DigitList &number, FieldPosition& pos, UErrorCode &status) const { // DecimalFormat overrides this function, and handles DigitList based big decimals. 
- // Other subclasses (ChoiceFormat, RuleBasedNumberFormat) do not (yet) handle DigitLists, + // Other subclasses (ChoiceFormat) do not (yet) handle DigitLists, // so this default implementation falls back to formatting decimal numbers as doubles. if (U_FAILURE(status)) { return appendTo; @@ -1188,7 +1188,7 @@ void NumberFormat::setCurrency(const UChar* theCurrency, UErrorCode& ec) { } } -const UChar* NumberFormat::getCurrency() const { +const char16_t* NumberFormat::getCurrency() const { return fCurrency; } diff --git a/deps/icu-small/source/i18n/numsys.cpp b/deps/icu-small/source/i18n/numsys.cpp index 442ad7f255..b24340f0d2 100644 --- a/deps/icu-small/source/i18n/numsys.cpp +++ b/deps/icu-small/source/i18n/numsys.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -230,7 +230,7 @@ void NumberingSystem::setAlgorithmic(UBool c) { algorithmic = c; } -void NumberingSystem::setDesc(UnicodeString d) { +void NumberingSystem::setDesc(const UnicodeString &d) { desc.setTo(d); } void NumberingSystem::setName(const char *n) { diff --git a/deps/icu-small/source/i18n/numsys_impl.h b/deps/icu-small/source/i18n/numsys_impl.h index d39faba5af..6385fa5408 100644 --- a/deps/icu-small/source/i18n/numsys_impl.h +++ b/deps/icu-small/source/i18n/numsys_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/olsontz.cpp b/deps/icu-small/source/i18n/olsontz.cpp index f4c3dd24c9..df025c0808 100644 --- a/deps/icu-small/source/i18n/olsontz.cpp +++ b/deps/icu-small/source/i18n/olsontz.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/olsontz.h b/deps/icu-small/source/i18n/olsontz.h index 7dbc303a05..6f0d36e5de 100644 --- a/deps/icu-small/source/i18n/olsontz.h +++ b/deps/icu-small/source/i18n/olsontz.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/persncal.cpp b/deps/icu-small/source/i18n/persncal.cpp index 210646ca8c..0ccff4d2bd 100644 --- a/deps/icu-small/source/i18n/persncal.cpp +++ b/deps/icu-small/source/i18n/persncal.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/persncal.h b/deps/icu-small/source/i18n/persncal.h index 3fe5a61464..ec818822b3 100644 --- a/deps/icu-small/source/i18n/persncal.h +++ b/deps/icu-small/source/i18n/persncal.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/pluralaffix.cpp b/deps/icu-small/source/i18n/pluralaffix.cpp index f6a51a79ef..ea400206b3 100644 --- a/deps/icu-small/source/i18n/pluralaffix.cpp +++ b/deps/icu-small/source/i18n/pluralaffix.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2015, International Business Machines diff --git a/deps/icu-small/source/i18n/pluralaffix.h b/deps/icu-small/source/i18n/pluralaffix.h index a08f2131d9..94366ce4cf 100644 --- a/deps/icu-small/source/i18n/pluralaffix.h +++ b/deps/icu-small/source/i18n/pluralaffix.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/plurfmt.cpp b/deps/icu-small/source/i18n/plurfmt.cpp index 8a000ce6e9..e14ef6d831 100644 --- a/deps/icu-small/source/i18n/plurfmt.cpp +++ b/deps/icu-small/source/i18n/plurfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/plurrule.cpp b/deps/icu-small/source/i18n/plurrule.cpp index 7b50634551..08ea969b5a 100644 --- a/deps/icu-small/source/i18n/plurrule.cpp +++ b/deps/icu-small/source/i18n/plurrule.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -17,6 +17,8 @@ #include "unicode/plurrule.h" #include "unicode/upluralrules.h" #include "unicode/ures.h" +#include "unicode/numfmt.h" +#include "unicode/decimfmt.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" @@ -36,7 +38,6 @@ #include "digitinterval.h" #include "visibledigits.h" - #if !UCONFIG_NO_FORMATTING U_NAMESPACE_BEGIN @@ -246,6 +247,26 @@ PluralRules::select(double number) const { return select(FixedDecimal(number)); } +UnicodeString +PluralRules::select(const Formattable& obj, const NumberFormat& fmt, UErrorCode& status) const { + if (U_SUCCESS(status)) { + const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(&fmt); + if (decFmt != NULL) { + VisibleDigitsWithExponent digits; + decFmt->initVisibleDigitsWithExponent(obj, digits, status); + if (U_SUCCESS(status)) { + return select(digits); + } + } else { + double number = obj.getDouble(status); + if (U_SUCCESS(status)) { + return select(number); + } + } + } + return UnicodeString(); +} + UnicodeString PluralRules::select(const FixedDecimal &number) const { if (mRules == NULL) { diff --git a/deps/icu-small/source/i18n/plurrule_impl.h b/deps/icu-small/source/i18n/plurrule_impl.h index c6e4767a09..9f5f66c1b7 100644 --- a/deps/icu-small/source/i18n/plurrule_impl.h +++ b/deps/icu-small/source/i18n/plurrule_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -24,6 +24,7 @@ #include "unicode/format.h" #include "unicode/locid.h" #include "unicode/parseerr.h" +#include "unicode/strenum.h" #include "unicode/ures.h" #include "uvector.h" #include "hash.h" diff --git a/deps/icu-small/source/i18n/precision.cpp b/deps/icu-small/source/i18n/precision.cpp index 5d07e0f9ee..4a68b0d886 100644 --- a/deps/icu-small/source/i18n/precision.cpp +++ b/deps/icu-small/source/i18n/precision.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2015, International Business Machines diff --git a/deps/icu-small/source/i18n/precision.h b/deps/icu-small/source/i18n/precision.h index f002fd228b..0598fa17d6 100644 --- a/deps/icu-small/source/i18n/precision.h +++ b/deps/icu-small/source/i18n/precision.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/quant.cpp b/deps/icu-small/source/i18n/quant.cpp index 6e08e628fc..1908a50484 100644 --- a/deps/icu-small/source/i18n/quant.cpp +++ b/deps/icu-small/source/i18n/quant.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/quant.h b/deps/icu-small/source/i18n/quant.h index 21adf19e70..1abb0db61a 100644 --- a/deps/icu-small/source/i18n/quant.h +++ b/deps/icu-small/source/i18n/quant.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/quantityformatter.cpp b/deps/icu-small/source/i18n/quantityformatter.cpp index c44357a53b..208e064700 100644 --- a/deps/icu-small/source/i18n/quantityformatter.cpp +++ b/deps/icu-small/source/i18n/quantityformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/quantityformatter.h b/deps/icu-small/source/i18n/quantityformatter.h index 0f61022666..6698b7a8a0 100644 --- a/deps/icu-small/source/i18n/quantityformatter.h +++ b/deps/icu-small/source/i18n/quantityformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/rbnf.cpp b/deps/icu-small/source/i18n/rbnf.cpp index 5e32d80444..d4fd574998 100644 --- a/deps/icu-small/source/i18n/rbnf.cpp +++ b/deps/icu-small/source/i18n/rbnf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -27,12 +27,14 @@ #include "unicode/udata.h" #include "unicode/udisplaycontext.h" #include "unicode/brkiter.h" -#include "nfrs.h" +#include "unicode/ucasemap.h" #include "cmemory.h" #include "cstring.h" #include "patternprops.h" #include "uresimp.h" +#include "nfrs.h" +#include "digitlst.h" // debugging // #define RBNF_DEBUG @@ -1078,18 +1080,77 @@ RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status return NULL; } +UnicodeString& +RuleBasedNumberFormat::format(const DigitList &number, + UnicodeString &appendTo, + FieldPositionIterator *posIter, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return appendTo; + } + DigitList copy(number); + if (copy.fitsIntoInt64(false)) { + format(((DigitList &)number).getInt64(), appendTo, posIter, status); + } + else { + copy.roundAtExponent(0); + if (copy.fitsIntoInt64(false)) { + format(number.getDouble(), appendTo, posIter, status); + } + else { + // We're outside of our normal range that this framework can handle. + // The DecimalFormat will provide more accurate results. + + // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 
+ NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + Formattable f; + f.adoptDigitList(new DigitList(number)); + decimalFormat->format(f, appendTo, posIter, status); + delete decimalFormat; + } + } + return appendTo; +} + + +UnicodeString& +RuleBasedNumberFormat::format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return appendTo; + } + DigitList copy(number); + if (copy.fitsIntoInt64(false)) { + format(((DigitList &)number).getInt64(), appendTo, pos, status); + } + else { + copy.roundAtExponent(0); + if (copy.fitsIntoInt64(false)) { + format(number.getDouble(), appendTo, pos, status); + } + else { + // We're outside of our normal range that this framework can handle. + // The DecimalFormat will provide more accurate results. + + // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. + NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + Formattable f; + f.adoptDigitList(new DigitList(number)); + decimalFormat->format(f, appendTo, pos, status); + delete decimalFormat; + } + } + return appendTo; +} + UnicodeString& RuleBasedNumberFormat::format(int32_t number, UnicodeString& toAppendTo, - FieldPosition& /* pos */) const + FieldPosition& pos) const { - if (defaultRuleSet) { - UErrorCode status = U_ZERO_ERROR; - int32_t startPos = toAppendTo.length(); - defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); - } - return toAppendTo; + return format((int64_t)number, toAppendTo, pos); } @@ -1100,9 +1161,7 @@ RuleBasedNumberFormat::format(int64_t number, { if (defaultRuleSet) { UErrorCode status = U_ZERO_ERROR; - int32_t startPos = toAppendTo.length(); - defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); + 
format(number, defaultRuleSet, toAppendTo, status); } return toAppendTo; } @@ -1114,11 +1173,11 @@ RuleBasedNumberFormat::format(double number, FieldPosition& /* pos */) const { int32_t startPos = toAppendTo.length(); + UErrorCode status = U_ZERO_ERROR; if (defaultRuleSet) { - UErrorCode status = U_ZERO_ERROR; defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); } - return adjustForCapitalizationContext(startPos, toAppendTo); + return adjustForCapitalizationContext(startPos, toAppendTo, status); } @@ -1126,24 +1185,10 @@ UnicodeString& RuleBasedNumberFormat::format(int32_t number, const UnicodeString& ruleSetName, UnicodeString& toAppendTo, - FieldPosition& /* pos */, + FieldPosition& pos, UErrorCode& status) const { - // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); - if (U_SUCCESS(status)) { - if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { - // throw new IllegalArgumentException("Can't use internal rule set"); - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - NFRuleSet *rs = findRuleSet(ruleSetName, status); - if (rs) { - int32_t startPos = toAppendTo.length(); - rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); - } - } - } - return toAppendTo; + return format((int64_t)number, ruleSetName, toAppendTo, pos, status); } @@ -1161,9 +1206,7 @@ RuleBasedNumberFormat::format(int64_t number, } else { NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { - int32_t startPos = toAppendTo.length(); - rs->format(number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); + format(number, rs, toAppendTo, status); } } } @@ -1187,27 +1230,72 @@ RuleBasedNumberFormat::format(double number, if (rs) { int32_t startPos = toAppendTo.length(); rs->format(number, toAppendTo, toAppendTo.length(), 0, status); - adjustForCapitalizationContext(startPos, toAppendTo); + 
adjustForCapitalizationContext(startPos, toAppendTo, status); } } } return toAppendTo; } +/** + * Bottleneck through which all the public format() methods + * that take a long pass. By the time we get here, we know + * which rule set we're using to do the formatting. + * @param number The number to format + * @param ruleSet The rule set to use to format the number + * @return The text that resulted from formatting the number + */ +UnicodeString& +RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const +{ + // all API format() routines that take a double vector through + // here. We have these two identical functions-- one taking a + // double and one taking a long-- the couple digits of precision + // that long has but double doesn't (both types are 8 bytes long, + // but double has to borrow some of the mantissa bits to hold + // the exponent). + // Create an empty string buffer where the result will + // be built, and pass it to the rule set (along with an insertion + // position of 0 and the number being formatted) to the rule set + // for formatting + + if (U_SUCCESS(status)) { + if (number == U_INT64_MIN) { + // We can't handle this value right now. Provide an accurate default value. + + // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 
+ NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + Formattable f; + FieldPosition pos(FieldPosition::DONT_CARE); + DigitList *digitList = new DigitList(); + digitList->set(number); + f.adoptDigitList(digitList); + decimalFormat->format(f, toAppendTo, pos, status); + delete decimalFormat; + } + else { + int32_t startPos = toAppendTo.length(); + ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); + adjustForCapitalizationContext(startPos, toAppendTo, status); + } + } + return toAppendTo; +} + UnicodeString& RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, - UnicodeString& currentResult) const + UnicodeString& currentResult, + UErrorCode& status) const { #if !UCONFIG_NO_BREAK_ITERATION - if (startPos==0 && currentResult.length() > 0) { + UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); + if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) { // capitalize currentResult according to context UChar32 ch = currentResult.char32At(0); - UErrorCode status = U_ZERO_ERROR; - UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); - if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL && - ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || - (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || - (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { + if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL && + ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || + (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || + (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { // titlecase first 
word of currentResult, here use sentence iterator unlike current implementations // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); diff --git a/deps/icu-small/source/i18n/rbt.cpp b/deps/icu-small/source/i18n/rbt.cpp index 62aae52f18..0444729b25 100644 --- a/deps/icu-small/source/i18n/rbt.cpp +++ b/deps/icu-small/source/i18n/rbt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt.h b/deps/icu-small/source/i18n/rbt.h index 9add715aca..005fb85384 100644 --- a/deps/icu-small/source/i18n/rbt.h +++ b/deps/icu-small/source/i18n/rbt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_data.cpp b/deps/icu-small/source/i18n/rbt_data.cpp index 1b6163956c..7a9707b988 100644 --- a/deps/icu-small/source/i18n/rbt_data.cpp +++ b/deps/icu-small/source/i18n/rbt_data.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_data.h b/deps/icu-small/source/i18n/rbt_data.h index 29e39a59ef..52a961dde0 100644 --- a/deps/icu-small/source/i18n/rbt_data.h +++ b/deps/icu-small/source/i18n/rbt_data.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_pars.cpp b/deps/icu-small/source/i18n/rbt_pars.cpp index 5e7c0ff5f9..8e49a8473a 100644 --- a/deps/icu-small/source/i18n/rbt_pars.cpp +++ b/deps/icu-small/source/i18n/rbt_pars.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_pars.h b/deps/icu-small/source/i18n/rbt_pars.h index 48067905b9..d51f2e852b 100644 --- a/deps/icu-small/source/i18n/rbt_pars.h +++ b/deps/icu-small/source/i18n/rbt_pars.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_rule.cpp b/deps/icu-small/source/i18n/rbt_rule.cpp index a04a535f0e..db02f76035 100644 --- a/deps/icu-small/source/i18n/rbt_rule.cpp +++ b/deps/icu-small/source/i18n/rbt_rule.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_rule.h b/deps/icu-small/source/i18n/rbt_rule.h index b25afd6ef0..eb8556df0c 100644 --- a/deps/icu-small/source/i18n/rbt_rule.h +++ b/deps/icu-small/source/i18n/rbt_rule.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) {1999-2001}, International Business Machines Corporation and others. All Rights Reserved. diff --git a/deps/icu-small/source/i18n/rbt_set.cpp b/deps/icu-small/source/i18n/rbt_set.cpp index f2c78ca9c8..939c0ea39a 100644 --- a/deps/icu-small/source/i18n/rbt_set.cpp +++ b/deps/icu-small/source/i18n/rbt_set.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbt_set.h b/deps/icu-small/source/i18n/rbt_set.h index ed76e6ddf7..9b2b8b38db 100644 --- a/deps/icu-small/source/i18n/rbt_set.h +++ b/deps/icu-small/source/i18n/rbt_set.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/rbtz.cpp b/deps/icu-small/source/i18n/rbtz.cpp index fb458dd502..951073abc5 100644 --- a/deps/icu-small/source/i18n/rbtz.cpp +++ b/deps/icu-small/source/i18n/rbtz.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp index d0e166cfa4..6cfa61f187 100644 --- a/deps/icu-small/source/i18n/regexcmp.cpp +++ b/deps/icu-small/source/i18n/regexcmp.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html // // file: regexcmp.cpp @@ -2637,6 +2637,16 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh } +// Increment with overflow check. +// val and delta will both be positive. + +static int32_t safeIncrement(int32_t val, int32_t delta) { + if (INT32_MAX - val > delta) { + return val + delta; + } else { + return INT32_MAX; + } +} //------------------------------------------------------------------------------ @@ -2737,7 +2747,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->add(URX_VAL(op)); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2750,7 +2760,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(*s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2787,7 +2797,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(*s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2802,7 +2812,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(sc); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2819,7 +2829,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2836,7 +2846,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2855,7 +2865,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2879,7 +2889,7 @@ void 
RegexCompile::matchStartType() { } numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2895,7 +2905,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->complement(); numInitialStrings += 2; } - currentLen++; + currentLen = safeIncrement(currentLen, 1); atStart = FALSE; break; @@ -2975,7 +2985,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialStringLen = stringLen; } - currentLen += stringLen; + currentLen = safeIncrement(currentLen, stringLen); atStart = FALSE; } break; @@ -3000,7 +3010,7 @@ void RegexCompile::matchStartType() { fRXPat->fInitialChars->addAll(s); numInitialStrings += 2; // Matching on an initial string not possible. } - currentLen += stringLen; + currentLen = safeIncrement(currentLen, stringLen); atStart = FALSE; } break; @@ -3258,7 +3268,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { case URX_DOTANY_ALL: // . matches one or two. case URX_DOTANY: case URX_DOTANY_UNIX: - currentLen++; + currentLen = safeIncrement(currentLen, 1); break; @@ -3310,7 +3320,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { { loc++; int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc); - currentLen += URX_VAL(stringLenOp); + currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp)); } break; @@ -3323,7 +3333,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { // Assume a min length of one for now. A min length of zero causes // optimization failures for a pattern like "string"+ // currentLen += URX_VAL(stringLenOp); - currentLen += 1; + currentLen = safeIncrement(currentLen, 1); } break; @@ -3433,18 +3443,6 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { return currentLen; } -// Increment with overflow check. -// val and delta will both be positive. 
- -static int32_t safeIncrement(int32_t val, int32_t delta) { - if (INT32_MAX - val > delta) { - return val + delta; - } else { - return INT32_MAX; - } -} - - //------------------------------------------------------------------------------ // // maxMatchLength Calculate the length of the longest string that could diff --git a/deps/icu-small/source/i18n/regexcmp.h b/deps/icu-small/source/i18n/regexcmp.h index 931f2387b5..85b7586793 100644 --- a/deps/icu-small/source/i18n/regexcmp.h +++ b/deps/icu-small/source/i18n/regexcmp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // regexcmp.h diff --git a/deps/icu-small/source/i18n/regexcst.h b/deps/icu-small/source/i18n/regexcst.h index 259b44f5dd..a07d85a277 100644 --- a/deps/icu-small/source/i18n/regexcst.h +++ b/deps/icu-small/source/i18n/regexcst.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html //--------------------------------------------------------------------------------- // diff --git a/deps/icu-small/source/i18n/regeximp.cpp b/deps/icu-small/source/i18n/regeximp.cpp index c1360ebf6c..454e7f836b 100644 --- a/deps/icu-small/source/i18n/regeximp.cpp +++ b/deps/icu-small/source/i18n/regeximp.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 2012 International Business Machines Corporation @@ -19,8 +19,7 @@ U_NAMESPACE_BEGIN CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) : - fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { - fcsp = ucase_getSingleton(); + fUText(text), fFoldChars(NULL), fFoldLength(0) { } CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {} @@ -35,7 +34,7 @@ UChar32 CaseFoldingUTextIterator::next() { if (originalC == U_SENTINEL) { return originalC; } - fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); + fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { // input code point folds to a single code point, possibly itself. // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. @@ -65,8 +64,7 @@ UBool CaseFoldingUTextIterator::inExpansion() { CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) : - fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) { - fcsp = ucase_getSingleton(); + fChars(chars), fIndex(start), fLimit(limit), fFoldChars(NULL), fFoldLength(0) { } @@ -84,7 +82,7 @@ UChar32 CaseFoldingUCharIterator::next() { } U16_NEXT(fChars, fIndex, fLimit, originalC); - fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); + fFoldLength = ucase_toFullFolding(originalC, &fFoldChars, U_FOLD_CASE_DEFAULT); if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) { // input code point folds to a single code point, possibly itself. // See comment in ucase.h for explanation of return values from ucase_toFullFoldings. 
diff --git a/deps/icu-small/source/i18n/regeximp.h b/deps/icu-small/source/i18n/regeximp.h index 0261c58c63..da4a861bde 100644 --- a/deps/icu-small/source/i18n/regeximp.h +++ b/deps/icu-small/source/i18n/regeximp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // Copyright (C) 2002-2015 International Business Machines Corporation @@ -374,7 +374,6 @@ class CaseFoldingUTextIterator: public UMemory { // folding of the same code point from the orignal UText. private: UText &fUText; - const UCaseProps *fcsp; const UChar *fFoldChars; int32_t fFoldLength; int32_t fFoldIndex; @@ -404,7 +403,6 @@ class CaseFoldingUCharIterator: public UMemory { const UChar *fChars; int64_t fIndex; int64_t fLimit; - const UCaseProps *fcsp; const UChar *fFoldChars; int32_t fFoldLength; int32_t fFoldIndex; diff --git a/deps/icu-small/source/i18n/regexst.cpp b/deps/icu-small/source/i18n/regexst.cpp index a8feffa1bd..ad74ee508e 100644 --- a/deps/icu-small/source/i18n/regexst.cpp +++ b/deps/icu-small/source/i18n/regexst.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // regexst.h diff --git a/deps/icu-small/source/i18n/regexst.h b/deps/icu-small/source/i18n/regexst.h index 21f7ec945c..f0696c25a3 100644 --- a/deps/icu-small/source/i18n/regexst.h +++ b/deps/icu-small/source/i18n/regexst.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html // // regexst.h diff --git a/deps/icu-small/source/i18n/regextxt.cpp b/deps/icu-small/source/i18n/regextxt.cpp index d6157f5ed6..41bb4a944b 100644 --- a/deps/icu-small/source/i18n/regextxt.cpp +++ b/deps/icu-small/source/i18n/regextxt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/i18n/regextxt.h b/deps/icu-small/source/i18n/regextxt.h index c5651aefd4..9cfabbe415 100644 --- a/deps/icu-small/source/i18n/regextxt.h +++ b/deps/icu-small/source/i18n/regextxt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/i18n/region.cpp b/deps/icu-small/source/i18n/region.cpp index fdd467fc98..66f9ef35de 100644 --- a/deps/icu-small/source/i18n/region.cpp +++ b/deps/icu-small/source/i18n/region.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/region_impl.h b/deps/icu-small/source/i18n/region_impl.h index 852209603b..c0702af7df 100644 --- a/deps/icu-small/source/i18n/region_impl.h +++ b/deps/icu-small/source/i18n/region_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/reldatefmt.cpp b/deps/icu-small/source/i18n/reldatefmt.cpp index 7009b190a2..18c073b9ee 100644 --- a/deps/icu-small/source/i18n/reldatefmt.cpp +++ b/deps/icu-small/source/i18n/reldatefmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -15,6 +15,7 @@ #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION #include "unicode/dtfmtsym.h" +#include "unicode/ucasemap.h" #include "unicode/ureldatefmt.h" #include "unicode/udisplaycontext.h" #include "unicode/unum.h" diff --git a/deps/icu-small/source/i18n/reldtfmt.cpp b/deps/icu-small/source/i18n/reldtfmt.cpp index 4a92869535..d3ab45dc63 100644 --- a/deps/icu-small/source/i18n/reldtfmt.cpp +++ b/deps/icu-small/source/i18n/reldtfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -20,7 +20,7 @@ #include "unicode/udisplaycontext.h" #include "unicode/uchar.h" #include "unicode/brkiter.h" - +#include "unicode/ucasemap.h" #include "reldtfmt.h" #include "cmemory.h" #include "uresimp.h" diff --git a/deps/icu-small/source/i18n/reldtfmt.h b/deps/icu-small/source/i18n/reldtfmt.h index ea091a91c3..5063a6388f 100644 --- a/deps/icu-small/source/i18n/reldtfmt.h +++ b/deps/icu-small/source/i18n/reldtfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/rematch.cpp b/deps/icu-small/source/i18n/rematch.cpp index 5a5bb80e05..e3fdff7484 100644 --- a/deps/icu-small/source/i18n/rematch.cpp +++ b/deps/icu-small/source/i18n/rematch.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************** @@ -3566,7 +3566,14 @@ GC_Done: } } fp = StateSave(fp, fp->fPatIdx, status); + } else { + // Increment time-out counter. (StateSave() does it if count >= minCount) + fTickCounter--; + if (fTickCounter <= 0) { + IncrementTime(status); // Re-initializes fTickCounter + } } + fp->fPatIdx = opValue + 4; // Loop back. } break; @@ -3623,6 +3630,11 @@ GC_Done: // We haven't met the minimum number of matches yet. // Loop back for another one. fp->fPatIdx = opValue + 4; // Loop back. + // Increment time-out counter. (StateSave() does it if count >= minCount) + fTickCounter--; + if (fTickCounter <= 0) { + IncrementTime(status); // Re-initializes fTickCounter + } } else { // We do have the minimum number of matches. @@ -5099,6 +5111,12 @@ GC_Done: } } fp = StateSave(fp, fp->fPatIdx, status); + } else { + // Increment time-out counter. (StateSave() does it if count >= minCount) + fTickCounter--; + if (fTickCounter <= 0) { + IncrementTime(status); // Re-initializes fTickCounter + } } fp->fPatIdx = opValue + 4; // Loop back. } @@ -5156,6 +5174,10 @@ GC_Done: // We haven't met the minimum number of matches yet. // Loop back for another one. fp->fPatIdx = opValue + 4; // Loop back. + fTickCounter--; + if (fTickCounter <= 0) { + IncrementTime(status); // Re-initializes fTickCounter + } } else { // We do have the minimum number of matches. 
diff --git a/deps/icu-small/source/i18n/remtrans.cpp b/deps/icu-small/source/i18n/remtrans.cpp index 89837f991d..70a6ed3935 100644 --- a/deps/icu-small/source/i18n/remtrans.cpp +++ b/deps/icu-small/source/i18n/remtrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/remtrans.h b/deps/icu-small/source/i18n/remtrans.h index a5635781f9..ed038d5f2f 100644 --- a/deps/icu-small/source/i18n/remtrans.h +++ b/deps/icu-small/source/i18n/remtrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/repattrn.cpp b/deps/icu-small/source/i18n/repattrn.cpp index b792ca0484..b03873066c 100644 --- a/deps/icu-small/source/i18n/repattrn.cpp +++ b/deps/icu-small/source/i18n/repattrn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html // // file: repattrn.cpp diff --git a/deps/icu-small/source/i18n/rulebasedcollator.cpp b/deps/icu-small/source/i18n/rulebasedcollator.cpp index 4852667ada..ab65f10a3b 100644 --- a/deps/icu-small/source/i18n/rulebasedcollator.cpp +++ b/deps/icu-small/source/i18n/rulebasedcollator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/scientificnumberformatter.cpp b/deps/icu-small/source/i18n/scientificnumberformatter.cpp index 56a43f9b7f..adf032d989 100644 --- a/deps/icu-small/source/i18n/scientificnumberformatter.cpp +++ b/deps/icu-small/source/i18n/scientificnumberformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/scriptset.cpp b/deps/icu-small/source/i18n/scriptset.cpp index 951fe44080..9358e63b9e 100644 --- a/deps/icu-small/source/i18n/scriptset.cpp +++ b/deps/icu-small/source/i18n/scriptset.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/scriptset.h b/deps/icu-small/source/i18n/scriptset.h index e8de3b9613..385c3e3e53 100644 --- a/deps/icu-small/source/i18n/scriptset.h +++ b/deps/icu-small/source/i18n/scriptset.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/search.cpp b/deps/icu-small/source/i18n/search.cpp index 77323cc664..baf879414b 100644 --- a/deps/icu-small/source/i18n/search.cpp +++ b/deps/icu-small/source/i18n/search.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/selfmt.cpp b/deps/icu-small/source/i18n/selfmt.cpp index 041fea515c..29aee36457 100644 --- a/deps/icu-small/source/i18n/selfmt.cpp +++ b/deps/icu-small/source/i18n/selfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/i18n/selfmtimpl.h b/deps/icu-small/source/i18n/selfmtimpl.h index 75bc3e343a..74d6dc2181 100644 --- a/deps/icu-small/source/i18n/selfmtimpl.h +++ b/deps/icu-small/source/i18n/selfmtimpl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/i18n/sharedbreakiterator.cpp b/deps/icu-small/source/i18n/sharedbreakiterator.cpp index ca962c6283..82f482bdd7 100644 --- a/deps/icu-small/source/i18n/sharedbreakiterator.cpp +++ b/deps/icu-small/source/i18n/sharedbreakiterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/sharedbreakiterator.h b/deps/icu-small/source/i18n/sharedbreakiterator.h index 58be1f6a76..b6d67bc8e8 100644 --- a/deps/icu-small/source/i18n/sharedbreakiterator.h +++ b/deps/icu-small/source/i18n/sharedbreakiterator.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/sharedcalendar.h b/deps/icu-small/source/i18n/sharedcalendar.h index f6d97b55bc..1526f92e88 100644 --- a/deps/icu-small/source/i18n/sharedcalendar.h +++ b/deps/icu-small/source/i18n/sharedcalendar.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/shareddateformatsymbols.h b/deps/icu-small/source/i18n/shareddateformatsymbols.h index a11a8a391b..ca9a210819 100644 --- a/deps/icu-small/source/i18n/shareddateformatsymbols.h +++ b/deps/icu-small/source/i18n/shareddateformatsymbols.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/sharednumberformat.h b/deps/icu-small/source/i18n/sharednumberformat.h index fcb618a4d4..a7e105b5ac 100644 --- a/deps/icu-small/source/i18n/sharednumberformat.h +++ b/deps/icu-small/source/i18n/sharednumberformat.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/sharedpluralrules.h b/deps/icu-small/source/i18n/sharedpluralrules.h index faed6dea0e..28d8b25c14 100644 --- a/deps/icu-small/source/i18n/sharedpluralrules.h +++ b/deps/icu-small/source/i18n/sharedpluralrules.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/significantdigitinterval.h b/deps/icu-small/source/i18n/significantdigitinterval.h index 336af784a5..fc23370de5 100644 --- a/deps/icu-small/source/i18n/significantdigitinterval.h +++ b/deps/icu-small/source/i18n/significantdigitinterval.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/simpletz.cpp b/deps/icu-small/source/i18n/simpletz.cpp index 7dadef5ae6..557b02620b 100644 --- a/deps/icu-small/source/i18n/simpletz.cpp +++ b/deps/icu-small/source/i18n/simpletz.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -1189,13 +1189,22 @@ SimpleTimeZone::initTransitionRules(UErrorCode& status) { // Create a TimeZoneRule for initial time if (firstStdStart < firstDstStart) { initialRule = new InitialTimeZoneRule(tzid+UnicodeString(DST_STR), getRawOffset(), dstRule->getDSTSavings()); + if (initialRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } firstTransition = new TimeZoneTransition(firstStdStart, *initialRule, *stdRule); } else { initialRule = new InitialTimeZoneRule(tzid+UnicodeString(STD_STR), getRawOffset(), 0); + if (initialRule == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + deleteTransitionRules(); + return; + } firstTransition = new TimeZoneTransition(firstDstStart, *initialRule, *dstRule); } - // Check for null pointers. - if (initialRule == NULL || firstTransition == NULL) { + if (firstTransition == NULL) { status = U_MEMORY_ALLOCATION_ERROR; deleteTransitionRules(); return; diff --git a/deps/icu-small/source/i18n/smallintformatter.cpp b/deps/icu-small/source/i18n/smallintformatter.cpp index b96f6dad3b..0c56e38bd6 100644 --- a/deps/icu-small/source/i18n/smallintformatter.cpp +++ b/deps/icu-small/source/i18n/smallintformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2015, International Business Machines diff --git a/deps/icu-small/source/i18n/smallintformatter.h b/deps/icu-small/source/i18n/smallintformatter.h index 846d6b4054..3373a9c35f 100644 --- a/deps/icu-small/source/i18n/smallintformatter.h +++ b/deps/icu-small/source/i18n/smallintformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/smpdtfmt.cpp b/deps/icu-small/source/i18n/smpdtfmt.cpp index 85cc162a11..3c0670446b 100644 --- a/deps/icu-small/source/i18n/smpdtfmt.cpp +++ b/deps/icu-small/source/i18n/smpdtfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -48,6 +48,7 @@ #include "unicode/simpletz.h" #include "unicode/rbtz.h" #include "unicode/tzfmt.h" +#include "unicode/ucasemap.h" #include "unicode/utf16.h" #include "unicode/vtzone.h" #include "unicode/udisplaycontext.h" @@ -64,6 +65,7 @@ #include #include "smpdtfst.h" #include "sharednumberformat.h" +#include "ucasemap_imp.h" #include "ustr_imp.h" #include "charstr.h" #include "uvector.h" diff --git a/deps/icu-small/source/i18n/smpdtfst.cpp b/deps/icu-small/source/i18n/smpdtfst.cpp index 50980a99e4..ff0dec232d 100644 --- a/deps/icu-small/source/i18n/smpdtfst.cpp +++ b/deps/icu-small/source/i18n/smpdtfst.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/smpdtfst.h b/deps/icu-small/source/i18n/smpdtfst.h index 38ad558de2..ed8ce4371f 100644 --- a/deps/icu-small/source/i18n/smpdtfst.h +++ b/deps/icu-small/source/i18n/smpdtfst.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -16,6 +16,7 @@ #ifndef SMPDTFST_H #define SMPDTFST_H +#include "unicode/uobject.h" #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/sortkey.cpp b/deps/icu-small/source/i18n/sortkey.cpp index 68b0f062b8..fb030c4990 100644 --- a/deps/icu-small/source/i18n/sortkey.cpp +++ b/deps/icu-small/source/i18n/sortkey.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/standardplural.cpp b/deps/icu-small/source/i18n/standardplural.cpp index c39bae1ab1..0391034b3e 100644 --- a/deps/icu-small/source/i18n/standardplural.cpp +++ b/deps/icu-small/source/i18n/standardplural.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/standardplural.h b/deps/icu-small/source/i18n/standardplural.h index 56c63c347c..33e1d605f6 100644 --- a/deps/icu-small/source/i18n/standardplural.h +++ b/deps/icu-small/source/i18n/standardplural.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/strmatch.cpp b/deps/icu-small/source/i18n/strmatch.cpp index e72cfc9ab0..d5b018aa6b 100644 --- a/deps/icu-small/source/i18n/strmatch.cpp +++ b/deps/icu-small/source/i18n/strmatch.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/strmatch.h b/deps/icu-small/source/i18n/strmatch.h index 0241adfd3f..7152a24a07 100644 --- a/deps/icu-small/source/i18n/strmatch.h +++ b/deps/icu-small/source/i18n/strmatch.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2001-2011, International Business Machines Corporation diff --git a/deps/icu-small/source/i18n/strrepl.cpp b/deps/icu-small/source/i18n/strrepl.cpp index d061eff579..132c844c2d 100644 --- a/deps/icu-small/source/i18n/strrepl.cpp +++ b/deps/icu-small/source/i18n/strrepl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/strrepl.h b/deps/icu-small/source/i18n/strrepl.h index a452db993f..feec058152 100644 --- a/deps/icu-small/source/i18n/strrepl.h +++ b/deps/icu-small/source/i18n/strrepl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/stsearch.cpp b/deps/icu-small/source/i18n/stsearch.cpp index 643ec21b27..bf4d80b6db 100644 --- a/deps/icu-small/source/i18n/stsearch.cpp +++ b/deps/icu-small/source/i18n/stsearch.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/taiwncal.cpp b/deps/icu-small/source/i18n/taiwncal.cpp index f1ca6fa135..e2757dbd55 100644 --- a/deps/icu-small/source/i18n/taiwncal.cpp +++ b/deps/icu-small/source/i18n/taiwncal.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/taiwncal.h b/deps/icu-small/source/i18n/taiwncal.h index b15cff5beb..99bbfb53f2 100644 --- a/deps/icu-small/source/i18n/taiwncal.h +++ b/deps/icu-small/source/i18n/taiwncal.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/timezone.cpp b/deps/icu-small/source/i18n/timezone.cpp index 427674aac4..e662bf7674 100644 --- a/deps/icu-small/source/i18n/timezone.cpp +++ b/deps/icu-small/source/i18n/timezone.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/titletrn.cpp b/deps/icu-small/source/i18n/titletrn.cpp index a1de8be666..4e75c824a0 100644 --- a/deps/icu-small/source/i18n/titletrn.cpp +++ b/deps/icu-small/source/i18n/titletrn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -97,7 +97,7 @@ void TitlecaseTransliterator::handleTransliterate( int32_t start; for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) { c = text.char32At(start); - type=ucase_getTypeOrIgnorable(fCsp, c); + type=ucase_getTypeOrIgnorable(c); if(type>0) { // cased doTitle=FALSE; break; @@ -118,19 +118,19 @@ void TitlecaseTransliterator::handleTransliterate( UnicodeString tmp; const UChar *s; - int32_t textPos, delta, result, locCache=0; + int32_t textPos, delta, result; for(textPos=offsets.start; textPos=0) { // not case-ignorable if(doTitle) { - result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); + result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT); } else { - result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", 
&locCache); + result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT); } doTitle = (UBool)(type==0); // doTitle=isUncased diff --git a/deps/icu-small/source/i18n/titletrn.h b/deps/icu-small/source/i18n/titletrn.h index a6380e3bd1..166378fe9d 100644 --- a/deps/icu-small/source/i18n/titletrn.h +++ b/deps/icu-small/source/i18n/titletrn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/tmunit.cpp b/deps/icu-small/source/i18n/tmunit.cpp index d9da268125..ca308cca22 100644 --- a/deps/icu-small/source/i18n/tmunit.cpp +++ b/deps/icu-small/source/i18n/tmunit.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tmutamt.cpp b/deps/icu-small/source/i18n/tmutamt.cpp index 7be730765b..0e2b91fbb2 100644 --- a/deps/icu-small/source/i18n/tmutamt.cpp +++ b/deps/icu-small/source/i18n/tmutamt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tmutfmt.cpp b/deps/icu-small/source/i18n/tmutfmt.cpp index 1669546f76..50dac8b7ce 100644 --- a/deps/icu-small/source/i18n/tmutfmt.cpp +++ b/deps/icu-small/source/i18n/tmutfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tolowtrn.cpp b/deps/icu-small/source/i18n/tolowtrn.cpp index f0a59bbd0b..063cc88d1c 100644 --- a/deps/icu-small/source/i18n/tolowtrn.cpp +++ b/deps/icu-small/source/i18n/tolowtrn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/tolowtrn.h b/deps/icu-small/source/i18n/tolowtrn.h index 616e59899f..e311431224 100644 --- a/deps/icu-small/source/i18n/tolowtrn.h +++ b/deps/icu-small/source/i18n/tolowtrn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/toupptrn.cpp b/deps/icu-small/source/i18n/toupptrn.cpp index a34792e07c..098dba9a3c 100644 --- a/deps/icu-small/source/i18n/toupptrn.cpp +++ b/deps/icu-small/source/i18n/toupptrn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/toupptrn.h b/deps/icu-small/source/i18n/toupptrn.h index eae44e7d18..677a04e5c7 100644 --- a/deps/icu-small/source/i18n/toupptrn.h +++ b/deps/icu-small/source/i18n/toupptrn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/translit.cpp b/deps/icu-small/source/i18n/translit.cpp index 79328baa2b..de54e952dc 100644 --- a/deps/icu-small/source/i18n/translit.cpp +++ b/deps/icu-small/source/i18n/translit.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/transreg.cpp b/deps/icu-small/source/i18n/transreg.cpp index cc1d51dea8..d864ad3463 100644 --- a/deps/icu-small/source/i18n/transreg.cpp +++ b/deps/icu-small/source/i18n/transreg.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/transreg.h b/deps/icu-small/source/i18n/transreg.h index 334963f8d1..6fc35c8247 100644 --- a/deps/icu-small/source/i18n/transreg.h +++ b/deps/icu-small/source/i18n/transreg.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/tridpars.cpp b/deps/icu-small/source/i18n/tridpars.cpp index 140e3d7d1c..68bbd2d040 100644 --- a/deps/icu-small/source/i18n/tridpars.cpp +++ b/deps/icu-small/source/i18n/tridpars.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/tridpars.h b/deps/icu-small/source/i18n/tridpars.h index 7c226023ef..3d657ed17c 100644 --- a/deps/icu-small/source/i18n/tridpars.h +++ b/deps/icu-small/source/i18n/tridpars.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************** diff --git a/deps/icu-small/source/i18n/tzfmt.cpp b/deps/icu-small/source/i18n/tzfmt.cpp index 783edac34f..45eda6ffb6 100644 --- a/deps/icu-small/source/i18n/tzfmt.cpp +++ b/deps/icu-small/source/i18n/tzfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -14,8 +14,10 @@ #include "unicode/calendar.h" #include "unicode/tzfmt.h" #include "unicode/numsys.h" +#include "unicode/strenum.h" #include "unicode/uchar.h" #include "unicode/udat.h" +#include "unicode/ustring.h" #include "tzgnames.h" #include "cmemory.h" #include "cstring.h" diff --git a/deps/icu-small/source/i18n/tzgnames.cpp b/deps/icu-small/source/i18n/tzgnames.cpp index 4fc726ea54..b14e9835d9 100644 --- a/deps/icu-small/source/i18n/tzgnames.cpp +++ b/deps/icu-small/source/i18n/tzgnames.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -18,6 +18,7 @@ #include "unicode/rbtz.h" #include "unicode/simpleformatter.h" #include "unicode/simpletz.h" +#include "unicode/strenum.h" #include "unicode/vtzone.h" #include "cmemory.h" diff --git a/deps/icu-small/source/i18n/tzgnames.h b/deps/icu-small/source/i18n/tzgnames.h index e78e8ee991..d896af8ba8 100644 --- a/deps/icu-small/source/i18n/tzgnames.h +++ b/deps/icu-small/source/i18n/tzgnames.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tznames.cpp b/deps/icu-small/source/i18n/tznames.cpp index 6aefd13b53..689fdeb091 100644 --- a/deps/icu-small/source/i18n/tznames.cpp +++ b/deps/icu-small/source/i18n/tznames.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tznames_impl.cpp b/deps/icu-small/source/i18n/tznames_impl.cpp index 3e92acb6f9..d00d7e1145 100644 --- a/deps/icu-small/source/i18n/tznames_impl.cpp +++ b/deps/icu-small/source/i18n/tznames_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -15,6 +15,7 @@ #if !UCONFIG_NO_FORMATTING +#include "unicode/strenum.h" #include "unicode/ustring.h" #include "unicode/timezone.h" diff --git a/deps/icu-small/source/i18n/tznames_impl.h b/deps/icu-small/source/i18n/tznames_impl.h index 6b913bb6bf..9251f9ef47 100644 --- a/deps/icu-small/source/i18n/tznames_impl.h +++ b/deps/icu-small/source/i18n/tznames_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tzrule.cpp b/deps/icu-small/source/i18n/tzrule.cpp index 2ff61302b8..f60a5e0dd5 100644 --- a/deps/icu-small/source/i18n/tzrule.cpp +++ b/deps/icu-small/source/i18n/tzrule.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/tztrans.cpp b/deps/icu-small/source/i18n/tztrans.cpp index 76e259c5ae..3199b78ea8 100644 --- a/deps/icu-small/source/i18n/tztrans.cpp +++ b/deps/icu-small/source/i18n/tztrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/ucal.cpp b/deps/icu-small/source/i18n/ucal.cpp index a9377b1e51..4154eea83f 100644 --- a/deps/icu-small/source/i18n/ucal.cpp +++ b/deps/icu-small/source/i18n/ucal.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/ucln_in.cpp b/deps/icu-small/source/i18n/ucln_in.cpp index b33a689237..74c8acfab1 100644 --- a/deps/icu-small/source/i18n/ucln_in.cpp +++ b/deps/icu-small/source/i18n/ucln_in.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * * ****************************************************************************** * file name: ucln_in.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/ucln_in.h b/deps/icu-small/source/i18n/ucln_in.h index b609fce0c2..35a8a23e90 100644 --- a/deps/icu-small/source/i18n/ucln_in.h +++ b/deps/icu-small/source/i18n/ucln_in.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
****************************************************************************** * file name: ucln_in.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/ucol.cpp b/deps/icu-small/source/i18n/ucol.cpp index c622aef7c2..34a394682f 100644 --- a/deps/icu-small/source/i18n/ucol.cpp +++ b/deps/icu-small/source/i18n/ucol.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/ucol_imp.h b/deps/icu-small/source/i18n/ucol_imp.h index 7c9e8f6891..a251fc461d 100644 --- a/deps/icu-small/source/i18n/ucol_imp.h +++ b/deps/icu-small/source/i18n/ucol_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -10,7 +10,7 @@ * * Private implementation header for C collation * file name: ucol_imp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/ucol_res.cpp b/deps/icu-small/source/i18n/ucol_res.cpp index 314b766ee6..d1597021c3 100644 --- a/deps/icu-small/source/i18n/ucol_res.cpp +++ b/deps/icu-small/source/i18n/ucol_res.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol_res.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -680,6 +680,7 @@ ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale, return NULL; } memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); + ulist_resetList(sink.values); // Initialize the iterator. en->context = sink.values; sink.values = NULL; // Avoid deletion in the sink destructor. return en; diff --git a/deps/icu-small/source/i18n/ucol_sit.cpp b/deps/icu-small/source/i18n/ucol_sit.cpp index c81977b8a3..cf507f61ed 100644 --- a/deps/icu-small/source/i18n/ucol_sit.cpp +++ b/deps/icu-small/source/i18n/ucol_sit.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol_sit.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/ucoleitr.cpp b/deps/icu-small/source/i18n/ucoleitr.cpp index 4b46b205aa..6842061bab 100644 --- a/deps/icu-small/source/i18n/ucoleitr.cpp +++ b/deps/icu-small/source/i18n/ucoleitr.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/i18n/ucsdet.cpp b/deps/icu-small/source/i18n/ucsdet.cpp index dd69d9f548..46f69cf90c 100644 --- a/deps/icu-small/source/i18n/ucsdet.cpp +++ b/deps/icu-small/source/i18n/ucsdet.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/udat.cpp b/deps/icu-small/source/i18n/udat.cpp index b07e1ceab7..d086067c03 100644 --- a/deps/icu-small/source/i18n/udat.cpp +++ b/deps/icu-small/source/i18n/udat.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/udateintervalformat.cpp b/deps/icu-small/source/i18n/udateintervalformat.cpp index e6eec44847..44ba6b9fb1 100644 --- a/deps/icu-small/source/i18n/udateintervalformat.cpp +++ b/deps/icu-small/source/i18n/udateintervalformat.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/udatpg.cpp b/deps/icu-small/source/i18n/udatpg.cpp index d8824afdfc..9ba82b529c 100644 --- a/deps/icu-small/source/i18n/udatpg.cpp +++ b/deps/icu-small/source/i18n/udatpg.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. 
and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: udatpg.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/ufieldpositer.cpp b/deps/icu-small/source/i18n/ufieldpositer.cpp index b1c9c64805..64de856c30 100644 --- a/deps/icu-small/source/i18n/ufieldpositer.cpp +++ b/deps/icu-small/source/i18n/ufieldpositer.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/uitercollationiterator.cpp b/deps/icu-small/source/i18n/uitercollationiterator.cpp index eb71725380..103c91cac8 100644 --- a/deps/icu-small/source/i18n/uitercollationiterator.cpp +++ b/deps/icu-small/source/i18n/uitercollationiterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/uitercollationiterator.h b/deps/icu-small/source/i18n/uitercollationiterator.h index da9f8d3468..62b6f83419 100644 --- a/deps/icu-small/source/i18n/uitercollationiterator.h +++ b/deps/icu-small/source/i18n/uitercollationiterator.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/ulocdata.c b/deps/icu-small/source/i18n/ulocdata.c deleted file mode 100644 index e1e61ce870..0000000000 --- a/deps/icu-small/source/i18n/ulocdata.c +++ /dev/null @@ -1,386 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* * -* Copyright (C) 2003-2016, International Business Machines * -* Corporation and others. All Rights Reserved. * -* * -****************************************************************************** -* file name: ulocdata.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003Oct21 -* created by: Ram Viswanadha,John Emmons -*/ - -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/ures.h" -#include "unicode/uloc.h" -#include "unicode/ulocdata.h" -#include "uresimp.h" -#include "ureslocs.h" -#include "ulocimp.h" - -#define MEASUREMENT_SYSTEM "MeasurementSystem" -#define PAPER_SIZE "PaperSize" - -/** A locale data object. - * For usage in C programs. 
- * @draft ICU 3.4 - */ -struct ULocaleData { - /** - * Controls the "No Substitute" behavior of this locale data object - */ - UBool noSubstitute; - - /** - * Pointer to the resource bundle associated with this locale data object - */ - UResourceBundle *bundle; - - /** - * Pointer to the lang resource bundle associated with this locale data object - */ - UResourceBundle *langBundle; -}; - -U_CAPI ULocaleData* U_EXPORT2 -ulocdata_open(const char *localeID, UErrorCode *status) -{ - ULocaleData *uld; - - if (U_FAILURE(*status)) { - return NULL; - } - - uld = (ULocaleData *)uprv_malloc(sizeof(ULocaleData)); - if (uld == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return(NULL); - } - - uld->langBundle = NULL; - - uld->noSubstitute = FALSE; - uld->bundle = ures_open(NULL, localeID, status); - uld->langBundle = ures_open(U_ICUDATA_LANG, localeID, status); - - if (U_FAILURE(*status)) { - uprv_free(uld); - return NULL; - } - - return uld; -} - -U_CAPI void U_EXPORT2 -ulocdata_close(ULocaleData *uld) -{ - if ( uld != NULL ) { - ures_close(uld->langBundle); - ures_close(uld->bundle); - uprv_free(uld); - } -} - -U_CAPI void U_EXPORT2 -ulocdata_setNoSubstitute(ULocaleData *uld, UBool setting) -{ - uld->noSubstitute = setting; -} - -U_CAPI UBool U_EXPORT2 -ulocdata_getNoSubstitute(ULocaleData *uld) -{ - return uld->noSubstitute; -} - -U_CAPI USet* U_EXPORT2 -ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn, - uint32_t options, ULocaleDataExemplarSetType extype, UErrorCode *status){ - - static const char* const exemplarSetTypes[] = { "ExemplarCharacters", - "AuxExemplarCharacters", - "ExemplarCharactersIndex", - "ExemplarCharactersPunctuation"}; - const UChar *exemplarChars = NULL; - int32_t len = 0; - UErrorCode localStatus = U_ZERO_ERROR; - - if (U_FAILURE(*status)) - return NULL; - - exemplarChars = ures_getStringByKey(uld->bundle, exemplarSetTypes[extype], &len, &localStatus); - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = 
U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)) - return NULL; - - if(fillIn != NULL) - uset_applyPattern(fillIn, exemplarChars, len, - USET_IGNORE_SPACE | options, status); - else - fillIn = uset_openPatternOptions(exemplarChars, len, - USET_IGNORE_SPACE | options, status); - - return fillIn; - -} - -U_CAPI int32_t U_EXPORT2 -ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, - UChar *result, int32_t resultLength, UErrorCode *status){ - - static const char* const delimiterKeys[] = { - "quotationStart", - "quotationEnd", - "alternateQuotationStart", - "alternateQuotationEnd" - }; - - UResourceBundle *delimiterBundle; - int32_t len = 0; - const UChar *delimiter = NULL; - UErrorCode localStatus = U_ZERO_ERROR; - - if (U_FAILURE(*status)) - return 0; - - delimiterBundle = ures_getByKey(uld->bundle, "delimiters", NULL, &localStatus); - - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)){ - ures_close(delimiterBundle); - return 0; - } - - delimiter = ures_getStringByKey(delimiterBundle, delimiterKeys[type], &len, &localStatus); - ures_close(delimiterBundle); - - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)){ - return 0; - } - - u_strncpy(result,delimiter, resultLength); - return len; -} - -static UResourceBundle * measurementTypeBundleForLocale(const char *localeID, const char *measurementType, UErrorCode *status){ - char region[ULOC_COUNTRY_CAPACITY]; - UResourceBundle *rb; - UResourceBundle *measTypeBundle = NULL; - - ulocimp_getRegionForSupplementalData(localeID, TRUE, region, ULOC_COUNTRY_CAPACITY, status); - - rb = ures_openDirect(NULL, 
"supplementalData", status); - ures_getByKey(rb, "measurementData", rb, status); - if (rb != NULL) { - UResourceBundle *measDataBundle = ures_getByKey(rb, region, NULL, status); - if (U_SUCCESS(*status)) { - measTypeBundle = ures_getByKey(measDataBundle, measurementType, NULL, status); - } - if (*status == U_MISSING_RESOURCE_ERROR) { - *status = U_ZERO_ERROR; - if (measDataBundle != NULL) { - ures_close(measDataBundle); - } - measDataBundle = ures_getByKey(rb, "001", NULL, status); - measTypeBundle = ures_getByKey(measDataBundle, measurementType, NULL, status); - } - ures_close(measDataBundle); - } - ures_close(rb); - return measTypeBundle; -} - -U_CAPI UMeasurementSystem U_EXPORT2 -ulocdata_getMeasurementSystem(const char *localeID, UErrorCode *status){ - - UResourceBundle* measurement=NULL; - UMeasurementSystem system = UMS_LIMIT; - - if(status == NULL || U_FAILURE(*status)){ - return system; - } - - measurement = measurementTypeBundleForLocale(localeID, MEASUREMENT_SYSTEM, status); - system = (UMeasurementSystem) ures_getInt(measurement, status); - - ures_close(measurement); - - return system; - -} - -U_CAPI void U_EXPORT2 -ulocdata_getPaperSize(const char* localeID, int32_t *height, int32_t *width, UErrorCode *status){ - UResourceBundle* paperSizeBundle = NULL; - const int32_t* paperSize=NULL; - int32_t len = 0; - - if(status == NULL || U_FAILURE(*status)){ - return; - } - - paperSizeBundle = measurementTypeBundleForLocale(localeID, PAPER_SIZE, status); - paperSize = ures_getIntVector(paperSizeBundle, &len, status); - - if(U_SUCCESS(*status)){ - if(len < 2){ - *status = U_INTERNAL_PROGRAM_ERROR; - }else{ - *height = paperSize[0]; - *width = paperSize[1]; - } - } - - ures_close(paperSizeBundle); - -} - -U_CAPI void U_EXPORT2 -ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status) { - UResourceBundle *rb = NULL; - rb = ures_openDirect(NULL, "supplementalData", status); - ures_getVersionByKey(rb, "cldrVersion", versionArray, status); - 
ures_close(rb); -} - -U_CAPI int32_t U_EXPORT2 -ulocdata_getLocaleDisplayPattern(ULocaleData *uld, - UChar *result, - int32_t resultCapacity, - UErrorCode *status) { - UResourceBundle *patternBundle; - int32_t len = 0; - const UChar *pattern = NULL; - UErrorCode localStatus = U_ZERO_ERROR; - - if (U_FAILURE(*status)) - return 0; - - patternBundle = ures_getByKey(uld->langBundle, "localeDisplayPattern", NULL, &localStatus); - - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)){ - ures_close(patternBundle); - return 0; - } - - pattern = ures_getStringByKey(patternBundle, "pattern", &len, &localStatus); - ures_close(patternBundle); - - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)){ - return 0; - } - - u_strncpy(result, pattern, resultCapacity); - return len; -} - - -U_CAPI int32_t U_EXPORT2 -ulocdata_getLocaleSeparator(ULocaleData *uld, - UChar *result, - int32_t resultCapacity, - UErrorCode *status) { - UResourceBundle *separatorBundle; - int32_t len = 0; - const UChar *separator = NULL; - UErrorCode localStatus = U_ZERO_ERROR; - UChar *p0, *p1; - static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 }; /* {0} */ - static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 }; /* {1} */ - static const int32_t subLen = 3; - - if (U_FAILURE(*status)) - return 0; - - separatorBundle = ures_getByKey(uld->langBundle, "localeDisplayPattern", NULL, &localStatus); - - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)){ - ures_close(separatorBundle); - return 0; - } - - separator = 
ures_getStringByKey(separatorBundle, "separator", &len, &localStatus); - ures_close(separatorBundle); - - if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { - localStatus = U_MISSING_RESOURCE_ERROR; - } - - if (localStatus != U_ZERO_ERROR) { - *status = localStatus; - } - - if (U_FAILURE(*status)){ - return 0; - } - - /* For backwards compatibility, if we have a pattern, return the portion between {0} and {1} */ - p0=u_strstr(separator, sub0); - p1=u_strstr(separator, sub1); - if (p0!=NULL && p1!=NULL && p0<=p1) { - separator = (const UChar *)p0 + subLen; - len = p1 - separator; - /* Desired separator is no longer zero-terminated; handle that if necessary */ - if (len < resultCapacity) { - u_strncpy(result, separator, len); - result[len] = 0; - return len; - } - } - - u_strncpy(result, separator, resultCapacity); - return len; -} diff --git a/deps/icu-small/source/i18n/ulocdata.cpp b/deps/icu-small/source/i18n/ulocdata.cpp new file mode 100644 index 0000000000..551f6c64ed --- /dev/null +++ b/deps/icu-small/source/i18n/ulocdata.cpp @@ -0,0 +1,386 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* * +* Copyright (C) 2003-2016, International Business Machines * +* Corporation and others. All Rights Reserved. * +* * +****************************************************************************** +* file name: ulocdata.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003Oct21 +* created by: Ram Viswanadha,John Emmons +*/ + +#include "cmemory.h" +#include "unicode/ustring.h" +#include "unicode/ures.h" +#include "unicode/uloc.h" +#include "unicode/ulocdata.h" +#include "uresimp.h" +#include "ureslocs.h" +#include "ulocimp.h" + +#define MEASUREMENT_SYSTEM "MeasurementSystem" +#define PAPER_SIZE "PaperSize" + +/** A locale data object. + * For usage in C programs. 
+ * @draft ICU 3.4 + */ +struct ULocaleData { + /** + * Controls the "No Substitute" behavior of this locale data object + */ + UBool noSubstitute; + + /** + * Pointer to the resource bundle associated with this locale data object + */ + UResourceBundle *bundle; + + /** + * Pointer to the lang resource bundle associated with this locale data object + */ + UResourceBundle *langBundle; +}; + +U_CAPI ULocaleData* U_EXPORT2 +ulocdata_open(const char *localeID, UErrorCode *status) +{ + ULocaleData *uld; + + if (U_FAILURE(*status)) { + return NULL; + } + + uld = (ULocaleData *)uprv_malloc(sizeof(ULocaleData)); + if (uld == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return(NULL); + } + + uld->langBundle = NULL; + + uld->noSubstitute = FALSE; + uld->bundle = ures_open(NULL, localeID, status); + uld->langBundle = ures_open(U_ICUDATA_LANG, localeID, status); + + if (U_FAILURE(*status)) { + uprv_free(uld); + return NULL; + } + + return uld; +} + +U_CAPI void U_EXPORT2 +ulocdata_close(ULocaleData *uld) +{ + if ( uld != NULL ) { + ures_close(uld->langBundle); + ures_close(uld->bundle); + uprv_free(uld); + } +} + +U_CAPI void U_EXPORT2 +ulocdata_setNoSubstitute(ULocaleData *uld, UBool setting) +{ + uld->noSubstitute = setting; +} + +U_CAPI UBool U_EXPORT2 +ulocdata_getNoSubstitute(ULocaleData *uld) +{ + return uld->noSubstitute; +} + +U_CAPI USet* U_EXPORT2 +ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn, + uint32_t options, ULocaleDataExemplarSetType extype, UErrorCode *status){ + + static const char* const exemplarSetTypes[] = { "ExemplarCharacters", + "AuxExemplarCharacters", + "ExemplarCharactersIndex", + "ExemplarCharactersPunctuation"}; + const UChar *exemplarChars = NULL; + int32_t len = 0; + UErrorCode localStatus = U_ZERO_ERROR; + + if (U_FAILURE(*status)) + return NULL; + + exemplarChars = ures_getStringByKey(uld->bundle, exemplarSetTypes[extype], &len, &localStatus); + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = 
U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)) + return NULL; + + if(fillIn != NULL) + uset_applyPattern(fillIn, exemplarChars, len, + USET_IGNORE_SPACE | options, status); + else + fillIn = uset_openPatternOptions(exemplarChars, len, + USET_IGNORE_SPACE | options, status); + + return fillIn; + +} + +U_CAPI int32_t U_EXPORT2 +ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, + UChar *result, int32_t resultLength, UErrorCode *status){ + + static const char* const delimiterKeys[] = { + "quotationStart", + "quotationEnd", + "alternateQuotationStart", + "alternateQuotationEnd" + }; + + UResourceBundle *delimiterBundle; + int32_t len = 0; + const UChar *delimiter = NULL; + UErrorCode localStatus = U_ZERO_ERROR; + + if (U_FAILURE(*status)) + return 0; + + delimiterBundle = ures_getByKey(uld->bundle, "delimiters", NULL, &localStatus); + + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)){ + ures_close(delimiterBundle); + return 0; + } + + delimiter = ures_getStringByKey(delimiterBundle, delimiterKeys[type], &len, &localStatus); + ures_close(delimiterBundle); + + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)){ + return 0; + } + + u_strncpy(result,delimiter, resultLength); + return len; +} + +static UResourceBundle * measurementTypeBundleForLocale(const char *localeID, const char *measurementType, UErrorCode *status){ + char region[ULOC_COUNTRY_CAPACITY]; + UResourceBundle *rb; + UResourceBundle *measTypeBundle = NULL; + + ulocimp_getRegionForSupplementalData(localeID, TRUE, region, ULOC_COUNTRY_CAPACITY, status); + + rb = ures_openDirect(NULL, 
"supplementalData", status); + ures_getByKey(rb, "measurementData", rb, status); + if (rb != NULL) { + UResourceBundle *measDataBundle = ures_getByKey(rb, region, NULL, status); + if (U_SUCCESS(*status)) { + measTypeBundle = ures_getByKey(measDataBundle, measurementType, NULL, status); + } + if (*status == U_MISSING_RESOURCE_ERROR) { + *status = U_ZERO_ERROR; + if (measDataBundle != NULL) { + ures_close(measDataBundle); + } + measDataBundle = ures_getByKey(rb, "001", NULL, status); + measTypeBundle = ures_getByKey(measDataBundle, measurementType, NULL, status); + } + ures_close(measDataBundle); + } + ures_close(rb); + return measTypeBundle; +} + +U_CAPI UMeasurementSystem U_EXPORT2 +ulocdata_getMeasurementSystem(const char *localeID, UErrorCode *status){ + + UResourceBundle* measurement=NULL; + UMeasurementSystem system = UMS_LIMIT; + + if(status == NULL || U_FAILURE(*status)){ + return system; + } + + measurement = measurementTypeBundleForLocale(localeID, MEASUREMENT_SYSTEM, status); + system = (UMeasurementSystem) ures_getInt(measurement, status); + + ures_close(measurement); + + return system; + +} + +U_CAPI void U_EXPORT2 +ulocdata_getPaperSize(const char* localeID, int32_t *height, int32_t *width, UErrorCode *status){ + UResourceBundle* paperSizeBundle = NULL; + const int32_t* paperSize=NULL; + int32_t len = 0; + + if(status == NULL || U_FAILURE(*status)){ + return; + } + + paperSizeBundle = measurementTypeBundleForLocale(localeID, PAPER_SIZE, status); + paperSize = ures_getIntVector(paperSizeBundle, &len, status); + + if(U_SUCCESS(*status)){ + if(len < 2){ + *status = U_INTERNAL_PROGRAM_ERROR; + }else{ + *height = paperSize[0]; + *width = paperSize[1]; + } + } + + ures_close(paperSizeBundle); + +} + +U_CAPI void U_EXPORT2 +ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status) { + UResourceBundle *rb = NULL; + rb = ures_openDirect(NULL, "supplementalData", status); + ures_getVersionByKey(rb, "cldrVersion", versionArray, status); + 
ures_close(rb); +} + +U_CAPI int32_t U_EXPORT2 +ulocdata_getLocaleDisplayPattern(ULocaleData *uld, + UChar *result, + int32_t resultCapacity, + UErrorCode *status) { + UResourceBundle *patternBundle; + int32_t len = 0; + const UChar *pattern = NULL; + UErrorCode localStatus = U_ZERO_ERROR; + + if (U_FAILURE(*status)) + return 0; + + patternBundle = ures_getByKey(uld->langBundle, "localeDisplayPattern", NULL, &localStatus); + + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)){ + ures_close(patternBundle); + return 0; + } + + pattern = ures_getStringByKey(patternBundle, "pattern", &len, &localStatus); + ures_close(patternBundle); + + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)){ + return 0; + } + + u_strncpy(result, pattern, resultCapacity); + return len; +} + + +U_CAPI int32_t U_EXPORT2 +ulocdata_getLocaleSeparator(ULocaleData *uld, + UChar *result, + int32_t resultCapacity, + UErrorCode *status) { + UResourceBundle *separatorBundle; + int32_t len = 0; + const UChar *separator = NULL; + UErrorCode localStatus = U_ZERO_ERROR; + UChar *p0, *p1; + static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 }; /* {0} */ + static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 }; /* {1} */ + static const int32_t subLen = 3; + + if (U_FAILURE(*status)) + return 0; + + separatorBundle = ures_getByKey(uld->langBundle, "localeDisplayPattern", NULL, &localStatus); + + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)){ + ures_close(separatorBundle); + return 0; + } + + separator = 
ures_getStringByKey(separatorBundle, "separator", &len, &localStatus); + ures_close(separatorBundle); + + if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { + localStatus = U_MISSING_RESOURCE_ERROR; + } + + if (localStatus != U_ZERO_ERROR) { + *status = localStatus; + } + + if (U_FAILURE(*status)){ + return 0; + } + + /* For backwards compatibility, if we have a pattern, return the portion between {0} and {1} */ + p0=u_strstr(separator, sub0); + p1=u_strstr(separator, sub1); + if (p0!=NULL && p1!=NULL && p0<=p1) { + separator = (const UChar *)p0 + subLen; + len = p1 - separator; + /* Desired separator is no longer zero-terminated; handle that if necessary */ + if (len < resultCapacity) { + u_strncpy(result, separator, len); + result[len] = 0; + return len; + } + } + + u_strncpy(result, separator, resultCapacity); + return len; +} diff --git a/deps/icu-small/source/i18n/umsg.cpp b/deps/icu-small/source/i18n/umsg.cpp index 75647e37d6..a385eb487d 100644 --- a/deps/icu-small/source/i18n/umsg.cpp +++ b/deps/icu-small/source/i18n/umsg.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: umsg.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/umsg_imp.h b/deps/icu-small/source/i18n/umsg_imp.h index e3538d3971..43ef1c78f0 100644 --- a/deps/icu-small/source/i18n/umsg_imp.h +++ b/deps/icu-small/source/i18n/umsg_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: umsg_imp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/unesctrn.cpp b/deps/icu-small/source/i18n/unesctrn.cpp index 2e79067dee..fcce9528e2 100644 --- a/deps/icu-small/source/i18n/unesctrn.cpp +++ b/deps/icu-small/source/i18n/unesctrn.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unesctrn.h b/deps/icu-small/source/i18n/unesctrn.h index 7ae8302b0f..e8e171f2bc 100644 --- a/deps/icu-small/source/i18n/unesctrn.h +++ b/deps/icu-small/source/i18n/unesctrn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/uni2name.cpp b/deps/icu-small/source/i18n/uni2name.cpp index 24323b3f60..86d7a4904a 100644 --- a/deps/icu-small/source/i18n/uni2name.cpp +++ b/deps/icu-small/source/i18n/uni2name.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/uni2name.h b/deps/icu-small/source/i18n/uni2name.h index 7d85113f66..4d6eaa0a9a 100644 --- a/deps/icu-small/source/i18n/uni2name.h +++ b/deps/icu-small/source/i18n/uni2name.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/alphaindex.h b/deps/icu-small/source/i18n/unicode/alphaindex.h index e9e8739ed2..54bd29ff88 100644 --- a/deps/icu-small/source/i18n/unicode/alphaindex.h +++ b/deps/icu-small/source/i18n/unicode/alphaindex.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -15,6 +15,7 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/locid.h" +#include "unicode/unistr.h" #if !UCONFIG_NO_COLLATION diff --git a/deps/icu-small/source/i18n/unicode/basictz.h b/deps/icu-small/source/i18n/unicode/basictz.h index 8da4a00bf8..eb62abaf0a 100644 --- a/deps/icu-small/source/i18n/unicode/basictz.h +++ b/deps/icu-small/source/i18n/unicode/basictz.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/calendar.h b/deps/icu-small/source/i18n/unicode/calendar.h index b7da5f3c5b..e43c181c8a 100644 --- a/deps/icu-small/source/i18n/unicode/calendar.h +++ b/deps/icu-small/source/i18n/unicode/calendar.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/choicfmt.h b/deps/icu-small/source/i18n/unicode/choicfmt.h index ab3c28fe07..c9f0f1114f 100644 --- a/deps/icu-small/source/i18n/unicode/choicfmt.h +++ b/deps/icu-small/source/i18n/unicode/choicfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/coleitr.h b/deps/icu-small/source/i18n/unicode/coleitr.h index 628b461f94..bf0e1d51a4 100644 --- a/deps/icu-small/source/i18n/unicode/coleitr.h +++ b/deps/icu-small/source/i18n/unicode/coleitr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -47,6 +47,7 @@ U_NAMESPACE_BEGIN struct CollationData; +class CharacterIterator; class CollationIterator; class RuleBasedCollator; class UCollationPCE; diff --git a/deps/icu-small/source/i18n/unicode/coll.h b/deps/icu-small/source/i18n/unicode/coll.h index e41be2ee81..7e467df80e 100644 --- a/deps/icu-small/source/i18n/unicode/coll.h +++ b/deps/icu-small/source/i18n/unicode/coll.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -58,7 +58,7 @@ #include "unicode/uobject.h" #include "unicode/ucol.h" -#include "unicode/normlzr.h" +#include "unicode/unorm.h" #include "unicode/locid.h" #include "unicode/uniset.h" #include "unicode/umisc.h" @@ -158,7 +158,7 @@ class CollationKey; * @see CollationKey * @see CollationElementIterator * @see Locale -* @see Normalizer +* @see Normalizer2 * @version 2.0 11/15/01 */ @@ -393,8 +393,8 @@ public: * is less than, greater than or equal to another string array. *

Example of use: *

-     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
-     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
+     * .       char16_t ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
+     * .       char16_t abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
      * .       UErrorCode status = U_ZERO_ERROR;
      * .       Collator *myCollation =
      * .                         Collator::createInstance(Locale::getUS(), status);
@@ -420,8 +420,8 @@ public:
      *         target
      * @deprecated ICU 2.6 use the overload with UErrorCode &
      */
-    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength)
+    virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
+                                      const char16_t* target, int32_t targetLength)
                                       const;
 
     /**
@@ -440,8 +440,8 @@ public:
      * than target
      * @stable ICU 2.6
      */
-    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength,
+    virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
+                                      const char16_t* target, int32_t targetLength,
                                       UErrorCode &status) const = 0;
 
     /**
@@ -517,7 +517,7 @@ public:
      * @see CollationKey#compare
      * @stable ICU 2.0
      */
-    virtual CollationKey& getCollationKey(const UChar*source,
+    virtual CollationKey& getCollationKey(const char16_t*source,
                                           int32_t sourceLength,
                                           CollationKey& key,
                                           UErrorCode& status) const = 0;
@@ -911,7 +911,7 @@ public:
      * the top of one of the supported reordering groups,
      * and it must not be beyond the last of those groups.
      * See setMaxVariable().
-     * @param varTop one or more (if contraction) UChars to which the variable top should be set
+     * @param varTop one or more (if contraction) char16_ts to which the variable top should be set
      * @param len length of variable top string. If -1 it is considered to be zero terminated.
      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: 
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
@@ -920,7 +920,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0; /** * Sets the variable top to the primary weight of the specified string. @@ -929,7 +929,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
* U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
* U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -1002,7 +1002,7 @@ public: int32_t resultLength) const = 0; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * Sort key byte arrays are zero-terminated and can be compared using * strcmp(). * @@ -1020,7 +1020,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength, uint8_t*result, int32_t resultLength) const = 0; /** diff --git a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h index 1fcc5c581e..3fbe5da9ce 100644 --- a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h +++ b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/curramt.h b/deps/icu-small/source/i18n/unicode/curramt.h index 268d53c0b1..03ec856e3b 100644 --- a/deps/icu-small/source/i18n/unicode/curramt.h +++ b/deps/icu-small/source/i18n/unicode/curramt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -46,7 +46,7 @@ class U_I18N_API CurrencyAmount: public Measure { * is invalid, then this will be set to a failing value. 
* @stable ICU 3.0 */ - CurrencyAmount(const Formattable& amount, const UChar* isoCode, + CurrencyAmount(const Formattable& amount, ConstChar16Ptr isoCode, UErrorCode &ec); /** @@ -59,7 +59,7 @@ class U_I18N_API CurrencyAmount: public Measure { * then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyAmount(double amount, const UChar* isoCode, + CurrencyAmount(double amount, ConstChar16Ptr isoCode, UErrorCode &ec); /** @@ -115,14 +115,14 @@ class U_I18N_API CurrencyAmount: public Measure { * Return the ISO currency code of this object. * @stable ICU 3.0 */ - inline const UChar* getISOCurrency() const; + inline const char16_t* getISOCurrency() const; }; inline const CurrencyUnit& CurrencyAmount::getCurrency() const { return (const CurrencyUnit&) getUnit(); } -inline const UChar* CurrencyAmount::getISOCurrency() const { +inline const char16_t* CurrencyAmount::getISOCurrency() const { return getCurrency().getISOCurrency(); } diff --git a/deps/icu-small/source/i18n/unicode/currpinf.h b/deps/icu-small/source/i18n/unicode/currpinf.h index 133de38fc2..1a327c5bae 100644 --- a/deps/icu-small/source/i18n/unicode/currpinf.h +++ b/deps/icu-small/source/i18n/unicode/currpinf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/currunit.h b/deps/icu-small/source/i18n/unicode/currunit.h index 313c92a6ac..b72dc5e68d 100644 --- a/deps/icu-small/source/i18n/unicode/currunit.h +++ b/deps/icu-small/source/i18n/unicode/currunit.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN /** * A unit of currency, such as USD (U.S. dollars) or JPY (Japanese - * yen). This class is a thin wrapper over a UChar string that + * yen). This class is a thin wrapper over a char16_t string that * subclasses MeasureUnit, for use with Measure and MeasureFormat. * * @author Alan Liu @@ -44,7 +44,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * then this will be set to a failing value. * @stable ICU 3.0 */ - CurrencyUnit(const UChar* isoCode, UErrorCode &ec); + CurrencyUnit(ConstChar16Ptr isoCode, UErrorCode &ec); /** * Copy constructor @@ -93,16 +93,16 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { * Return the ISO currency code of this object. * @stable ICU 3.0 */ - inline const UChar* getISOCurrency() const; + inline const char16_t* getISOCurrency() const; private: /** * The ISO 4217 code of this object. */ - UChar isoCode[4]; + char16_t isoCode[4]; }; -inline const UChar* CurrencyUnit::getISOCurrency() const { +inline const char16_t* CurrencyUnit::getISOCurrency() const { return isoCode; } diff --git a/deps/icu-small/source/i18n/unicode/datefmt.h b/deps/icu-small/source/i18n/unicode/datefmt.h index 6e3a78f291..d70d8d1dd5 100644 --- a/deps/icu-small/source/i18n/unicode/datefmt.h +++ b/deps/icu-small/source/i18n/unicode/datefmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/dcfmtsym.h b/deps/icu-small/source/i18n/unicode/dcfmtsym.h index 946227addb..3a502d0ec0 100644 --- a/deps/icu-small/source/i18n/unicode/dcfmtsym.h +++ b/deps/icu-small/source/i18n/unicode/dcfmtsym.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -35,6 +35,7 @@ #include "unicode/uobject.h" #include "unicode/locid.h" #include "unicode/unum.h" +#include "unicode/unistr.h" /** * \file @@ -392,7 +393,7 @@ public: * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API. * @internal */ - inline const UChar* getCurrencyPattern(void) const; + inline const char16_t* getCurrencyPattern(void) const; #endif /* U_HIDE_INTERNAL_API */ private: @@ -423,7 +424,7 @@ private: char actualLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY]; - const UChar* currPattern; + const char16_t* currPattern; UnicodeString currencySpcBeforeSym[UNUM_CURRENCY_SPACING_COUNT]; UnicodeString currencySpcAfterSym[UNUM_CURRENCY_SPACING_COUNT]; @@ -491,7 +492,7 @@ DecimalFormatSymbols::getLocale() const { } #ifndef U_HIDE_INTERNAL_API -inline const UChar* +inline const char16_t* DecimalFormatSymbols::getCurrencyPattern() const { return currPattern; } diff --git a/deps/icu-small/source/i18n/unicode/decimfmt.h b/deps/icu-small/source/i18n/unicode/decimfmt.h index 7339399f72..1deff5bf92 100644 --- a/deps/icu-small/source/i18n/unicode/decimfmt.h +++ b/deps/icu-small/source/i18n/unicode/decimfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -604,7 +604,7 @@ template class U_I18N_API EnumSet"* #0 o''clock", the format width is 10. * - *
  • The width is counted in 16-bit code units (UChars). + *
  • The width is counted in 16-bit code units (char16_ts). * *
  • Some parameters which usually do not matter have meaning when padding is * used, because the pattern width is significant with padding. In the pattern @@ -1961,14 +1961,14 @@ public: * @param ec input-output error code * @stable ICU 3.0 */ - virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + virtual void setCurrency(const char16_t* theCurrency, UErrorCode& ec); /** * Sets the currency used to display currency amounts. See - * setCurrency(const UChar*, UErrorCode&). - * @deprecated ICU 3.0. Use setCurrency(const UChar*, UErrorCode&). + * setCurrency(const char16_t*, UErrorCode&). + * @deprecated ICU 3.0. Use setCurrency(const char16_t*, UErrorCode&). */ - virtual void setCurrency(const UChar* theCurrency); + virtual void setCurrency(const char16_t* theCurrency); /** * Sets the Currency Context object used to display currency. @@ -2108,7 +2108,7 @@ private: void parse(const UnicodeString& text, Formattable& result, ParsePosition& pos, - UChar* currency) const; + char16_t* currency) const; enum { fgStatusInfinite, @@ -2124,7 +2124,7 @@ private: int8_t type, ParsePosition& parsePosition, DigitList& digits, UBool* status, - UChar* currency) const; + char16_t* currency) const; // Mixed style parsing for currency. 
// It parses against the current currency pattern @@ -2135,7 +2135,7 @@ private: ParsePosition& parsePosition, DigitList& digits, UBool* status, - UChar* currency) const; + char16_t* currency) const; int32_t skipPadding(const UnicodeString& text, int32_t position) const; @@ -2146,7 +2146,7 @@ private: const UnicodeString* affixPat, UBool complexCurrencyParsing, int8_t type, - UChar* currency) const; + char16_t* currency) const; static UnicodeString& trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix); @@ -2169,7 +2169,7 @@ private: const UnicodeString& input, int32_t pos, int8_t type, - UChar* currency) const; + char16_t* currency) const; static int32_t match(const UnicodeString& text, int32_t pos, UChar32 ch); @@ -2195,11 +2195,11 @@ private: void setupCurrencyAffixPatterns(UErrorCode& status); // get the currency rounding with respect to currency usage - double getCurrencyRounding(const UChar* currency, + double getCurrencyRounding(const char16_t* currency, UErrorCode* ec) const; // get the currency fraction with respect to currency usage - int getCurrencyFractionDigits(const UChar* currency, + int getCurrencyFractionDigits(const char16_t* currency, UErrorCode* ec) const; // hashtable operations @@ -2271,7 +2271,7 @@ protected: * have a capacity of at least 4 * @internal */ - virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + virtual void getEffectiveCurrency(char16_t* result, UErrorCode& ec) const; /** number of integer digits * @stable ICU 2.4 diff --git a/deps/icu-small/source/i18n/unicode/dtfmtsym.h b/deps/icu-small/source/i18n/unicode/dtfmtsym.h index 507868e2c3..ed7c189846 100644 --- a/deps/icu-small/source/i18n/unicode/dtfmtsym.h +++ b/deps/icu-small/source/i18n/unicode/dtfmtsym.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -25,6 +25,7 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/calendar.h" +#include "unicode/strenum.h" #include "unicode/uobject.h" #include "unicode/locid.h" #include "unicode/udat.h" @@ -426,13 +427,13 @@ public: * doesn't specify any time separator, and always recognized when parsing. * @internal */ - static const UChar DEFAULT_TIME_SEPARATOR = 0x003a; // ':' + static const char16_t DEFAULT_TIME_SEPARATOR = 0x003a; // ':' /** * This alternate time separator is always recognized when parsing. * @internal */ - static const UChar ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' + static const char16_t ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' /** * Gets the time separator string. For example: ":". @@ -566,7 +567,7 @@ public: * @return the non-localized date-time pattern characters * @stable ICU 2.0 */ - static const UChar * U_EXPORT2 getPatternUChars(void); + static const char16_t * U_EXPORT2 getPatternUChars(void); /** * Gets localized date-time pattern characters. For example: 'u', 't', etc. @@ -977,7 +978,7 @@ private: * Returns the date format field index of the pattern character c, * or UDAT_FIELD_COUNT if c is not a pattern character. */ - static UDateFormatField U_EXPORT2 getPatternCharIndex(UChar c); + static UDateFormatField U_EXPORT2 getPatternCharIndex(char16_t c); /** * Returns TRUE if f (with its pattern character repeated count times) is a numeric field. @@ -987,7 +988,7 @@ private: /** * Returns TRUE if c (repeated count times) is the pattern character for a numeric field. 
*/ - static UBool U_EXPORT2 isNumericPatternChar(UChar c, int32_t count); + static UBool U_EXPORT2 isNumericPatternChar(char16_t c, int32_t count); public: #ifndef U_HIDE_INTERNAL_API /** diff --git a/deps/icu-small/source/i18n/unicode/dtitvfmt.h b/deps/icu-small/source/i18n/unicode/dtitvfmt.h index 68360b87df..5eaa559d0e 100644 --- a/deps/icu-small/source/i18n/unicode/dtitvfmt.h +++ b/deps/icu-small/source/i18n/unicode/dtitvfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************************** * Copyright (C) 2008-2016, International Business Machines Corporation and @@ -996,7 +996,7 @@ private: // from calendar field to pattern letter - static const UChar fgCalendarFieldToPatternLetter[]; + static const char16_t fgCalendarFieldToPatternLetter[]; /** diff --git a/deps/icu-small/source/i18n/unicode/dtitvinf.h b/deps/icu-small/source/i18n/unicode/dtitvinf.h index b31061e16a..e537bed0c9 100644 --- a/deps/icu-small/source/i18n/unicode/dtitvinf.h +++ b/deps/icu-small/source/i18n/unicode/dtitvinf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/dtptngen.h b/deps/icu-small/source/i18n/unicode/dtptngen.h index fd617ce3cd..6fd5f5fd30 100644 --- a/deps/icu-small/source/i18n/unicode/dtptngen.h +++ b/deps/icu-small/source/i18n/unicode/dtptngen.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -27,6 +27,7 @@ U_NAMESPACE_BEGIN */ +class CharString; class Hashtable; class FormatParser; class DateTimeMatcher; @@ -517,7 +518,7 @@ private: DateTimeMatcher *skipMatcher; Hashtable *fAvailableFormatKeyHash; UnicodeString emptyString; - UChar fDefaultHourFormatChar; + char16_t fDefaultHourFormatChar; int32_t fAllowedHourFormats[7]; // Actually an array of AllowedHourFormat enum type, ending with UNKNOWN. diff --git a/deps/icu-small/source/i18n/unicode/dtrule.h b/deps/icu-small/source/i18n/unicode/dtrule.h index 32d230ea77..24dfc69de1 100644 --- a/deps/icu-small/source/i18n/unicode/dtrule.h +++ b/deps/icu-small/source/i18n/unicode/dtrule.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/fieldpos.h b/deps/icu-small/source/i18n/unicode/fieldpos.h index 6091941106..78561a4de7 100644 --- a/deps/icu-small/source/i18n/unicode/fieldpos.h +++ b/deps/icu-small/source/i18n/unicode/fieldpos.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/fmtable.h b/deps/icu-small/source/i18n/unicode/fmtable.h index ac5daba893..766a71969d 100644 --- a/deps/icu-small/source/i18n/unicode/fmtable.h +++ b/deps/icu-small/source/i18n/unicode/fmtable.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/format.h b/deps/icu-small/source/i18n/unicode/format.h index 1484e9f00e..e64cc1c6eb 100644 --- a/deps/icu-small/source/i18n/unicode/format.h +++ b/deps/icu-small/source/i18n/unicode/format.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/fpositer.h b/deps/icu-small/source/i18n/unicode/fpositer.h index 694a1d8770..898b66ceea 100644 --- a/deps/icu-small/source/i18n/unicode/fpositer.h +++ b/deps/icu-small/source/i18n/unicode/fpositer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/gender.h b/deps/icu-small/source/i18n/unicode/gender.h index 0294895184..467b64ec5e 100644 --- a/deps/icu-small/source/i18n/unicode/gender.h +++ b/deps/icu-small/source/i18n/unicode/gender.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/gregocal.h b/deps/icu-small/source/i18n/unicode/gregocal.h index 60ba0cc6ac..1d881e0be7 100644 --- a/deps/icu-small/source/i18n/unicode/gregocal.h +++ b/deps/icu-small/source/i18n/unicode/gregocal.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 1997-2013, International Business Machines Corporation and others. diff --git a/deps/icu-small/source/i18n/unicode/measfmt.h b/deps/icu-small/source/i18n/unicode/measfmt.h index 866d7d3227..dcd4f42343 100644 --- a/deps/icu-small/source/i18n/unicode/measfmt.h +++ b/deps/icu-small/source/i18n/unicode/measfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index 9810b91194..1cb97ed549 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -345,35 +345,29 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createKarat(UErrorCode &status); -#ifndef U_HIDE_DRAFT_API /** * Returns unit of concentr: milligram-per-deciliter. * Caller owns returned value and must free it. * @param status ICU error code. 
- * @draft ICU 57 + * @stable ICU 57 */ static MeasureUnit *createMilligramPerDeciliter(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ -#ifndef U_HIDE_DRAFT_API /** * Returns unit of concentr: millimole-per-liter. * Caller owns returned value and must free it. * @param status ICU error code. - * @draft ICU 57 + * @stable ICU 57 */ static MeasureUnit *createMillimolePerLiter(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ -#ifndef U_HIDE_DRAFT_API /** * Returns unit of concentr: part-per-million. * Caller owns returned value and must free it. * @param status ICU error code. - * @draft ICU 57 + * @stable ICU 57 */ static MeasureUnit *createPartPerMillion(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ /** * Returns unit of consumption: liter-per-100kilometers. @@ -399,55 +393,21 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createMilePerGallon(UErrorCode &status); -#ifndef U_HIDE_DRAFT_API /** * Returns unit of consumption: mile-per-gallon-imperial. * Caller owns returned value and must free it. * @param status ICU error code. - * @draft ICU 57 + * @stable ICU 57 */ static MeasureUnit *createMilePerGallonImperial(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ - -#ifndef U_HIDE_DRAFT_API - /** - * Returns unit of coordinate: east. - * Caller owns returned value and must free it. - * @param status ICU error code. - * @draft ICU 58 - */ - static MeasureUnit *createEast(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ -#ifndef U_HIDE_DRAFT_API - /** - * Returns unit of coordinate: north. - * Caller owns returned value and must free it. - * @param status ICU error code. 
- * @draft ICU 58 + /* + * The following were draft ICU 58, but have been withdrawn: + * static MeasureUnit *createEast(UErrorCode &status); + * static MeasureUnit *createNorth(UErrorCode &status); + * static MeasureUnit *createSouth(UErrorCode &status); + * static MeasureUnit *createWest(UErrorCode &status); */ - static MeasureUnit *createNorth(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ - -#ifndef U_HIDE_DRAFT_API - /** - * Returns unit of coordinate: south. - * Caller owns returned value and must free it. - * @param status ICU error code. - * @draft ICU 58 - */ - static MeasureUnit *createSouth(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ - -#ifndef U_HIDE_DRAFT_API - /** - * Returns unit of coordinate: west. - * Caller owns returned value and must free it. - * @param status ICU error code. - * @draft ICU 58 - */ - static MeasureUnit *createWest(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ /** * Returns unit of digital: bit. @@ -873,6 +833,16 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createPicometer(UErrorCode &status); +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of length: point. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 59 + */ + static MeasureUnit *createPoint(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns unit of length: yard. * Caller owns returned value and must free it. @@ -1249,15 +1219,13 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createGallon(UErrorCode &status); -#ifndef U_HIDE_DRAFT_API /** * Returns unit of volume: gallon-imperial. * Caller owns returned value and must free it. * @param status ICU error code. - * @draft ICU 57 + * @stable ICU 57 */ static MeasureUnit *createGallonImperial(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ /** * Returns unit of volume: hectoliter. 
diff --git a/deps/icu-small/source/i18n/unicode/measure.h b/deps/icu-small/source/i18n/unicode/measure.h index 719bc6bc8f..71438d5c85 100644 --- a/deps/icu-small/source/i18n/unicode/measure.h +++ b/deps/icu-small/source/i18n/unicode/measure.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/msgfmt.h b/deps/icu-small/source/i18n/unicode/msgfmt.h index 1a9973872d..fef8010774 100644 --- a/deps/icu-small/source/i18n/unicode/msgfmt.h +++ b/deps/icu-small/source/i18n/unicode/msgfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2007-2013, International Business Machines Corporation and @@ -939,7 +939,7 @@ private: * @return the index of the list which matches the keyword s. */ static int32_t findKeyword( const UnicodeString& s, - const UChar * const *list); + const char16_t * const *list); /** * Thin wrapper around the format(... AppendableWrapper ...) variant. diff --git a/deps/icu-small/source/i18n/unicode/numfmt.h b/deps/icu-small/source/i18n/unicode/numfmt.h index 9e3d5d34ec..7147204a7c 100644 --- a/deps/icu-small/source/i18n/unicode/numfmt.h +++ b/deps/icu-small/source/i18n/unicode/numfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -931,7 +931,7 @@ public: * @param ec input-output error code * @stable ICU 3.0 */ - virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + virtual void setCurrency(const char16_t* theCurrency, UErrorCode& ec); /** * Gets the currency used to display currency @@ -940,7 +940,7 @@ public: * the currency in use, or a pointer to the empty string. * @stable ICU 2.6 */ - const UChar* getCurrency() const; + const char16_t* getCurrency() const; /** * Set a particular UDisplayContext value in the formatter, such as @@ -1018,7 +1018,7 @@ protected: * have a capacity of at least 4 * @internal */ - virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + virtual void getEffectiveCurrency(char16_t* result, UErrorCode& ec) const; #ifndef U_HIDE_INTERNAL_API /** @@ -1065,7 +1065,7 @@ private: UBool fLenient; // TRUE => lenient parse is enabled // ISO currency code - UChar fCurrency[4]; + char16_t fCurrency[4]; UDisplayContext fCapitalizationContext; diff --git a/deps/icu-small/source/i18n/unicode/numsys.h b/deps/icu-small/source/i18n/unicode/numsys.h index da181551c2..5f52721278 100644 --- a/deps/icu-small/source/i18n/unicode/numsys.h +++ b/deps/icu-small/source/i18n/unicode/numsys.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -193,7 +193,7 @@ private: void setAlgorithmic(UBool algorithmic); - void setDesc(UnicodeString desc); + void setDesc(const UnicodeString &desc); void setName(const char* name); diff --git a/deps/icu-small/source/i18n/unicode/plurfmt.h b/deps/icu-small/source/i18n/unicode/plurfmt.h index b10e4179b6..9a83e52550 100644 --- a/deps/icu-small/source/i18n/unicode/plurfmt.h +++ b/deps/icu-small/source/i18n/unicode/plurfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/plurrule.h b/deps/icu-small/source/i18n/unicode/plurrule.h index 146e6bea83..a14f392b7a 100644 --- a/deps/icu-small/source/i18n/unicode/plurrule.h +++ b/deps/icu-small/source/i18n/unicode/plurrule.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -29,6 +29,9 @@ #include "unicode/format.h" #include "unicode/upluralrules.h" +#ifndef U_HIDE_INTERNAL_API +#include "unicode/numfmt.h" +#endif /* U_HIDE_INTERNAL_API */ /** * Value returned by PluralRules::getUniqueKeywordValue() when there is no @@ -345,6 +348,22 @@ public: UnicodeString select(double number) const; #ifndef U_HIDE_INTERNAL_API + /** + * Given a number and a format, returns the keyword of the first applicable + * rule for this PluralRules object. + * Note: This internal preview interface may be removed in the future if + * an architecturally cleaner solution reaches stable status. 
+ * @param obj The numeric object for which the rule should be determined. + * @param fmt The NumberFormat specifying how the number will be formatted + * (this can affect the plural form, e.g. "1 dollar" vs "1.0 dollars"). + * @param status Input/output parameter. If at entry this indicates a + * failure status, the method returns immediately; otherwise + * this is set to indicate the outcome of the call. + * @return The keyword of the selected rule. Undefined in the case of an error. + * @internal ICU 59 technology preview, may be removed in the future + */ + UnicodeString select(const Formattable& obj, const NumberFormat& fmt, UErrorCode& status) const; + /** * @internal */ diff --git a/deps/icu-small/source/i18n/unicode/rbnf.h b/deps/icu-small/source/i18n/unicode/rbnf.h index 14230f8982..b4cbb06732 100644 --- a/deps/icu-small/source/i18n/unicode/rbnf.h +++ b/deps/icu-small/source/i18n/unicode/rbnf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -866,6 +866,52 @@ public: FieldPosition& pos, UErrorCode& status) const; +protected: + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. Subclasses of NumberFormat that want + * to specifically handle big decimal numbers must override this method. + * class DecimalFormat does so. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. 
+ * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. Subclasses of NumberFormat that want + * to specifically handle big decimal numbers must override this method. + * class DecimalFormat does so. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; +public: + using NumberFormat::parse; /** @@ -1031,7 +1077,8 @@ private: NFRule * initializeDefaultNaNRule(UErrorCode &status); const NFRule * getDefaultNaNRule() const; PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const; - UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const; + UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const; + UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const; private: NFRuleSet **ruleSets; diff --git a/deps/icu-small/source/i18n/unicode/rbtz.h b/deps/icu-small/source/i18n/unicode/rbtz.h index 20de34bb17..542a7c140c 100644 --- a/deps/icu-small/source/i18n/unicode/rbtz.h +++ 
b/deps/icu-small/source/i18n/unicode/rbtz.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/regex.h b/deps/icu-small/source/i18n/unicode/regex.h index 96c64874a4..7a68039fe8 100644 --- a/deps/icu-small/source/i18n/unicode/regex.h +++ b/deps/icu-small/source/i18n/unicode/regex.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: regex.h -* encoding: US-ASCII +* encoding: UTF-8 * indentation:4 * * created on: 2002oct22 @@ -350,17 +350,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher *matcher(const UChar *input, + RegexMatcher *matcher(const char16_t *input, UErrorCode &status) const; public: @@ -748,17 +748,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher(const UnicodeString ®exp, const UChar *input, + RegexMatcher(const UnicodeString ®exp, const char16_t *input, uint32_t flags, UErrorCode &status); public: @@ -1156,17 +1156,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * reset a matcher with a (UChar *) string as input rather than + * reset a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. *

    - * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * To efficiently work with char16_t *strings, wrap the data in a UnicodeString * using one of the aliasing constructors, such as - * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength); + * UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength); * or in a UText, using - * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status); + * utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status); * */ - RegexMatcher &reset(const UChar *input); + RegexMatcher &reset(const char16_t *input); public: /** diff --git a/deps/icu-small/source/i18n/unicode/region.h b/deps/icu-small/source/i18n/unicode/region.h index 47829944a3..667c4051f0 100644 --- a/deps/icu-small/source/i18n/unicode/region.h +++ b/deps/icu-small/source/i18n/unicode/region.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/reldatefmt.h b/deps/icu-small/source/i18n/unicode/reldatefmt.h index 8e659e2bc8..abd43522c3 100644 --- a/deps/icu-small/source/i18n/unicode/reldatefmt.h +++ b/deps/icu-small/source/i18n/unicode/reldatefmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************** @@ -230,6 +230,7 @@ typedef enum UDateDirection { U_NAMESPACE_BEGIN +class BreakIterator; class RelativeDateTimeCacheData; class SharedNumberFormat; class SharedPluralRules; @@ -414,7 +415,6 @@ public: UnicodeString& appendTo, UErrorCode& status) const; -#ifndef U_HIDE_DRAFT_API /** * Format a combination of URelativeDateTimeUnit and numeric offset * using a numeric style, e.g. "1 week ago", "in 1 week", @@ -430,7 +430,7 @@ public: * appended. * @param status ICU error code returned here. * @return appendTo - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString& formatNumeric( double offset, @@ -453,14 +453,13 @@ public: * appended. * @param status ICU error code returned here. * @return appendTo - * @draft ICU 57 + * @stable ICU 57 */ UnicodeString& format( double offset, URelativeDateTimeUnit unit, UnicodeString& appendTo, UErrorCode& status) const; -#endif /* U_HIDE_DRAFT_API */ /** * Combines a relative date string and a time string in this object's diff --git a/deps/icu-small/source/i18n/unicode/scientificnumberformatter.h b/deps/icu-small/source/i18n/unicode/scientificnumberformatter.h index 0b34755dc2..30edee7ecc 100644 --- a/deps/icu-small/source/i18n/unicode/scientificnumberformatter.h +++ b/deps/icu-small/source/i18n/unicode/scientificnumberformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/search.h b/deps/icu-small/source/i18n/unicode/search.h index 35a0552623..12dd5c7727 100644 --- a/deps/icu-small/source/i18n/unicode/search.h +++ b/deps/icu-small/source/i18n/unicode/search.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. 
and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/selfmt.h b/deps/icu-small/source/i18n/unicode/selfmt.h index 37a8f2b821..08e9d444ee 100755 --- a/deps/icu-small/source/i18n/unicode/selfmt.h +++ b/deps/icu-small/source/i18n/unicode/selfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/i18n/unicode/simpletz.h b/deps/icu-small/source/i18n/unicode/simpletz.h index 7e41a4ab8a..1b23ab79d1 100644 --- a/deps/icu-small/source/i18n/unicode/simpletz.h +++ b/deps/icu-small/source/i18n/unicode/simpletz.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/smpdtfmt.h b/deps/icu-small/source/i18n/unicode/smpdtfmt.h index e6cf28d22b..4733e759aa 100644 --- a/deps/icu-small/source/i18n/unicode/smpdtfmt.h +++ b/deps/icu-small/source/i18n/unicode/smpdtfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 1997-2016, International Business Machines Corporation and @@ -1170,7 +1170,7 @@ public: * @param field The UDateFormatField to get * @stable ICU 54 */ - const NumberFormat * getNumberFormatForField(UChar field) const; + const NumberFormat * getNumberFormatForField(char16_t field) const; #ifndef U_HIDE_INTERNAL_API /** @@ -1262,7 +1262,7 @@ private: * succeeds. */ void subFormat(UnicodeString &appendTo, - UChar ch, + char16_t ch, int32_t count, UDisplayContext capitalizationContext, int32_t fieldNum, @@ -1294,7 +1294,7 @@ private: * Return true if the given format character, occuring count * times, represents a numeric field. */ - static UBool isNumeric(UChar formatChar, int32_t count); + static UBool isNumeric(char16_t formatChar, int32_t count); /** * Returns TRUE if the patternOffset is at the start of a numeric field. @@ -1412,7 +1412,7 @@ private: * @return the new start position if matching succeeded; a negative number * indicating matching failure, otherwise. */ - int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, + int32_t subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count, UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType, SimpleDateFormatMutableNFs &mutableNFs, int32_t *dayPeriod=NULL) const; @@ -1523,12 +1523,12 @@ private: /** * Map calendar field letter into calendar field level. */ - static int32_t getLevelFromChar(UChar ch); + static int32_t getLevelFromChar(char16_t ch); /** * Tell if a character can be used to define a field in a format string. */ - static UBool isSyntaxChar(UChar ch); + static UBool isSyntaxChar(char16_t ch); /** * The formatting pattern for this formatter. 
diff --git a/deps/icu-small/source/i18n/unicode/sortkey.h b/deps/icu-small/source/i18n/unicode/sortkey.h index 6f1543da40..6895be7a2b 100644 --- a/deps/icu-small/source/i18n/unicode/sortkey.h +++ b/deps/icu-small/source/i18n/unicode/sortkey.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/stsearch.h b/deps/icu-small/source/i18n/unicode/stsearch.h index 1cae53d128..46bc51b30e 100644 --- a/deps/icu-small/source/i18n/unicode/stsearch.h +++ b/deps/icu-small/source/i18n/unicode/stsearch.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/tblcoll.h b/deps/icu-small/source/i18n/unicode/tblcoll.h index c48ea38c13..24ba213b41 100644 --- a/deps/icu-small/source/i18n/unicode/tblcoll.h +++ b/deps/icu-small/source/i18n/unicode/tblcoll.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -308,8 +308,8 @@ public: * than target * @stable ICU 2.6 */ - virtual UCollationResult compare(const UChar* source, int32_t sourceLength, - const UChar* target, int32_t targetLength, + virtual UCollationResult compare(const char16_t* source, int32_t sourceLength, + const char16_t* target, int32_t targetLength, UErrorCode &status) const; /** @@ -377,7 +377,7 @@ public: * @see CollationKey * @stable ICU 2.0 */ - virtual CollationKey& getCollationKey(const UChar *source, + virtual CollationKey& getCollationKey(const char16_t *source, int32_t sourceLength, CollationKey& key, UErrorCode& status) const; @@ -552,7 +552,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param varTop one or more (if contraction) char16_ts to which the variable top should be set * @param len length of variable top string. If -1 it is considered to be zero terminated. * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
    * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
    @@ -561,7 +561,7 @@ public: * @return variable top primary weight * @deprecated ICU 53 Call setMaxVariable() instead. */ - virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); + virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status); /** * Sets the variable top to the primary weight of the specified string. @@ -570,7 +570,7 @@ public: * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See setMaxVariable(). - * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set * @param status error code. If error code is set, the return value is undefined. Errors set by this function are:
    * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction
    * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond @@ -631,7 +631,7 @@ public: int32_t resultLength) const; /** - * Get the sort key as an array of bytes from a UChar buffer. + * Get the sort key as an array of bytes from a char16_t buffer. * * Note that sort keys are often less efficient than simply doing comparison. * For more details, see the ICU User Guide. @@ -646,7 +646,7 @@ public: * @return Number of bytes needed for storing the sort key * @stable ICU 2.2 */ - virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, + virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const; /** @@ -821,17 +821,17 @@ private: void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); // Both lengths must be <0 or else both must be >=0. - UCollationResult doCompare(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, + UCollationResult doCompare(const char16_t *left, int32_t leftLength, + const char16_t *right, int32_t rightLength, UErrorCode &errorCode) const; UCollationResult doCompare(const uint8_t *left, int32_t leftLength, const uint8_t *right, int32_t rightLength, UErrorCode &errorCode) const; - void writeSortKey(const UChar *s, int32_t length, + void writeSortKey(const char16_t *s, int32_t length, SortKeyByteSink &sink, UErrorCode &errorCode) const; - void writeIdenticalLevel(const UChar *s, const UChar *limit, + void writeIdenticalLevel(const char16_t *s, const char16_t *limit, SortKeyByteSink &sink, UErrorCode &errorCode) const; const CollationSettings &getDefaultSettings() const; diff --git a/deps/icu-small/source/i18n/unicode/timezone.h b/deps/icu-small/source/i18n/unicode/timezone.h index 58c84d062b..d4cd7cb36d 100644 --- a/deps/icu-small/source/i18n/unicode/timezone.h +++ b/deps/icu-small/source/i18n/unicode/timezone.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /************************************************************************* * Copyright (c) 1997-2016, International Business Machines Corporation @@ -863,7 +863,7 @@ private: * @param id zone id string * @return the pointer of the ID resource, or NULL. */ - static const UChar* findID(const UnicodeString& id); + static const char16_t* findID(const UnicodeString& id); /** * Resolve a link in Olson tzdata. When the given id is known and it's not a link, @@ -873,7 +873,7 @@ private: * @param id zone id string * @return the dereferenced zone or NULL */ - static const UChar* dereferOlsonLink(const UnicodeString& id); + static const char16_t* dereferOlsonLink(const UnicodeString& id); /** * Returns the region code associated with the given zone, @@ -881,7 +881,7 @@ private: * @param id zone id string * @return the region associated with the given zone */ - static const UChar* getRegion(const UnicodeString& id); + static const char16_t* getRegion(const UnicodeString& id); public: #ifndef U_HIDE_INTERNAL_API @@ -893,7 +893,7 @@ private: * @return the region associated with the given zone * @internal */ - static const UChar* getRegion(const UnicodeString& id, UErrorCode& status); + static const char16_t* getRegion(const UnicodeString& id, UErrorCode& status); #endif /* U_HIDE_INTERNAL_API */ private: diff --git a/deps/icu-small/source/i18n/unicode/tmunit.h b/deps/icu-small/source/i18n/unicode/tmunit.h index a19a1f3c17..fa59f10473 100644 --- a/deps/icu-small/source/i18n/unicode/tmunit.h +++ b/deps/icu-small/source/i18n/unicode/tmunit.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/tmutamt.h b/deps/icu-small/source/i18n/unicode/tmutamt.h index 887150121f..1717b7605f 100644 --- a/deps/icu-small/source/i18n/unicode/tmutamt.h +++ b/deps/icu-small/source/i18n/unicode/tmutamt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/tmutfmt.h b/deps/icu-small/source/i18n/unicode/tmutfmt.h index b90d4a096d..8f245859a6 100644 --- a/deps/icu-small/source/i18n/unicode/tmutfmt.h +++ b/deps/icu-small/source/i18n/unicode/tmutfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/translit.h b/deps/icu-small/source/i18n/unicode/translit.h index 1e49bfb969..dc31d97bc6 100644 --- a/deps/icu-small/source/i18n/unicode/translit.h +++ b/deps/icu-small/source/i18n/unicode/translit.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -1319,7 +1319,7 @@ inline int32_t Transliterator::getMaximumContextLength(void) const { inline void Transliterator::setID(const UnicodeString& id) { ID = id; // NUL-terminate the ID string, which is a non-aliased copy. 
- ID.append((UChar)0); + ID.append((char16_t)0); ID.truncate(ID.length()-1); } diff --git a/deps/icu-small/source/i18n/unicode/tzfmt.h b/deps/icu-small/source/i18n/unicode/tzfmt.h index dd86f1b48c..633cd8dc69 100644 --- a/deps/icu-small/source/i18n/unicode/tzfmt.h +++ b/deps/icu-small/source/i18n/unicode/tzfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -942,7 +942,7 @@ private: * @param parsedLen the parsed length, or 0 on failure. * @return the parsed offset in milliseconds. */ - int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, UChar separator, + int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, char16_t separator, int32_t& parsedLen) const; /** @@ -982,7 +982,7 @@ private: * @param maxFields The maximum fields * @return The offset string */ - static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, UChar sep, + static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, char16_t sep, OffsetFields minFields, OffsetFields maxFields, UnicodeString& result); /** @@ -1012,7 +1012,7 @@ private: * @param maxFields The maximum Fields to be parsed * @return Parsed offset, 0 or positive number. */ - static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, UChar sep, + static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, char16_t sep, OffsetFields minFields, OffsetFields maxFields); /** diff --git a/deps/icu-small/source/i18n/unicode/tznames.h b/deps/icu-small/source/i18n/unicode/tznames.h index 8861a7d026..60f0e5e4a1 100644 --- a/deps/icu-small/source/i18n/unicode/tznames.h +++ b/deps/icu-small/source/i18n/unicode/tznames.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/tzrule.h b/deps/icu-small/source/i18n/unicode/tzrule.h index 5e020bc1a3..171486f1c7 100644 --- a/deps/icu-small/source/i18n/unicode/tzrule.h +++ b/deps/icu-small/source/i18n/unicode/tzrule.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/tztrans.h b/deps/icu-small/source/i18n/unicode/tztrans.h index b2e09999bb..1276d67c31 100644 --- a/deps/icu-small/source/i18n/unicode/tztrans.h +++ b/deps/icu-small/source/i18n/unicode/tztrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/ucal.h b/deps/icu-small/source/i18n/unicode/ucal.h index 18522f6475..10d8bc5274 100644 --- a/deps/icu-small/source/i18n/unicode/ucal.h +++ b/deps/icu-small/source/i18n/unicode/ucal.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -425,8 +425,8 @@ enum UCalendarDateFields { */ UCAL_IS_LEAP_MONTH, - // Do not conditionalize with #ifndef U_HIDE_DEPRECATED_API, - // it is needed for layout of Calendar, DateFormat, and other objects + /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + * it is needed for layout of Calendar, DateFormat, and other objects */ /** * One more than the highest normal UCalendarDateFields value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. diff --git a/deps/icu-small/source/i18n/unicode/ucol.h b/deps/icu-small/source/i18n/unicode/ucol.h index 0b3fab90b1..b5bacbfcb4 100644 --- a/deps/icu-small/source/i18n/unicode/ucol.h +++ b/deps/icu-small/source/i18n/unicode/ucol.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -131,7 +131,7 @@ typedef enum { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UCOL_ATTRIBUTE_VALUE_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UColAttributeValue; /** @@ -204,7 +204,7 @@ typedef enum { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UCOL_REORDER_CODE_LIMIT = 0x1005 -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UColReorderCode; /** @@ -342,8 +342,8 @@ typedef enum { */ UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, - // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, - // it is needed for layout of RuleBasedCollator object. + /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + * it is needed for layout of RuleBasedCollator object. 
*/ /** * One more than the highest normal UColAttribute value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. @@ -1067,7 +1067,7 @@ typedef enum { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UCOL_BOUND_VALUE_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UColBoundMode; /** diff --git a/deps/icu-small/source/i18n/unicode/ucoleitr.h b/deps/icu-small/source/i18n/unicode/ucoleitr.h index 89fd9e85cf..1d644fc259 100644 --- a/deps/icu-small/source/i18n/unicode/ucoleitr.h +++ b/deps/icu-small/source/i18n/unicode/ucoleitr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/ucsdet.h b/deps/icu-small/source/i18n/unicode/ucsdet.h index a926d2f22c..7a8564f9ea 100644 --- a/deps/icu-small/source/i18n/unicode/ucsdet.h +++ b/deps/icu-small/source/i18n/unicode/ucsdet.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucsdet.h - * encoding: US-ASCII + * encoding: UTF-8 * indentation:4 * * created on: 2005Aug04 @@ -45,6 +45,10 @@ * in a single language, and a minimum of a few hundred bytes worth of plain text * in the language are needed. The detection process will attempt to * ignore html or xml style markup that could otherwise obscure the content. + *

    + * An alternative to the ICU Charset Detector is the + * Compact Encoding Detector, https://github.com/google/compact_enc_det. + * It often gives more accurate results, especially with short input samples. */ @@ -395,7 +399,7 @@ ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); /** * Enable or disable individual charset encoding. * A name of charset encoding must be included in the names returned by - * {@link #getAllDetectableCharsets()}. + * {@link #ucsdet_getAllDetectableCharsets()}. * * @param ucsd a Charset detector. * @param encoding encoding the name of charset encoding. diff --git a/deps/icu-small/source/i18n/unicode/udat.h b/deps/icu-small/source/i18n/unicode/udat.h index cacfbe8500..90aff20df2 100644 --- a/deps/icu-small/source/i18n/unicode/udat.h +++ b/deps/icu-small/source/i18n/unicode/udat.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -763,21 +763,19 @@ typedef enum UDateFormatField { UDAT_RELATED_YEAR_FIELD = 34, #endif /* U_HIDE_INTERNAL_API */ -#ifndef U_HIDE_DRAFT_API /** * FieldPosition selector for 'b' field alignment. * Displays midnight and noon for 12am and 12pm, respectively, if available; * otherwise fall back to AM / PM. - * @draft ICU 57 + * @stable ICU 57 */ UDAT_AM_PM_MIDNIGHT_NOON_FIELD = 35, /* FieldPosition selector for 'B' field alignment. * Displays flexible day periods, such as "in the morning", if available. - * @draft ICU 57 + * @stable ICU 57 */ UDAT_FLEXIBLE_DAY_PERIOD_FIELD = 36, -#endif /* U_HIDE_DRAFT_API */ #ifndef U_HIDE_INTERNAL_API /** @@ -797,7 +795,7 @@ typedef enum UDateFormatField { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/ UDAT_FIELD_COUNT = 38 -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UDateFormatField; @@ -819,7 +817,7 @@ typedef enum UDateFormatField { * of error (e.g., the input field is UDAT_FIELD_COUNT). * @stable ICU 4.4 */ -U_STABLE UCalendarDateFields U_EXPORT2 +U_CAPI UCalendarDateFields U_EXPORT2 udat_toCalendarDateField(UDateFormatField field); @@ -851,7 +849,7 @@ udat_toCalendarDateField(UDateFormatField field); * an error occurred. * @stable ICU 2.0 */ -U_STABLE UDateFormat* U_EXPORT2 +U_CAPI UDateFormat* U_EXPORT2 udat_open(UDateFormatStyle timeStyle, UDateFormatStyle dateStyle, const char *locale, @@ -868,7 +866,7 @@ udat_open(UDateFormatStyle timeStyle, * @param format The formatter to close. * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_close(UDateFormat* format); @@ -902,8 +900,8 @@ typedef enum UDateFormatBooleanAttribute { */ UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH = 3, - // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, - // it is needed for layout of DateFormat object. + /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + * it is needed for layout of DateFormat object. */ /** * One more than the highest normal UDateFormatBooleanAttribute value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. @@ -921,7 +919,7 @@ typedef enum UDateFormatBooleanAttribute { * @return The value of attr. 
* @stable ICU 53 */ -U_STABLE UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute attr, UErrorCode* status); /** @@ -934,7 +932,7 @@ udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute att * @param status A pointer to an UErrorCode to receive any errors * @stable ICU 53 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setBooleanAttribute(UDateFormat *fmt, UDateFormatBooleanAttribute attr, UBool newValue, UErrorCode* status); @@ -966,7 +964,7 @@ U_NAMESPACE_END * @return A pointer to a UDateFormat identical to fmt. * @stable ICU 2.0 */ -U_STABLE UDateFormat* U_EXPORT2 +U_CAPI UDateFormat* U_EXPORT2 udat_clone(const UDateFormat *fmt, UErrorCode *status); @@ -988,7 +986,7 @@ udat_clone(const UDateFormat *fmt, * @see UFieldPosition * @stable ICU 2.0 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_format( const UDateFormat* format, UDate dateToFormat, UChar* result, @@ -1018,7 +1016,7 @@ udat_format( const UDateFormat* format, * @see UFieldPosition * @stable ICU 55 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_formatCalendar( const UDateFormat* format, UCalendar* calendar, UChar* result, @@ -1053,7 +1051,7 @@ udat_formatCalendar( const UDateFormat* format, * @see UFieldPositionIterator * @stable ICU 55 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_formatForFields( const UDateFormat* format, UDate dateToFormat, UChar* result, @@ -1091,7 +1089,7 @@ udat_formatForFields( const UDateFormat* format, * @see UFieldPositionIterator * @stable ICU 55 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_formatCalendarForFields( const UDateFormat* format, UCalendar* calendar, UChar* result, @@ -1125,7 +1123,7 @@ udat_formatCalendarForFields( const UDateFormat* format, * @see udat_format * @stable ICU 2.0 */ -U_STABLE UDate U_EXPORT2 +U_CAPI UDate U_EXPORT2 udat_parse(const UDateFormat* format, const UChar* text, int32_t textLength, 
@@ -1153,7 +1151,7 @@ udat_parse(const UDateFormat* format, * @see udat_format * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_parseCalendar(const UDateFormat* format, UCalendar* calendar, const UChar* text, @@ -1170,7 +1168,7 @@ udat_parseCalendar(const UDateFormat* format, * @see udat_setLenient * @stable ICU 2.0 */ -U_STABLE UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 udat_isLenient(const UDateFormat* fmt); /** @@ -1182,7 +1180,7 @@ udat_isLenient(const UDateFormat* fmt); * @see dat_isLenient * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setLenient( UDateFormat* fmt, UBool isLenient); @@ -1195,7 +1193,7 @@ udat_setLenient( UDateFormat* fmt, * @see udat_setCalendar * @stable ICU 2.0 */ -U_STABLE const UCalendar* U_EXPORT2 +U_CAPI const UCalendar* U_EXPORT2 udat_getCalendar(const UDateFormat* fmt); /** @@ -1207,7 +1205,7 @@ udat_getCalendar(const UDateFormat* fmt); * @see udat_setCalendar * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setCalendar( UDateFormat* fmt, const UCalendar* calendarToSet); @@ -1220,7 +1218,7 @@ udat_setCalendar( UDateFormat* fmt, * @see udat_setNumberFormat * @stable ICU 2.0 */ -U_STABLE const UNumberFormat* U_EXPORT2 +U_CAPI const UNumberFormat* U_EXPORT2 udat_getNumberFormat(const UDateFormat* fmt); /** @@ -1232,7 +1230,7 @@ udat_getNumberFormat(const UDateFormat* fmt); * @see udat_setNumberFormatForField * @stable ICU 54 */ -U_STABLE const UNumberFormat* U_EXPORT2 +U_CAPI const UNumberFormat* U_EXPORT2 udat_getNumberFormatForField(const UDateFormat* fmt, UChar field); /** @@ -1250,7 +1248,7 @@ udat_getNumberFormatForField(const UDateFormat* fmt, UChar field); * @see udat_getNumberFormatForField * @stable ICU 54 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_adoptNumberFormatForFields( UDateFormat* fmt, const UChar* fields, UNumberFormat* numberFormatToSet, @@ -1267,7 +1265,7 @@ udat_adoptNumberFormatForFields( UDateFormat* fmt, * @see 
udat_setNumberFormatForField * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setNumberFormat( UDateFormat* fmt, const UNumberFormat* numberFormatToSet); /** @@ -1279,7 +1277,7 @@ udat_setNumberFormat( UDateFormat* fmt, * @see udat_getNumberFormat * @stable ICU 54 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_adoptNumberFormat( UDateFormat* fmt, UNumberFormat* numberFormatToAdopt); /** @@ -1291,7 +1289,7 @@ udat_adoptNumberFormat( UDateFormat* fmt, * @see udat_countAvailable * @stable ICU 2.0 */ -U_STABLE const char* U_EXPORT2 +U_CAPI const char* U_EXPORT2 udat_getAvailable(int32_t localeIndex); /** @@ -1302,7 +1300,7 @@ udat_getAvailable(int32_t localeIndex); * @see udat_getAvailable * @stable ICU 2.0 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_countAvailable(void); /** @@ -1315,7 +1313,7 @@ udat_countAvailable(void); * @see udat_Set2DigitYearStart * @stable ICU 2.0 */ -U_STABLE UDate U_EXPORT2 +U_CAPI UDate U_EXPORT2 udat_get2DigitYearStart( const UDateFormat *fmt, UErrorCode *status); @@ -1329,7 +1327,7 @@ udat_get2DigitYearStart( const UDateFormat *fmt, * @see udat_Set2DigitYearStart * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_set2DigitYearStart( UDateFormat *fmt, UDate d, UErrorCode *status); @@ -1346,7 +1344,7 @@ udat_set2DigitYearStart( UDateFormat *fmt, * @see udat_applyPattern * @stable ICU 2.0 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_toPattern( const UDateFormat *fmt, UBool localized, UChar *result, @@ -1363,7 +1361,7 @@ udat_toPattern( const UDateFormat *fmt, * @see udat_toPattern * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_applyPattern( UDateFormat *format, UBool localized, const UChar *pattern, @@ -1489,7 +1487,7 @@ typedef struct UDateFormatSymbols UDateFormatSymbols; * @see udat_setSymbols * @stable ICU 2.0 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_getSymbols(const UDateFormat *fmt, 
UDateFormatSymbolType type, int32_t symbolIndex, @@ -1509,7 +1507,7 @@ udat_getSymbols(const UDateFormat *fmt, * @see udat_setSymbols * @stable ICU 2.0 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 udat_countSymbols( const UDateFormat *fmt, UDateFormatSymbolType type); @@ -1528,7 +1526,7 @@ udat_countSymbols( const UDateFormat *fmt, * @see udat_countSymbols * @stable ICU 2.0 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setSymbols( UDateFormat *format, UDateFormatSymbolType type, int32_t symbolIndex, @@ -1545,7 +1543,7 @@ udat_setSymbols( UDateFormat *format, * @return the locale name * @stable ICU 2.8 */ -U_STABLE const char* U_EXPORT2 +U_CAPI const char* U_EXPORT2 udat_getLocaleByType(const UDateFormat *fmt, ULocDataLocaleType type, UErrorCode* status); @@ -1558,7 +1556,7 @@ udat_getLocaleByType(const UDateFormat *fmt, * @param status A pointer to an UErrorCode to receive any errors * @stable ICU 51 */ -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status); /** @@ -1570,7 +1568,7 @@ udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status); * @return The UDisplayContextValue for the specified type. * @stable ICU 53 */ -U_STABLE UDisplayContext U_EXPORT2 +U_CAPI UDisplayContext U_EXPORT2 udat_getContext(const UDateFormat* fmt, UDisplayContextType type, UErrorCode* status); #ifndef U_HIDE_INTERNAL_API diff --git a/deps/icu-small/source/i18n/unicode/udateintervalformat.h b/deps/icu-small/source/i18n/unicode/udateintervalformat.h index 81bff16d6e..70cbadeb57 100644 --- a/deps/icu-small/source/i18n/unicode/udateintervalformat.h +++ b/deps/icu-small/source/i18n/unicode/udateintervalformat.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/udatpg.h b/deps/icu-small/source/i18n/unicode/udatpg.h index 365d51c493..9e3bdd4114 100644 --- a/deps/icu-small/source/i18n/unicode/udatpg.h +++ b/deps/icu-small/source/i18n/unicode/udatpg.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: udatpg.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -86,8 +86,8 @@ typedef enum UDateTimePatternField { /** @stable ICU 3.8 */ UDATPG_ZONE_FIELD, - // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, - // it is needed for layout of DateTimePatternGenerator object. + /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + * it is needed for layout of DateTimePatternGenerator object. */ /** * One more than the highest normal UDateTimePatternField value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. diff --git a/deps/icu-small/source/i18n/unicode/ufieldpositer.h b/deps/icu-small/source/i18n/unicode/ufieldpositer.h index 8dfa3df5a4..3ae73b6d84 100644 --- a/deps/icu-small/source/i18n/unicode/ufieldpositer.h +++ b/deps/icu-small/source/i18n/unicode/ufieldpositer.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/uformattable.h b/deps/icu-small/source/i18n/unicode/uformattable.h index e4683d56c3..9ba2a36901 100644 --- a/deps/icu-small/source/i18n/unicode/uformattable.h +++ b/deps/icu-small/source/i18n/unicode/uformattable.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -57,7 +57,7 @@ typedef enum UFormattableType { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UFMT_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UFormattableType; diff --git a/deps/icu-small/source/i18n/unicode/ugender.h b/deps/icu-small/source/i18n/unicode/ugender.h index c1e591ed28..d015a2300c 100644 --- a/deps/icu-small/source/i18n/unicode/ugender.h +++ b/deps/icu-small/source/i18n/unicode/ugender.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/ulocdata.h b/deps/icu-small/source/i18n/unicode/ulocdata.h index ecf6fdcb3f..de8d8539c6 100644 --- a/deps/icu-small/source/i18n/unicode/ulocdata.h +++ b/deps/icu-small/source/i18n/unicode/ulocdata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * * ****************************************************************************** * file name: ulocdata.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -55,7 +55,7 @@ typedef enum ULocaleDataExemplarSetType { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ ULOCDATA_ES_COUNT=4 -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } ULocaleDataExemplarSetType; /** The possible types of delimiters. @@ -76,7 +76,7 @@ typedef enum ULocaleDataDelimiterType { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ ULOCDATA_DELIMITER_COUNT = 4 -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } ULocaleDataDelimiterType; /** @@ -207,7 +207,7 @@ typedef enum UMeasurementSystem { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UMS_LIMIT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UMeasurementSystem; /** diff --git a/deps/icu-small/source/i18n/unicode/umsg.h b/deps/icu-small/source/i18n/unicode/umsg.h index 0beb39d5ab..6818820612 100644 --- a/deps/icu-small/source/i18n/unicode/umsg.h +++ b/deps/icu-small/source/i18n/unicode/umsg.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: @@ -8,7 +8,7 @@ ******************************************************************** * * file name: umsg.h - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/unicode/unirepl.h b/deps/icu-small/source/i18n/unicode/unirepl.h index 37815a9a24..8fb25d4689 100644 --- a/deps/icu-small/source/i18n/unicode/unirepl.h +++ b/deps/icu-small/source/i18n/unicode/unirepl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/unum.h b/deps/icu-small/source/i18n/unicode/unum.h index 7c652e09cf..5fc65486fc 100644 --- a/deps/icu-small/source/i18n/unicode/unum.h +++ b/deps/icu-small/source/i18n/unicode/unum.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -25,6 +25,7 @@ #include "unicode/parseerr.h" #include "unicode/uformattable.h" #include "unicode/udisplaycontext.h" +#include "unicode/ufieldpositer.h" /** * \file @@ -249,7 +250,7 @@ typedef enum UNumberFormatStyle { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/ UNUM_FORMAT_STYLE_COUNT=17, -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ /** * Default format @@ -326,8 +327,8 @@ enum UCurrencySpacing { /** @stable ICU 4.8 */ UNUM_CURRENCY_INSERT, - // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, - // it is needed for layout of DecimalFormatSymbols object. + /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + * it is needed for layout of DecimalFormatSymbols object. */ /** * One more than the highest normal UCurrencySpacing value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. @@ -371,7 +372,7 @@ typedef enum UNumberFormatFields { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UNUM_FIELD_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UNumberFormatFields; @@ -553,6 +554,59 @@ unum_formatDouble( const UNumberFormat* fmt, UFieldPosition *pos, /* 0 if ignore */ UErrorCode* status); +#ifndef U_HIDE_DRAFT_API +/** +* Format a double using a UNumberFormat according to the UNumberFormat's locale, +* and initialize a UFieldPositionIterator that enumerates the subcomponents of +* the resulting string. +* +* @param format +* The formatter to use. +* @param number +* The number to format. +* @param result +* A pointer to a buffer to receive the NULL-terminated formatted +* number. If the formatted number fits into dest but cannot be +* NULL-terminated (length == resultLength) then the error code is set +* to U_STRING_NOT_TERMINATED_WARNING. If the formatted number doesn't +* fit into result then the error code is set to +* U_BUFFER_OVERFLOW_ERROR. +* @param resultLength +* The maximum size of result. +* @param fpositer +* A pointer to a UFieldPositionIterator created by {@link #ufieldpositer_open} +* (may be NULL if field position information is not needed, but in this +* case it's preferable to use {@link #unum_formatDouble}). 
Iteration +* information already present in the UFieldPositionIterator is deleted, +* and the iterator is reset to apply to the fields in the formatted +* string created by this function call. The field values and indexes +* returned by {@link #ufieldpositer_next} represent fields denoted by +* the UNumberFormatFields enum. Fields are not returned in a guaranteed +* order. Fields cannot overlap, but they may nest. For example, 1234 +* could format as "1,234" which might consist of a grouping separator +* field for ',' and an integer field encompassing the entire string. +* @param status +* A pointer to an UErrorCode to receive any errors +* @return +* The total buffer size needed; if greater than resultLength, the +* output was truncated. +* @see unum_formatDouble +* @see unum_parse +* @see unum_parseDouble +* @see UFieldPositionIterator +* @see UNumberFormatFields +* @draft ICU 59 +*/ +U_DRAFT int32_t U_EXPORT2 +unum_formatDoubleForFields(const UNumberFormat* format, + double number, + UChar* result, + int32_t resultLength, + UFieldPositionIterator* fpositer, + UErrorCode* status); + +#endif /* U_HIDE_DRAFT_API */ + /** * Format a decimal number using a UNumberFormat. * The number will be formatted according to the UNumberFormat's locale. @@ -1291,7 +1345,7 @@ typedef enum UNumberFormatSymbol { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UNUM_FORMAT_SYMBOL_COUNT = 28 -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UNumberFormatSymbol; /** diff --git a/deps/icu-small/source/i18n/unicode/unumsys.h b/deps/icu-small/source/i18n/unicode/unumsys.h index 396d55d6b7..2c794c23d3 100644 --- a/deps/icu-small/source/i18n/unicode/unumsys.h +++ b/deps/icu-small/source/i18n/unicode/unumsys.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/unicode/upluralrules.h b/deps/icu-small/source/i18n/unicode/upluralrules.h index 52e34d8d25..99d93a4e05 100644 --- a/deps/icu-small/source/i18n/unicode/upluralrules.h +++ b/deps/icu-small/source/i18n/unicode/upluralrules.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** @@ -15,6 +15,10 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/localpointer.h" +#include "unicode/uenum.h" +#ifndef U_HIDE_INTERNAL_API +#include "unicode/unum.h" +#endif /* U_HIDE_INTERNAL_API */ /** * \file @@ -60,7 +64,7 @@ enum UPluralType { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UPLURAL_TYPE_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ }; /** * @stable ICU 50 @@ -83,7 +87,7 @@ typedef struct UPluralRules UPluralRules; /**< C typedef for struct UPluralRule * @return A UPluralRules for the specified locale, or NULL if an error occurred. * @stable ICU 4.8 */ -U_STABLE UPluralRules* U_EXPORT2 +U_CAPI UPluralRules* U_EXPORT2 uplrules_open(const char *locale, UErrorCode *status); /** @@ -95,7 +99,7 @@ uplrules_open(const char *locale, UErrorCode *status); * @return A UPluralRules for the specified locale, or NULL if an error occurred. * @stable ICU 50 */ -U_DRAFT UPluralRules* U_EXPORT2 +U_CAPI UPluralRules* U_EXPORT2 uplrules_openForType(const char *locale, UPluralType type, UErrorCode *status); /** @@ -103,7 +107,7 @@ uplrules_openForType(const char *locale, UPluralType type, UErrorCode *status); * @param uplrules The UPluralRules object to close. 
* @stable ICU 4.8 */ -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 uplrules_close(UPluralRules *uplrules); @@ -138,12 +142,55 @@ U_NAMESPACE_END * @return The length of keyword. * @stable ICU 4.8 */ -U_STABLE int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uplrules_select(const UPluralRules *uplrules, double number, UChar *keyword, int32_t capacity, UErrorCode *status); +#ifndef U_HIDE_INTERNAL_API +/** + * Given a number, returns the keyword of the first rule that applies to the + * number, according to the UPluralRules object and given the number format + * specified by the UNumberFormat object. + * Note: This internal preview interface may be removed in the future if + * an architecturally cleaner solution reaches stable status. + * @param uplrules The UPluralRules object specifying the rules. + * @param number The number for which the rule has to be determined. + * @param fmt The UNumberFormat specifying how the number will be formatted + * (this can affect the plural form, e.g. "1 dollar" vs "1.0 dollars"). + * If this is NULL, the function behaves like uplrules_select. + * @param keyword The keyword of the rule that applies to number. + * @param capacity The capacity of the keyword buffer. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The length of keyword. + * @internal ICU 59 technology preview, may be removed in the future + */ +U_INTERNAL int32_t U_EXPORT2 +uplrules_selectWithFormat(const UPluralRules *uplrules, + double number, + const UNumberFormat *fmt, + UChar *keyword, int32_t capacity, + UErrorCode *status); + +#endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DRAFT_API +/** + * Creates a string enumeration of all plural rule keywords used in this + * UPluralRules object. The rule "other" is always present by default. + * @param uplrules The UPluralRules object specifying the rules for + * a given locale. + * @param status A pointer to a UErrorCode to receive any errors. 
+ * @return a string enumeration over plural rule keywords, or NULL + * upon error. The caller is responsible for closing the result. + * @draft ICU 59 + */ +U_DRAFT UEnumeration* U_EXPORT2 +uplrules_getKeywords(const UPluralRules *uplrules, + UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ + #endif /* #if !UCONFIG_NO_FORMATTING */ #endif diff --git a/deps/icu-small/source/i18n/unicode/uregex.h b/deps/icu-small/source/i18n/unicode/uregex.h index 7806a74afc..69c0eead95 100644 --- a/deps/icu-small/source/i18n/unicode/uregex.h +++ b/deps/icu-small/source/i18n/unicode/uregex.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * file name: uregex.h -* encoding: US-ASCII +* encoding: UTF-8 * indentation:4 * * created on: 2004mar09 diff --git a/deps/icu-small/source/i18n/unicode/uregion.h b/deps/icu-small/source/i18n/unicode/uregion.h index b5d03691ca..9d0c1e99de 100644 --- a/deps/icu-small/source/i18n/unicode/uregion.h +++ b/deps/icu-small/source/i18n/unicode/uregion.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** @@ -113,7 +113,7 @@ typedef enum URegionType { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/ URGN_LIMIT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } URegionType; #if !UCONFIG_NO_FORMATTING diff --git a/deps/icu-small/source/i18n/unicode/ureldatefmt.h b/deps/icu-small/source/i18n/unicode/ureldatefmt.h index fad8ffd9e1..0eff80a16b 100644 --- a/deps/icu-small/source/i18n/unicode/ureldatefmt.h +++ b/deps/icu-small/source/i18n/unicode/ureldatefmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** @@ -34,7 +34,7 @@ * for determining which unit to use, such as deciding between "in 7 days" * and "in 1 week". * - * @draft ICU 57 + * @stable ICU 57 */ /** @@ -66,104 +66,103 @@ typedef enum UDateRelativeDateTimeFormatterStyle { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ UDAT_STYLE_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } UDateRelativeDateTimeFormatterStyle; -#ifndef U_HIDE_DRAFT_API /** * Represents the unit for formatting a relative date. e.g "in 5 days" * or "next year" - * @draft ICU 57 + * @stable ICU 57 */ typedef enum URelativeDateTimeUnit { /** * Specifies that relative unit is year, e.g. "last year", * "in 5 years". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_YEAR, /** * Specifies that relative unit is quarter, e.g. "last quarter", * "in 5 quarters". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_QUARTER, /** * Specifies that relative unit is month, e.g. "last month", * "in 5 months". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_MONTH, /** * Specifies that relative unit is week, e.g. "last week", * "in 5 weeks". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_WEEK, /** * Specifies that relative unit is day, e.g. "yesterday", * "in 5 days". 
- * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_DAY, /** * Specifies that relative unit is hour, e.g. "1 hour ago", * "in 5 hours". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_HOUR, /** * Specifies that relative unit is minute, e.g. "1 minute ago", * "in 5 minutes". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_MINUTE, /** * Specifies that relative unit is second, e.g. "1 second ago", * "in 5 seconds". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_SECOND, /** * Specifies that relative unit is Sunday, e.g. "last Sunday", * "this Sunday", "next Sunday", "in 5 Sundays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_SUNDAY, /** * Specifies that relative unit is Monday, e.g. "last Monday", * "this Monday", "next Monday", "in 5 Mondays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_MONDAY, /** * Specifies that relative unit is Tuesday, e.g. "last Tuesday", * "this Tuesday", "next Tuesday", "in 5 Tuesdays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_TUESDAY, /** * Specifies that relative unit is Wednesday, e.g. "last Wednesday", * "this Wednesday", "next Wednesday", "in 5 Wednesdays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_WEDNESDAY, /** * Specifies that relative unit is Thursday, e.g. "last Thursday", * "this Thursday", "next Thursday", "in 5 Thursdays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_THURSDAY, /** * Specifies that relative unit is Friday, e.g. "last Friday", * "this Friday", "next Friday", "in 5 Fridays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_FRIDAY, /** * Specifies that relative unit is Saturday, e.g. "last Saturday", * "this Saturday", "next Saturday", "in 5 Saturdays". - * @draft ICU 57 + * @stable ICU 57 */ UDAT_REL_UNIT_SATURDAY, #ifndef U_HIDE_DEPRECATED_API @@ -172,18 +171,15 @@ typedef enum URelativeDateTimeUnit { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/ UDAT_REL_UNIT_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } URelativeDateTimeUnit; -#endif /* U_HIDE_DRAFT_API */ - -#ifndef U_HIDE_DRAFT_API /** * Opaque URelativeDateTimeFormatter object for use in C programs. - * @draft ICU 57 + * @stable ICU 57 */ struct URelativeDateTimeFormatter; -typedef struct URelativeDateTimeFormatter URelativeDateTimeFormatter; /**< C typedef for struct URelativeDateTimeFormatter. @draft ICU 57 */ +typedef struct URelativeDateTimeFormatter URelativeDateTimeFormatter; /**< C typedef for struct URelativeDateTimeFormatter. @stable ICU 57 */ /** @@ -216,9 +212,9 @@ typedef struct URelativeDateTimeFormatter URelativeDateTimeFormatter; /**< C ty * @return * A pointer to a URelativeDateTimeFormatter object for the specified locale, * or NULL if an error occurred. - * @draft ICU 57 + * @stable ICU 57 */ -U_DRAFT URelativeDateTimeFormatter* U_EXPORT2 +U_STABLE URelativeDateTimeFormatter* U_EXPORT2 ureldatefmt_open( const char* locale, UNumberFormat* nfToAdopt, UDateRelativeDateTimeFormatterStyle width, @@ -229,9 +225,9 @@ ureldatefmt_open( const char* locale, * Close a URelativeDateTimeFormatter object. Once closed it may no longer be used. * @param reldatefmt * The URelativeDateTimeFormatter object to close. - * @draft ICU 57 + * @stable ICU 57 */ -U_DRAFT void U_EXPORT2 +U_STABLE void U_EXPORT2 ureldatefmt_close(URelativeDateTimeFormatter *reldatefmt); #if U_SHOW_CPLUSPLUS_API @@ -245,7 +241,7 @@ U_NAMESPACE_BEGIN * * @see LocalPointerBase * @see LocalPointer - * @draft ICU 57 + * @stable ICU 57 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalURelativeDateTimeFormatterPointer, URelativeDateTimeFormatter, ureldatefmt_close); @@ -279,9 +275,9 @@ U_NAMESPACE_END * @return * The length of the formatted result; may be greater * than resultCapacity, in which case an error is returned. 
- * @draft ICU 57 + * @stable ICU 57 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ureldatefmt_formatNumeric( const URelativeDateTimeFormatter* reldatefmt, double offset, URelativeDateTimeUnit unit, @@ -315,9 +311,9 @@ ureldatefmt_formatNumeric( const URelativeDateTimeFormatter* reldatefmt, * @return * The length of the formatted result; may be greater * than resultCapacity, in which case an error is returned. - * @draft ICU 57 + * @stable ICU 57 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ureldatefmt_format( const URelativeDateTimeFormatter* reldatefmt, double offset, URelativeDateTimeUnit unit, @@ -352,9 +348,9 @@ ureldatefmt_format( const URelativeDateTimeFormatter* reldatefmt, * @return * The length of the formatted result; may be greater than resultCapacity, * in which case an error is returned. - * @draft ICU 57 + * @stable ICU 57 */ -U_DRAFT int32_t U_EXPORT2 +U_STABLE int32_t U_EXPORT2 ureldatefmt_combineDateAndTime( const URelativeDateTimeFormatter* reldatefmt, const UChar * relativeDateString, int32_t relativeDateStringLen, @@ -364,8 +360,6 @@ ureldatefmt_combineDateAndTime( const URelativeDateTimeFormatter* reldatefmt, int32_t resultCapacity, UErrorCode* status ); -#endif /* U_HIDE_DRAFT_API */ - #endif /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION */ #endif diff --git a/deps/icu-small/source/i18n/unicode/usearch.h b/deps/icu-small/source/i18n/unicode/usearch.h index dcdb7fe420..600f9142b4 100644 --- a/deps/icu-small/source/i18n/unicode/usearch.h +++ b/deps/icu-small/source/i18n/unicode/usearch.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -196,7 +196,7 @@ typedef enum { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/ USEARCH_ATTRIBUTE_COUNT = 3 -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } USearchAttribute; /** @@ -272,7 +272,7 @@ typedef enum { * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ USEARCH_ATTRIBUTE_VALUE_COUNT -#endif // U_HIDE_DEPRECATED_API +#endif /* U_HIDE_DEPRECATED_API */ } USearchAttributeValue; /* open and close ------------------------------------------------------ */ diff --git a/deps/icu-small/source/i18n/unicode/uspoof.h b/deps/icu-small/source/i18n/unicode/uspoof.h index 40b73380c5..6c2ac5e109 100644 --- a/deps/icu-small/source/i18n/unicode/uspoof.h +++ b/deps/icu-small/source/i18n/unicode/uspoof.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** @@ -6,7 +6,7 @@ * and others. All Rights Reserved. *************************************************************************** * file name: uspoof.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/unicode/utmscale.h b/deps/icu-small/source/i18n/unicode/utmscale.h index 6b4b389ac8..2392c6414e 100644 --- a/deps/icu-small/source/i18n/unicode/utmscale.h +++ b/deps/icu-small/source/i18n/unicode/utmscale.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -283,10 +283,14 @@ typedef enum UDateTimeScale { */ UDTS_UNIX_MICROSECONDS_TIME, +#ifndef U_HIDE_DEPRECATED_API /** * The first unused time scale value. The limit of this enum + * @deprecated ICU 59 The numeric value may change over time, see ICU ticket #12420. 
*/ UDTS_MAX_SCALE +#endif /* U_HIDE_DEPRECATED_API */ + } UDateTimeScale; /** @@ -423,12 +427,15 @@ typedef enum UTimeScaleValue { #endif /* U_HIDE_INTERNAL_API */ +#ifndef U_HIDE_DEPRECATED_API /** * The number of time scale values, in other words limit of this enum. * * @see utmscale_getTimeScaleValue + * @deprecated ICU 59 The numeric value may change over time, see ICU ticket #12420. */ UTSV_MAX_SCALE_VALUE=11 +#endif /* U_HIDE_DEPRECATED_API */ } UTimeScaleValue; diff --git a/deps/icu-small/source/i18n/unicode/utrans.h b/deps/icu-small/source/i18n/unicode/utrans.h index 661ee5d385..a4158726ca 100644 --- a/deps/icu-small/source/i18n/unicode/utrans.h +++ b/deps/icu-small/source/i18n/unicode/utrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unicode/vtzone.h b/deps/icu-small/source/i18n/unicode/vtzone.h index 1682a3d526..5d16177868 100644 --- a/deps/icu-small/source/i18n/unicode/vtzone.h +++ b/deps/icu-small/source/i18n/unicode/vtzone.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/unum.cpp b/deps/icu-small/source/i18n/unum.cpp index 0e224858db..95c744c128 100644 --- a/deps/icu-small/source/i18n/unum.cpp +++ b/deps/icu-small/source/i18n/unum.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -247,6 +247,33 @@ unum_formatDouble( const UNumberFormat* fmt, return res.extract(result, resultLength, *status); } +U_CAPI int32_t U_EXPORT2 +unum_formatDoubleForFields(const UNumberFormat* format, + double number, + UChar* result, + int32_t resultLength, + UFieldPositionIterator* fpositer, + UErrorCode* status) +{ + if (U_FAILURE(*status)) + return -1; + + if (result == NULL ? resultLength != 0 : resultLength < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + UnicodeString res; + if (result != NULL) { + // NULL destination for pure preflighting: empty dummy string + // otherwise, alias the destination buffer + res.setTo(result, 0, resultLength); + } + + ((const NumberFormat*)format)->format(number, res, (FieldPositionIterator*)fpositer, *status); + + return res.extract(result, resultLength, *status); +} U_CAPI int32_t U_EXPORT2 unum_formatDecimal(const UNumberFormat* fmt, diff --git a/deps/icu-small/source/i18n/unumsys.cpp b/deps/icu-small/source/i18n/unumsys.cpp index f643e342eb..4a0d0fa3b6 100644 --- a/deps/icu-small/source/i18n/unumsys.cpp +++ b/deps/icu-small/source/i18n/unumsys.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/upluralrules.cpp b/deps/icu-small/source/i18n/upluralrules.cpp index 5e1eebf53d..24e74e3ee2 100644 --- a/deps/icu-small/source/i18n/upluralrules.cpp +++ b/deps/icu-small/source/i18n/upluralrules.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** @@ -15,6 +15,8 @@ #include "unicode/plurrule.h" #include "unicode/locid.h" #include "unicode/unistr.h" +#include "unicode/unum.h" +#include "unicode/numfmt.h" U_NAMESPACE_USE @@ -54,5 +56,48 @@ uplrules_select(const UPluralRules *uplrules, return result.extract(keyword, capacity, *status); } +U_CAPI int32_t U_EXPORT2 +uplrules_selectWithFormat(const UPluralRules *uplrules, + double number, + const UNumberFormat *fmt, + UChar *keyword, int32_t capacity, + UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return 0; + } + const PluralRules* plrules = reinterpret_cast(uplrules); + const NumberFormat* nf = reinterpret_cast(fmt); + if (plrules == NULL || nf == NULL || ((keyword == NULL)? capacity != 0 : capacity < 0)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + Formattable obj(number); + UnicodeString result = plrules->select(obj, *nf, *status); + return result.extract(keyword, capacity, *status); +} + +U_CAPI UEnumeration* U_EXPORT2 +uplrules_getKeywords(const UPluralRules *uplrules, + UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return NULL; + } + const PluralRules* plrules = reinterpret_cast(uplrules); + if (plrules == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + StringEnumeration *senum = plrules->getKeywords(*status); + if (U_FAILURE(*status)) { + return NULL; + } + if (senum == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + return uenum_openFromStringEnumeration(senum, status); +} #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/uregex.cpp b/deps/icu-small/source/i18n/uregex.cpp index 03e2586c5f..a5dee6241d 100644 --- a/deps/icu-small/source/i18n/uregex.cpp +++ b/deps/icu-small/source/i18n/uregex.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. 
and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/uregexc.cpp b/deps/icu-small/source/i18n/uregexc.cpp index 6d0dc2062c..8674b4f17f 100644 --- a/deps/icu-small/source/i18n/uregexc.cpp +++ b/deps/icu-small/source/i18n/uregexc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/uregion.cpp b/deps/icu-small/source/i18n/uregion.cpp index 8e079ec593..79a623730c 100644 --- a/deps/icu-small/source/i18n/uregion.cpp +++ b/deps/icu-small/source/i18n/uregion.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************************** diff --git a/deps/icu-small/source/i18n/usearch.cpp b/deps/icu-small/source/i18n/usearch.cpp index 5e1617eb3e..e1e6c28e2b 100644 --- a/deps/icu-small/source/i18n/usearch.cpp +++ b/deps/icu-small/source/i18n/usearch.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/uspoof.cpp b/deps/icu-small/source/i18n/uspoof.cpp index d81b5b2149..1cb726e0b0 100644 --- a/deps/icu-small/source/i18n/uspoof.cpp +++ b/deps/icu-small/source/i18n/uspoof.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** @@ -6,7 +6,7 @@ * and others. All Rights Reserved. *************************************************************************** * file name: uspoof.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/uspoof_build.cpp b/deps/icu-small/source/i18n/uspoof_build.cpp index d676fe9977..7d2440e5af 100644 --- a/deps/icu-small/source/i18n/uspoof_build.cpp +++ b/deps/icu-small/source/i18n/uspoof_build.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** @@ -6,7 +6,7 @@ * and others. All Rights Reserved. *************************************************************************** * file name: uspoof_build.cpp - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/uspoof_conf.cpp b/deps/icu-small/source/i18n/uspoof_conf.cpp index 6edd1fa3f3..e5d9bb6338 100644 --- a/deps/icu-small/source/i18n/uspoof_conf.cpp +++ b/deps/icu-small/source/i18n/uspoof_conf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: uspoof_conf.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/uspoof_conf.h b/deps/icu-small/source/i18n/uspoof_conf.h index 72cd028104..ee8aa2678e 100644 --- a/deps/icu-small/source/i18n/uspoof_conf.h +++ b/deps/icu-small/source/i18n/uspoof_conf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * ****************************************************************************** * file name: uspoof_conf.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/i18n/uspoof_impl.cpp b/deps/icu-small/source/i18n/uspoof_impl.cpp index fba742c3ff..0ca85c00a9 100644 --- a/deps/icu-small/source/i18n/uspoof_impl.cpp +++ b/deps/icu-small/source/i18n/uspoof_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/uspoof_impl.h b/deps/icu-small/source/i18n/uspoof_impl.h index 62480e39e8..1184b8d906 100644 --- a/deps/icu-small/source/i18n/uspoof_impl.h +++ b/deps/icu-small/source/i18n/uspoof_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* *************************************************************************** diff --git a/deps/icu-small/source/i18n/usrchimp.h b/deps/icu-small/source/i18n/usrchimp.h index e6693d16b7..5438417e7e 100644 --- a/deps/icu-small/source/i18n/usrchimp.h +++ b/deps/icu-small/source/i18n/usrchimp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/i18n/utf16collationiterator.cpp b/deps/icu-small/source/i18n/utf16collationiterator.cpp index 733729fae7..7598b0ee52 100644 --- a/deps/icu-small/source/i18n/utf16collationiterator.cpp +++ b/deps/icu-small/source/i18n/utf16collationiterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/utf16collationiterator.h b/deps/icu-small/source/i18n/utf16collationiterator.h index 505ab810d3..fd3a05e9ef 100644 --- a/deps/icu-small/source/i18n/utf16collationiterator.h +++ b/deps/icu-small/source/i18n/utf16collationiterator.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/utf8collationiterator.cpp b/deps/icu-small/source/i18n/utf8collationiterator.cpp index 0a0205e7b3..85d4b76b08 100644 --- a/deps/icu-small/source/i18n/utf8collationiterator.cpp +++ b/deps/icu-small/source/i18n/utf8collationiterator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/utf8collationiterator.h b/deps/icu-small/source/i18n/utf8collationiterator.h index 8deb5ea395..9a3ec45aeb 100644 --- a/deps/icu-small/source/i18n/utf8collationiterator.h +++ b/deps/icu-small/source/i18n/utf8collationiterator.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/utmscale.c b/deps/icu-small/source/i18n/utmscale.c deleted file mode 100644 index 6868b9db22..0000000000 --- a/deps/icu-small/source/i18n/utmscale.c +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2004-2012, International Business Machines Corporation and -* others. All Rights Reserved. 
-******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/utmscale.h" - -#define ticks INT64_C(1) -#define microseconds (ticks * 10) -#define milliseconds (microseconds * 1000) -#define seconds (milliseconds * 1000) -#define minutes (seconds * 60) -#define hours (minutes * 60) -#define days (hours * 24) - -/* Constants generated by ICU4J com.ibm.icu.dev.tool.timescale.GenerateCTimeScaleData. */ -static const int64_t timeScaleTable[UDTS_MAX_SCALE][UTSV_MAX_SCALE_VALUE] = { - /* units epochOffset fromMin fromMax toMin toMax epochOffsetP1 epochOffsetM1 unitsRound minRound maxRound */ - {milliseconds, INT64_C(62135596800000), INT64_C(-984472800485477), INT64_C(860201606885477), INT64_C(-9223372036854774999), INT64_C(9223372036854774999), INT64_C(62135596800001), INT64_C(62135596799999), INT64_C(5000), INT64_C(-9223372036854770808), INT64_C(9223372036854770807)}, - {seconds, INT64_C(62135596800), INT64_C(-984472800485), INT64_C(860201606885), U_INT64_MIN, U_INT64_MAX, INT64_C(62135596801), INT64_C(62135596799), INT64_C(5000000), INT64_C(-9223372036849775808), INT64_C(9223372036849775807)}, - {milliseconds, INT64_C(62135596800000), INT64_C(-984472800485477), INT64_C(860201606885477), INT64_C(-9223372036854774999), INT64_C(9223372036854774999), INT64_C(62135596800001), INT64_C(62135596799999), INT64_C(5000), INT64_C(-9223372036854770808), INT64_C(9223372036854770807)}, - {ticks, INT64_C(504911232000000000), U_INT64_MIN, INT64_C(8718460804854775807), INT64_C(-8718460804854775808), U_INT64_MAX, INT64_C(504911232000000000), INT64_C(504911232000000000), INT64_C(0), U_INT64_MIN, U_INT64_MAX}, - {ticks, INT64_C(0), U_INT64_MIN, U_INT64_MAX, U_INT64_MIN, U_INT64_MAX, INT64_C(0), INT64_C(0), INT64_C(0), U_INT64_MIN, U_INT64_MAX}, - {seconds, INT64_C(60052752000), INT64_C(-982389955685), INT64_C(862284451685), U_INT64_MIN, U_INT64_MAX, INT64_C(60052752001), 
INT64_C(60052751999), INT64_C(5000000), INT64_C(-9223372036849775808), INT64_C(9223372036849775807)}, - {seconds, INT64_C(63113904000), INT64_C(-985451107685), INT64_C(859223299685), U_INT64_MIN, U_INT64_MAX, INT64_C(63113904001), INT64_C(63113903999), INT64_C(5000000), INT64_C(-9223372036849775808), INT64_C(9223372036849775807)}, - {days, INT64_C(693594), INT64_C(-11368793), INT64_C(9981605), U_INT64_MIN, U_INT64_MAX, INT64_C(693595), INT64_C(693593), INT64_C(432000000000), INT64_C(-9223371604854775808), INT64_C(9223371604854775807)}, - {days, INT64_C(693594), INT64_C(-11368793), INT64_C(9981605), U_INT64_MIN, U_INT64_MAX, INT64_C(693595), INT64_C(693593), INT64_C(432000000000), INT64_C(-9223371604854775808), INT64_C(9223371604854775807)}, - {microseconds, INT64_C(62135596800000000), INT64_C(-984472800485477580), INT64_C(860201606885477580), INT64_C(-9223372036854775804), INT64_C(9223372036854775804), INT64_C(62135596800000001), INT64_C(62135596799999999), INT64_C(5), INT64_C(-9223372036854775803), INT64_C(9223372036854775802)}, -}; - -U_CAPI int64_t U_EXPORT2 -utmscale_getTimeScaleValue(UDateTimeScale timeScale, UTimeScaleValue value, UErrorCode *status) -{ - if (status == NULL || U_FAILURE(*status)) { - return 0; - } - - if (timeScale < UDTS_JAVA_TIME || UDTS_MAX_SCALE <= timeScale - || value < UTSV_UNITS_VALUE || UTSV_MAX_SCALE_VALUE <= value) - { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - return timeScaleTable[timeScale][value]; -} - -U_CAPI int64_t U_EXPORT2 -utmscale_fromInt64(int64_t otherTime, UDateTimeScale timeScale, UErrorCode *status) -{ - const int64_t *data; - - if (status == NULL || U_FAILURE(*status)) { - return 0; - } - - if ((int32_t)timeScale < 0 || timeScale >= UDTS_MAX_SCALE) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - data = (const int64_t *)(&timeScaleTable[timeScale]); - - if (otherTime < data[UTSV_FROM_MIN_VALUE] || otherTime > data[UTSV_FROM_MAX_VALUE]) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } 
- - return (otherTime + data[UTSV_EPOCH_OFFSET_VALUE]) * data[UTSV_UNITS_VALUE]; -} - -U_CAPI int64_t U_EXPORT2 -utmscale_toInt64(int64_t universalTime, UDateTimeScale timeScale, UErrorCode *status) -{ - const int64_t *data; - - if (status == NULL || U_FAILURE(*status)) { - return 0; - } - - if ((int32_t)timeScale < 0 || timeScale >= UDTS_MAX_SCALE) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - data = (const int64_t *)(&timeScaleTable[timeScale]); - - if (universalTime < data[UTSV_TO_MIN_VALUE] || universalTime > data[UTSV_TO_MAX_VALUE]) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if (universalTime < 0) { - if (universalTime < data[UTSV_MIN_ROUND_VALUE]) { - return (universalTime + data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_PLUS_1_VALUE]; - } - - return (universalTime - data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_VALUE]; - } - - if (universalTime > data[UTSV_MAX_ROUND_VALUE]) { - return (universalTime - data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_MINUS_1_VALUE]; - } - - return (universalTime + data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_VALUE]; -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/utmscale.cpp b/deps/icu-small/source/i18n/utmscale.cpp new file mode 100644 index 0000000000..461985937a --- /dev/null +++ b/deps/icu-small/source/i18n/utmscale.cpp @@ -0,0 +1,116 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2004-2012, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/utmscale.h" + +#define ticks INT64_C(1) +#define microseconds (ticks * 10) +#define milliseconds (microseconds * 1000) +#define seconds (milliseconds * 1000) +#define minutes (seconds * 60) +#define hours (minutes * 60) +#define days (hours * 24) + +/* Constants generated by ICU4J com.ibm.icu.dev.tool.timescale.GenerateCTimeScaleData. */ +static const int64_t timeScaleTable[UDTS_MAX_SCALE][UTSV_MAX_SCALE_VALUE] = { + /* units epochOffset fromMin fromMax toMin toMax epochOffsetP1 epochOffsetM1 unitsRound minRound maxRound */ + {milliseconds, INT64_C(62135596800000), INT64_C(-984472800485477), INT64_C(860201606885477), INT64_C(-9223372036854774999), INT64_C(9223372036854774999), INT64_C(62135596800001), INT64_C(62135596799999), INT64_C(5000), INT64_C(-9223372036854770808), INT64_C(9223372036854770807)}, + {seconds, INT64_C(62135596800), INT64_C(-984472800485), INT64_C(860201606885), U_INT64_MIN, U_INT64_MAX, INT64_C(62135596801), INT64_C(62135596799), INT64_C(5000000), INT64_C(-9223372036849775808), INT64_C(9223372036849775807)}, + {milliseconds, INT64_C(62135596800000), INT64_C(-984472800485477), INT64_C(860201606885477), INT64_C(-9223372036854774999), INT64_C(9223372036854774999), INT64_C(62135596800001), INT64_C(62135596799999), INT64_C(5000), INT64_C(-9223372036854770808), INT64_C(9223372036854770807)}, + {ticks, INT64_C(504911232000000000), U_INT64_MIN, INT64_C(8718460804854775807), INT64_C(-8718460804854775808), U_INT64_MAX, INT64_C(504911232000000000), INT64_C(504911232000000000), INT64_C(0), U_INT64_MIN, U_INT64_MAX}, + {ticks, INT64_C(0), U_INT64_MIN, U_INT64_MAX, U_INT64_MIN, U_INT64_MAX, INT64_C(0), INT64_C(0), INT64_C(0), U_INT64_MIN, U_INT64_MAX}, + {seconds, INT64_C(60052752000), INT64_C(-982389955685), INT64_C(862284451685), U_INT64_MIN, U_INT64_MAX, INT64_C(60052752001), 
INT64_C(60052751999), INT64_C(5000000), INT64_C(-9223372036849775808), INT64_C(9223372036849775807)}, + {seconds, INT64_C(63113904000), INT64_C(-985451107685), INT64_C(859223299685), U_INT64_MIN, U_INT64_MAX, INT64_C(63113904001), INT64_C(63113903999), INT64_C(5000000), INT64_C(-9223372036849775808), INT64_C(9223372036849775807)}, + {days, INT64_C(693594), INT64_C(-11368793), INT64_C(9981605), U_INT64_MIN, U_INT64_MAX, INT64_C(693595), INT64_C(693593), INT64_C(432000000000), INT64_C(-9223371604854775808), INT64_C(9223371604854775807)}, + {days, INT64_C(693594), INT64_C(-11368793), INT64_C(9981605), U_INT64_MIN, U_INT64_MAX, INT64_C(693595), INT64_C(693593), INT64_C(432000000000), INT64_C(-9223371604854775808), INT64_C(9223371604854775807)}, + {microseconds, INT64_C(62135596800000000), INT64_C(-984472800485477580), INT64_C(860201606885477580), INT64_C(-9223372036854775804), INT64_C(9223372036854775804), INT64_C(62135596800000001), INT64_C(62135596799999999), INT64_C(5), INT64_C(-9223372036854775803), INT64_C(9223372036854775802)}, +}; + +U_CAPI int64_t U_EXPORT2 +utmscale_getTimeScaleValue(UDateTimeScale timeScale, UTimeScaleValue value, UErrorCode *status) +{ + if (status == NULL || U_FAILURE(*status)) { + return 0; + } + + if (timeScale < UDTS_JAVA_TIME || UDTS_MAX_SCALE <= timeScale + || value < UTSV_UNITS_VALUE || UTSV_MAX_SCALE_VALUE <= value) + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + return timeScaleTable[timeScale][value]; +} + +U_CAPI int64_t U_EXPORT2 +utmscale_fromInt64(int64_t otherTime, UDateTimeScale timeScale, UErrorCode *status) +{ + const int64_t *data; + + if (status == NULL || U_FAILURE(*status)) { + return 0; + } + + if ((int32_t)timeScale < 0 || timeScale >= UDTS_MAX_SCALE) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + data = (const int64_t *)(&timeScaleTable[timeScale]); + + if (otherTime < data[UTSV_FROM_MIN_VALUE] || otherTime > data[UTSV_FROM_MAX_VALUE]) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } 
+ + return (otherTime + data[UTSV_EPOCH_OFFSET_VALUE]) * data[UTSV_UNITS_VALUE]; +} + +U_CAPI int64_t U_EXPORT2 +utmscale_toInt64(int64_t universalTime, UDateTimeScale timeScale, UErrorCode *status) +{ + const int64_t *data; + + if (status == NULL || U_FAILURE(*status)) { + return 0; + } + + if ((int32_t)timeScale < 0 || timeScale >= UDTS_MAX_SCALE) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + data = (const int64_t *)(&timeScaleTable[timeScale]); + + if (universalTime < data[UTSV_TO_MIN_VALUE] || universalTime > data[UTSV_TO_MAX_VALUE]) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if (universalTime < 0) { + if (universalTime < data[UTSV_MIN_ROUND_VALUE]) { + return (universalTime + data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_PLUS_1_VALUE]; + } + + return (universalTime - data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_VALUE]; + } + + if (universalTime > data[UTSV_MAX_ROUND_VALUE]) { + return (universalTime - data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_MINUS_1_VALUE]; + } + + return (universalTime + data[UTSV_UNITS_ROUND_VALUE]) / data[UTSV_UNITS_VALUE] - data[UTSV_EPOCH_OFFSET_VALUE]; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/utrans.cpp b/deps/icu-small/source/i18n/utrans.cpp index aed817ce26..62fd630d9e 100644 --- a/deps/icu-small/source/i18n/utrans.cpp +++ b/deps/icu-small/source/i18n/utrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/valueformatter.cpp b/deps/icu-small/source/i18n/valueformatter.cpp index 45b08f60b6..e769f369d4 100644 --- a/deps/icu-small/source/i18n/valueformatter.cpp +++ b/deps/icu-small/source/i18n/valueformatter.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/valueformatter.h b/deps/icu-small/source/i18n/valueformatter.h index da2dd1b337..836a05b17c 100644 --- a/deps/icu-small/source/i18n/valueformatter.h +++ b/deps/icu-small/source/i18n/valueformatter.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/visibledigits.cpp b/deps/icu-small/source/i18n/visibledigits.cpp index 1fea3504df..03cfc68d25 100644 --- a/deps/icu-small/source/i18n/visibledigits.cpp +++ b/deps/icu-small/source/i18n/visibledigits.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* * Copyright (C) 2016, International Business Machines diff --git a/deps/icu-small/source/i18n/visibledigits.h b/deps/icu-small/source/i18n/visibledigits.h index cd18239a56..03c8013e39 100644 --- a/deps/icu-small/source/i18n/visibledigits.h +++ b/deps/icu-small/source/i18n/visibledigits.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2015, International Business Machines diff --git a/deps/icu-small/source/i18n/vtzone.cpp b/deps/icu-small/source/i18n/vtzone.cpp index ba5f3bc0d7..85b42b0e06 100644 --- a/deps/icu-small/source/i18n/vtzone.cpp +++ b/deps/icu-small/source/i18n/vtzone.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -358,7 +358,7 @@ static void millisToOffset(int32_t millis, UnicodeString& str) { /* * Create a default TZNAME from TZID */ -static void getDefaultTZName(const UnicodeString tzid, UBool isDST, UnicodeString& zonename) { +static void getDefaultTZName(const UnicodeString &tzid, UBool isDST, UnicodeString& zonename) { zonename = tzid; if (isDST) { zonename += UNICODE_STRING_SIMPLE("(DST)"); diff --git a/deps/icu-small/source/i18n/vzone.cpp b/deps/icu-small/source/i18n/vzone.cpp index 7ee95e4ede..6db3ba04c5 100644 --- a/deps/icu-small/source/i18n/vzone.cpp +++ b/deps/icu-small/source/i18n/vzone.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/vzone.h b/deps/icu-small/source/i18n/vzone.h index 22a41b4e37..700687e0cb 100644 --- a/deps/icu-small/source/i18n/vzone.h +++ b/deps/icu-small/source/i18n/vzone.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/windtfmt.cpp b/deps/icu-small/source/i18n/windtfmt.cpp index 20ce338bb4..70a9364a0c 100644 --- a/deps/icu-small/source/i18n/windtfmt.cpp +++ b/deps/icu-small/source/i18n/windtfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -13,7 +13,7 @@ #include "unicode/utypes.h" -#if U_PLATFORM_HAS_WIN32_API +#if U_PLATFORM_USES_ONLY_WIN32_API #if !UCONFIG_NO_FORMATTING @@ -35,7 +35,9 @@ #include "windtfmt.h" #include "wintzimpl.h" +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE @@ -92,12 +94,83 @@ UnicodeString* Win32DateFormat::getTimeDateFormat(const Calendar *cal, const Loc return result; } +// TODO: This is copied in both winnmfmt.cpp and windtfmt.cpp, but really should +// be factored out into a common helper for both. +static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeString** buffer) +{ + UErrorCode status = U_ZERO_ERROR; + char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + + // Convert from names like "en_CA" and "de_DE@collation=phonebook" to "en-CA" and "de-DE-u-co-phonebk". 
+ int32_t length = uloc_toLanguageTag(locale.getName(), asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &status); + + if (U_SUCCESS(status)) + { + // Need it to be UTF-16, not 8-bit + // TODO: This seems like a good thing for a helper + wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++) + { + if (asciiBCP47Tag[i] == '\0') + { + break; + } + else + { + // normally just copy the character + bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]); + } + } + + // Ensure it's null terminated + if (i < (UPRV_LENGTHOF(bcp47Tag) - 1)) + { + bcp47Tag[i] = L'\0'; + } + else + { + // Ran out of room. + bcp47Tag[UPRV_LENGTHOF(bcp47Tag) - 1] = L'\0'; + } + + + wchar_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {}; + + // Note: On Windows versions below 10, there is no support for locale name aliases. + // This means that it will fail for locales where ICU has a completely different + // name (like ku vs ckb), and it will also not work for alternate sort locale + // names like "de-DE-u-co-phonebk". + + // TODO: We could add some sort of exception table for cases like ku vs ckb.
+ + int length = ResolveLocaleName(bcp47Tag, windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName)); + + if (length > 0) + { + *buffer = new UnicodeString(windowsLocaleName); + } + else + { + status = U_UNSUPPORTED_ERROR; + } + } + return status; +} + // TODO: Range-check timeStyle, dateStyle Win32DateFormat::Win32DateFormat(DateFormat::EStyle timeStyle, DateFormat::EStyle dateStyle, const Locale &locale, UErrorCode &status) - : DateFormat(), fDateTimeMsg(NULL), fTimeStyle(timeStyle), fDateStyle(dateStyle), fLocale(locale), fZoneID() + : DateFormat(), fDateTimeMsg(NULL), fTimeStyle(timeStyle), fDateStyle(dateStyle), fLocale(locale), fZoneID(), fWindowsLocaleName(nullptr) { if (U_SUCCESS(status)) { - fLCID = locale.getLCID(); + GetEquivalentWindowsLocaleName(locale, &fWindowsLocaleName); + // Note: In the previous code, it would look up the LCID for the locale, and if + // the locale was not recognized then it would get an LCID of 0, which is a + // synonym for LOCALE_USER_DEFAULT on Windows. + // If the above method fails, then fWindowsLocaleName will remain as nullptr, and + // then we will pass nullptr to API GetLocaleInfoEx, which is the same as passing + // LOCALE_USER_DEFAULT. 
+ fTZI = NEW_ARRAY(TIME_ZONE_INFORMATION, 1); uprv_memset(fTZI, 0, sizeof(TIME_ZONE_INFORMATION)); adoptCalendar(Calendar::createInstance(locale, status)); @@ -115,6 +188,7 @@ Win32DateFormat::~Win32DateFormat() // delete fCalendar; uprv_free(fTZI); delete fDateTimeMsg; + delete fWindowsLocaleName; } Win32DateFormat &Win32DateFormat::operator=(const Win32DateFormat &other) @@ -128,13 +202,14 @@ Win32DateFormat &Win32DateFormat::operator=(const Win32DateFormat &other) this->fTimeStyle = other.fTimeStyle; this->fDateStyle = other.fDateStyle; this->fLocale = other.fLocale; - this->fLCID = other.fLCID; // this->fCalendar = other.fCalendar->clone(); this->fZoneID = other.fZoneID; this->fTZI = NEW_ARRAY(TIME_ZONE_INFORMATION, 1); *this->fTZI = *other.fTZI; + this->fWindowsLocaleName = other.fWindowsLocaleName == NULL ? NULL : new UnicodeString(*other.fWindowsLocaleName); + return *this; } @@ -231,18 +306,25 @@ static const DWORD dfFlags[] = {DATE_LONGDATE, DATE_LONGDATE, DATE_SHORTDATE, DA void Win32DateFormat::formatDate(const SYSTEMTIME *st, UnicodeString &appendTo) const { - int result; + int result=0; wchar_t stackBuffer[STACK_BUFFER_SIZE]; wchar_t *buffer = stackBuffer; + const wchar_t *localeName = nullptr; + + if (fWindowsLocaleName != nullptr) + { + localeName = reinterpret_cast<const wchar_t*>(toOldUCharPtr(fWindowsLocaleName->getTerminatedBuffer())); + } - result = GetDateFormatW(fLCID, dfFlags[fDateStyle - kDateOffset], st, NULL, buffer, STACK_BUFFER_SIZE); + result = GetDateFormatEx(localeName, dfFlags[fDateStyle - kDateOffset], st, NULL, buffer, STACK_BUFFER_SIZE, NULL); if (result == 0) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { - int newLength = GetDateFormatW(fLCID, dfFlags[fDateStyle - kDateOffset], st, NULL, NULL, 0); + int newLength = GetDateFormatEx(localeName, dfFlags[fDateStyle - kDateOffset], st, NULL, NULL, 0, NULL); buffer = NEW_ARRAY(wchar_t, newLength); - GetDateFormatW(fLCID, dfFlags[fDateStyle - kDateOffset], st, NULL, buffer, newLength); + +
GetDateFormatEx(localeName, dfFlags[fDateStyle - kDateOffset], st, NULL, buffer, newLength, NULL); } } @@ -260,15 +342,22 @@ void Win32DateFormat::formatTime(const SYSTEMTIME *st, UnicodeString &appendTo) int result; wchar_t stackBuffer[STACK_BUFFER_SIZE]; wchar_t *buffer = stackBuffer; + const wchar_t *localeName = nullptr; - result = GetTimeFormatW(fLCID, tfFlags[fTimeStyle], st, NULL, buffer, STACK_BUFFER_SIZE); + if (fWindowsLocaleName != nullptr) + { + localeName = reinterpret_cast<const wchar_t*>(toOldUCharPtr(fWindowsLocaleName->getTerminatedBuffer())); + } + + result = GetTimeFormatEx(localeName, tfFlags[fTimeStyle], st, NULL, buffer, STACK_BUFFER_SIZE); if (result == 0) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { - int newLength = GetTimeFormatW(fLCID, tfFlags[fTimeStyle], st, NULL, NULL, 0); + int newLength = GetTimeFormatEx(localeName, tfFlags[fTimeStyle], st, NULL, NULL, 0); buffer = NEW_ARRAY(wchar_t, newLength); - GetDateFormatW(fLCID, tfFlags[fTimeStyle], st, NULL, buffer, newLength); + + GetTimeFormatEx(localeName, tfFlags[fTimeStyle], st, NULL, buffer, newLength); } } @@ -314,4 +403,4 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ -#endif // U_PLATFORM_HAS_WIN32_API +#endif // U_PLATFORM_USES_ONLY_WIN32_API diff --git a/deps/icu-small/source/i18n/windtfmt.h b/deps/icu-small/source/i18n/windtfmt.h index a822194370..43b6fe6dba 100644 --- a/deps/icu-small/source/i18n/windtfmt.h +++ b/deps/icu-small/source/i18n/windtfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -16,7 +16,7 @@ #include "unicode/utypes.h" -#if U_PLATFORM_HAS_WIN32_API +#if U_PLATFORM_USES_ONLY_WIN32_API #if !UCONFIG_NO_FORMATTING @@ -95,7 +95,7 @@ public: * <pre>

          * .   Base* polymorphic_pointer = createPolymorphicObject();
          * .   if (polymorphic_pointer->getDynamicClassID() ==
    -     * .       erived::getStaticClassID()) ...
    +     * .       derived::getStaticClassID()) ...
          * </pre>
    * @return The class ID for all objects of this class. */ @@ -124,15 +124,16 @@ private: DateFormat::EStyle fTimeStyle; DateFormat::EStyle fDateStyle; Locale fLocale; - int32_t fLCID; UnicodeString fZoneID; TIME_ZONE_INFORMATION *fTZI; + + UnicodeString* fWindowsLocaleName; // Stores the equivalent Windows locale name. }; U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ -#endif // U_PLATFORM_HAS_WIN32_API +#endif // U_PLATFORM_USES_ONLY_WIN32_API #endif // __WINDTFMT diff --git a/deps/icu-small/source/i18n/winnmfmt.cpp b/deps/icu-small/source/i18n/winnmfmt.cpp index d7e98723bb..40b4b64776 100644 --- a/deps/icu-small/source/i18n/winnmfmt.cpp +++ b/deps/icu-small/source/i18n/winnmfmt.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -28,7 +28,9 @@ #include "uassert.h" #include "locmap.h" +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE @@ -58,43 +60,43 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Win32NumberFormat) * end in ";0" then the return value should be multiplied by 10. * (e.g. 
"3" => 30, "3;2" => 320) */ -static UINT getGrouping(const char *grouping) +static UINT getGrouping(const wchar_t *grouping) { UINT g = 0; - const char *s; + const wchar_t *s; - for (s = grouping; *s != '\0'; s += 1) { - if (*s > '0' && *s < '9') { - g = g * 10 + (*s - '0'); - } else if (*s != ';') { + for (s = grouping; *s != L'\0'; s += 1) { + if (*s > L'0' && *s < L'9') { + g = g * 10 + (*s - L'0'); + } else if (*s != L';') { break; } } - if (*s != '0') { + if (*s != L'0') { g *= 10; } return g; } -static void getNumberFormat(NUMBERFMTW *fmt, int32_t lcid) +static void getNumberFormat(NUMBERFMTW *fmt, const wchar_t *windowsLocaleName) { - char buf[10]; + wchar_t buf[10]; - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_IDIGITS, (LPWSTR) &fmt->NumDigits, sizeof(UINT)); - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_ILZERO, (LPWSTR) &fmt->LeadingZero, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_IDIGITS, (LPWSTR) &fmt->NumDigits, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_ILZERO, (LPWSTR) &fmt->LeadingZero, sizeof(UINT)); - GetLocaleInfoA(lcid, LOCALE_SGROUPING, buf, 10); + GetLocaleInfoEx(windowsLocaleName, LOCALE_SGROUPING, (LPWSTR)buf, 10); fmt->Grouping = getGrouping(buf); fmt->lpDecimalSep = NEW_ARRAY(wchar_t, 6); - GetLocaleInfoW(lcid, LOCALE_SDECIMAL, fmt->lpDecimalSep, 6); + GetLocaleInfoEx(windowsLocaleName, LOCALE_SDECIMAL, fmt->lpDecimalSep, 6); fmt->lpThousandSep = NEW_ARRAY(wchar_t, 6); - GetLocaleInfoW(lcid, LOCALE_STHOUSAND, fmt->lpThousandSep, 6); + GetLocaleInfoEx(windowsLocaleName, LOCALE_STHOUSAND, fmt->lpThousandSep, 6); - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_INEGNUMBER, (LPWSTR) &fmt->NegativeOrder, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_INEGNUMBER, (LPWSTR) &fmt->NegativeOrder, sizeof(UINT)); } static void freeNumberFormat(NUMBERFMTW *fmt) @@ -105,27 +107,27 @@ static void freeNumberFormat(NUMBERFMTW *fmt) } } 
-static void getCurrencyFormat(CURRENCYFMTW *fmt, int32_t lcid) +static void getCurrencyFormat(CURRENCYFMTW *fmt, const wchar_t *windowsLocaleName) { - char buf[10]; + wchar_t buf[10]; - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_ICURRDIGITS, (LPWSTR) &fmt->NumDigits, sizeof(UINT)); - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_ILZERO, (LPWSTR) &fmt->LeadingZero, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_ICURRDIGITS, (LPWSTR) &fmt->NumDigits, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_ILZERO, (LPWSTR) &fmt->LeadingZero, sizeof(UINT)); - GetLocaleInfoA(lcid, LOCALE_SMONGROUPING, buf, sizeof(buf)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_SMONGROUPING, (LPWSTR)buf, sizeof(buf)); fmt->Grouping = getGrouping(buf); fmt->lpDecimalSep = NEW_ARRAY(wchar_t, 6); - GetLocaleInfoW(lcid, LOCALE_SMONDECIMALSEP, fmt->lpDecimalSep, 6); + GetLocaleInfoEx(windowsLocaleName, LOCALE_SMONDECIMALSEP, fmt->lpDecimalSep, 6); fmt->lpThousandSep = NEW_ARRAY(wchar_t, 6); - GetLocaleInfoW(lcid, LOCALE_SMONTHOUSANDSEP, fmt->lpThousandSep, 6); + GetLocaleInfoEx(windowsLocaleName, LOCALE_SMONTHOUSANDSEP, fmt->lpThousandSep, 6); - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_INEGCURR, (LPWSTR) &fmt->NegativeOrder, sizeof(UINT)); - GetLocaleInfoW(lcid, LOCALE_RETURN_NUMBER|LOCALE_ICURRENCY, (LPWSTR) &fmt->PositiveOrder, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_INEGCURR, (LPWSTR) &fmt->NegativeOrder, sizeof(UINT)); + GetLocaleInfoEx(windowsLocaleName, LOCALE_RETURN_NUMBER|LOCALE_ICURRENCY, (LPWSTR) &fmt->PositiveOrder, sizeof(UINT)); fmt->lpCurrencySymbol = NEW_ARRAY(wchar_t, 8); - GetLocaleInfoW(lcid, LOCALE_SCURRENCY, (LPWSTR) fmt->lpCurrencySymbol, 8); + GetLocaleInfoEx(windowsLocaleName, LOCALE_SCURRENCY, (LPWSTR) fmt->lpCurrencySymbol, 8); } static void freeCurrencyFormat(CURRENCYFMTW *fmt) @@ -137,12 +139,84 @@ static void freeCurrencyFormat(CURRENCYFMTW *fmt) } 
} +// TODO: This is copied in both winnmfmt.cpp and windtfmt.cpp, but really should +// be factored out into a common helper for both. +static UErrorCode GetEquivalentWindowsLocaleName(const Locale& locale, UnicodeString** buffer) +{ + UErrorCode status = U_ZERO_ERROR; + char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + + // Convert from names like "en_CA" and "de_DE@collation=phonebook" to "en-CA" and "de-DE-u-co-phonebk". + int32_t length = uloc_toLanguageTag(locale.getName(), asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &status); + + if (U_SUCCESS(status)) + { + // Need it to be UTF-16, not 8-bit + // TODO: This seems like a good thing for a helper + wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++) + { + if (asciiBCP47Tag[i] == '\0') + { + break; + } + else + { + // normally just copy the character + bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]); + } + } + + // Ensure it's null terminated + if (i < (UPRV_LENGTHOF(bcp47Tag) - 1)) + { + bcp47Tag[i] = L'\0'; + } + else + { + // Ran out of room. + bcp47Tag[UPRV_LENGTHOF(bcp47Tag) - 1] = L'\0'; + } + + + wchar_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {}; + + // Note: On Windows versions below 10, there is no support for locale name aliases. + // This means that it will fail for locales where ICU has a completely different + // name (like ku vs ckb), and it will also not work for alternate sort locale + // names like "de-DE-u-co-phonebk". + + // TODO: We could add some sort of exception table for cases like ku vs ckb.
+ + int length = ResolveLocaleName(bcp47Tag, windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName)); + + if (length > 0) + { + *buffer = new UnicodeString(windowsLocaleName); + } + else + { + status = U_UNSUPPORTED_ERROR; + } + } + return status; +} + Win32NumberFormat::Win32NumberFormat(const Locale &locale, UBool currency, UErrorCode &status) - : NumberFormat(), fCurrency(currency), fFormatInfo(NULL), fFractionDigitsSet(FALSE) + : NumberFormat(), fCurrency(currency), fFormatInfo(NULL), fFractionDigitsSet(FALSE), fWindowsLocaleName(nullptr) { if (!U_FAILURE(status)) { fLCID = locale.getLCID(); + GetEquivalentWindowsLocaleName(locale, &fWindowsLocaleName); + // Note: In the previous code, it would look up the LCID for the locale, and if + // the locale was not recognized then it would get an LCID of 0, which is a + // synonym for LOCALE_USER_DEFAULT on Windows. + // If the above method fails, then fWindowsLocaleName will remain as nullptr, and + // then we will pass nullptr to API GetLocaleInfoEx, which is the same as passing + // LOCALE_USER_DEFAULT. 
+ // Resolve actual locale to be used later UErrorCode tmpsts = U_ZERO_ERROR; char tmpLocID[ULOC_FULLNAME_CAPACITY]; @@ -152,12 +226,19 @@ Win32NumberFormat::Win32NumberFormat(const Locale &locale, UBool currency, UErro fLocale = Locale((const char*)tmpLocID); } + const wchar_t *localeName = nullptr; + + if (fWindowsLocaleName != nullptr) + { + localeName = reinterpret_cast(toOldUCharPtr(fWindowsLocaleName->getTerminatedBuffer())); + } + fFormatInfo = (FormatInfo*)uprv_malloc(sizeof(FormatInfo)); if (fCurrency) { - getCurrencyFormat(&fFormatInfo->currency, fLCID); + getCurrencyFormat(&fFormatInfo->currency, localeName); } else { - getNumberFormat(&fFormatInfo->number, fLCID); + getNumberFormat(&fFormatInfo->number, localeName); } } } @@ -182,6 +263,7 @@ Win32NumberFormat::~Win32NumberFormat() uprv_free(fFormatInfo); } + delete fWindowsLocaleName; } Win32NumberFormat &Win32NumberFormat::operator=(const Win32NumberFormat &other) @@ -192,13 +274,21 @@ Win32NumberFormat &Win32NumberFormat::operator=(const Win32NumberFormat &other) this->fLocale = other.fLocale; this->fLCID = other.fLCID; this->fFractionDigitsSet = other.fFractionDigitsSet; + this->fWindowsLocaleName = other.fWindowsLocaleName == NULL ? 
NULL : new UnicodeString(*other.fWindowsLocaleName); + + const wchar_t *localeName = nullptr; + + if (fWindowsLocaleName != nullptr) + { + localeName = reinterpret_cast(toOldUCharPtr(fWindowsLocaleName->getTerminatedBuffer())); + } if (fCurrency) { freeCurrencyFormat(&fFormatInfo->currency); - getCurrencyFormat(&fFormatInfo->currency, fLCID); + getCurrencyFormat(&fFormatInfo->currency, localeName); } else { freeNumberFormat(&fFormatInfo->number); - getNumberFormat(&fFormatInfo->number, fLCID); + getNumberFormat(&fFormatInfo->number, localeName); } return *this; @@ -299,6 +389,13 @@ UnicodeString &Win32NumberFormat::format(int32_t numDigits, UnicodeString &appen formatInfo = *fFormatInfo; buffer[0] = 0x0000; + const wchar_t *localeName = nullptr; + + if (fWindowsLocaleName != nullptr) + { + localeName = reinterpret_cast(toOldUCharPtr(fWindowsLocaleName->getTerminatedBuffer())); + } + if (fCurrency) { if (fFractionDigitsSet) { formatInfo.currency.NumDigits = (UINT) numDigits; @@ -308,17 +405,17 @@ UnicodeString &Win32NumberFormat::format(int32_t numDigits, UnicodeString &appen formatInfo.currency.Grouping = 0; } - result = GetCurrencyFormatW(fLCID, 0, nBuffer, &formatInfo.currency, buffer, STACK_BUFFER_SIZE); + result = GetCurrencyFormatEx(localeName, 0, nBuffer, &formatInfo.currency, buffer, STACK_BUFFER_SIZE); if (result == 0) { DWORD lastError = GetLastError(); if (lastError == ERROR_INSUFFICIENT_BUFFER) { - int newLength = GetCurrencyFormatW(fLCID, 0, nBuffer, &formatInfo.currency, NULL, 0); + int newLength = GetCurrencyFormatEx(localeName, 0, nBuffer, &formatInfo.currency, NULL, 0); buffer = NEW_ARRAY(wchar_t, newLength); buffer[0] = 0x0000; - GetCurrencyFormatW(fLCID, 0, nBuffer, &formatInfo.currency, buffer, newLength); + GetCurrencyFormatEx(localeName, 0, nBuffer, &formatInfo.currency, buffer, newLength); } } } else { @@ -330,15 +427,15 @@ UnicodeString &Win32NumberFormat::format(int32_t numDigits, UnicodeString &appen formatInfo.number.Grouping = 0; } - 
result = GetNumberFormatW(fLCID, 0, nBuffer, &formatInfo.number, buffer, STACK_BUFFER_SIZE); + result = GetNumberFormatEx(localeName, 0, nBuffer, &formatInfo.number, buffer, STACK_BUFFER_SIZE); if (result == 0) { if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { - int newLength = GetNumberFormatW(fLCID, 0, nBuffer, &formatInfo.number, NULL, 0); + int newLength = GetNumberFormatEx(localeName, 0, nBuffer, &formatInfo.number, NULL, 0); buffer = NEW_ARRAY(wchar_t, newLength); buffer[0] = 0x0000; - GetNumberFormatW(fLCID, 0, nBuffer, &formatInfo.number, buffer, newLength); + GetNumberFormatEx(localeName, 0, nBuffer, &formatInfo.number, buffer, newLength); } } } diff --git a/deps/icu-small/source/i18n/winnmfmt.h b/deps/icu-small/source/i18n/winnmfmt.h index 3b0df915e1..7ea5da9170 100644 --- a/deps/icu-small/source/i18n/winnmfmt.h +++ b/deps/icu-small/source/i18n/winnmfmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -128,7 +128,7 @@ public: *
          * .   Base* polymorphic_pointer = createPolymorphicObject();
          * .   if (polymorphic_pointer->getDynamicClassID() ==
    -     * .       erived::getStaticClassID()) ...
    +     * .       derived::getStaticClassID()) ...
          * 
    * @return The class ID for all objects of this class. */ @@ -155,6 +155,7 @@ private: FormatInfo *fFormatInfo; UBool fFractionDigitsSet; + UnicodeString* fWindowsLocaleName; // Stores the equivalent Windows locale name. }; U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/wintzimpl.cpp b/deps/icu-small/source/i18n/wintzimpl.cpp index 4c042d5421..07aad21787 100644 --- a/deps/icu-small/source/i18n/wintzimpl.cpp +++ b/deps/icu-small/source/i18n/wintzimpl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -13,7 +13,7 @@ #include "unicode/utypes.h" -#if U_PLATFORM_HAS_WIN32_API && !UCONFIG_NO_FORMATTING +#if U_PLATFORM_USES_ONLY_WIN32_API && !UCONFIG_NO_FORMATTING #include "wintzimpl.h" @@ -24,7 +24,9 @@ #include "uassert.h" #include "cmemory.h" +#ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN +#endif # define VC_EXTRALEAN # define NOUSER # define NOSERVICE diff --git a/deps/icu-small/source/i18n/wintzimpl.h b/deps/icu-small/source/i18n/wintzimpl.h index 8149fc1417..c36f2ad5f5 100644 --- a/deps/icu-small/source/i18n/wintzimpl.h +++ b/deps/icu-small/source/i18n/wintzimpl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************** @@ -16,7 +16,7 @@ #include "unicode/utypes.h" -#if U_PLATFORM_HAS_WIN32_API +#if U_PLATFORM_USES_ONLY_WIN32_API /** * \file * \brief C API: Utilities for dealing w/ Windows time zones. 
@@ -34,6 +34,6 @@ U_CAPI UBool U_EXPORT2 uprv_getWindowsTimeZoneInfo(TIME_ZONE_INFORMATION *zoneInfo, const UChar *icuid, int32_t length); -#endif /* U_PLATFORM_HAS_WIN32_API */ +#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ #endif /* __WINTZIMPL */ diff --git a/deps/icu-small/source/i18n/zonemeta.cpp b/deps/icu-small/source/i18n/zonemeta.cpp index fdf333c371..84a9657802 100644 --- a/deps/icu-small/source/i18n/zonemeta.cpp +++ b/deps/icu-small/source/i18n/zonemeta.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -17,7 +17,7 @@ #include "unicode/ustring.h" #include "unicode/putil.h" #include "unicode/simpletz.h" - +#include "unicode/strenum.h" #include "umutex.h" #include "uvector.h" #include "cmemory.h" @@ -28,6 +28,7 @@ #include "uresimp.h" #include "uhash.h" #include "olsontz.h" +#include "uinvchar.h" static UMutex gZoneMetaLock = U_MUTEX_INITIALIZER; @@ -255,6 +256,12 @@ ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) { tzid.extract(utzid, ZID_KEY_MAX + 1, tmpStatus); U_ASSERT(tmpStatus == U_ZERO_ERROR); // we checked the length of tzid already + if (!uprv_isInvariantUString(utzid, -1)) { + // All of known tz IDs are only containing ASCII invariant characters. + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + // Check if it was already cached umtx_lock(&gZoneMetaLock); { diff --git a/deps/icu-small/source/i18n/zonemeta.h b/deps/icu-small/source/i18n/zonemeta.h index 84be5553ea..9dbcc878a2 100644 --- a/deps/icu-small/source/i18n/zonemeta.h +++ b/deps/icu-small/source/i18n/zonemeta.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -41,7 +41,11 @@ public: /** * Return the canonical id for this tzid defined by CLDR, which might be the id itself. * This overload method returns a persistent const UChar*, which is guranteed to persist - * (a pointer to a resource). + * (a pointer to a resource). If the given system tzid is not known, U_ILLEGAL_ARGUMENT_ERROR + * is set in the status. + * @param tzid Zone ID + * @param status Receives the status + * @return The canonical ID for the input time zone ID */ static const UChar* U_EXPORT2 getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status); diff --git a/deps/icu-small/source/i18n/zrule.cpp b/deps/icu-small/source/i18n/zrule.cpp index ad64ab6e16..c13411fc8e 100644 --- a/deps/icu-small/source/i18n/zrule.cpp +++ b/deps/icu-small/source/i18n/zrule.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/zrule.h b/deps/icu-small/source/i18n/zrule.h index b9827bf520..272f954f06 100644 --- a/deps/icu-small/source/i18n/zrule.h +++ b/deps/icu-small/source/i18n/zrule.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/ztrans.cpp b/deps/icu-small/source/i18n/ztrans.cpp index 956b563a2a..d2d93da133 100644 --- a/deps/icu-small/source/i18n/ztrans.cpp +++ b/deps/icu-small/source/i18n/ztrans.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/i18n/ztrans.h b/deps/icu-small/source/i18n/ztrans.h index 0101dc06e3..8b63eb47e8 100644 --- a/deps/icu-small/source/i18n/ztrans.h +++ b/deps/icu-small/source/i18n/ztrans.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/io/locbund.cpp b/deps/icu-small/source/io/locbund.cpp index 4dc50d50df..f15788ee19 100644 --- a/deps/icu-small/source/io/locbund.cpp +++ b/deps/icu-small/source/io/locbund.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/io/locbund.h b/deps/icu-small/source/io/locbund.h index b9e19484b6..5879e28f01 100644 --- a/deps/icu-small/source/io/locbund.h +++ b/deps/icu-small/source/io/locbund.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/io/sprintf.c b/deps/icu-small/source/io/sprintf.c deleted file mode 100644 index 17cdb2dcdd..0000000000 --- a/deps/icu-small/source/io/sprintf.c +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File sprintf.c -* -* Modification History: -* -* Date Name Description -* 02/08/2001 george Creation. Copied from uprintf.c -* 03/27/2002 Mark Schneckloth Many fixes regarding alignment, null termination -* (mschneckloth@atomz.com) and other various problems. -* 08/07/2003 george Reunify printf implementations -******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION - -#include "unicode/ustdio.h" -#include "unicode/ustring.h" -#include "unicode/putil.h" - -#include "uprintf.h" -#include "locbund.h" - -#include "cmemory.h" -#include - -/* u_minstrncpy copies the minimum number of code units of (count or output->available) */ -static int32_t -u_sprintf_write(void *context, - const UChar *str, - int32_t count) -{ - u_localized_print_string *output = (u_localized_print_string *)context; - int32_t size = ufmt_min(count, output->available); - - u_strncpy(output->str + (output->len - output->available), str, size); - output->available -= size; - return size; -} - -static int32_t -u_sprintf_pad_and_justify(void *context, - const u_printf_spec_info *info, - const UChar *result, - int32_t resultLen) -{ - u_localized_print_string *output = (u_localized_print_string *)context; - int32_t written = 0; - int32_t lengthOfResult = resultLen; - - resultLen = ufmt_min(resultLen, output->available); - - /* pad and justify, if needed */ - if(info->fWidth != -1 && resultLen < info->fWidth) { - int32_t paddingLeft = info->fWidth - resultLen; - int32_t outputPos = output->len - output->available; - - if (paddingLeft + resultLen > 
output->available) { - paddingLeft = output->available - resultLen; - if (paddingLeft < 0) { - paddingLeft = 0; - } - /* paddingLeft = output->available - resultLen;*/ - } - written += paddingLeft; - - /* left justify */ - if(info->fLeft) { - written += u_sprintf_write(output, result, resultLen); - u_memset(&output->str[outputPos + resultLen], info->fPadChar, paddingLeft); - output->available -= paddingLeft; - } - /* right justify */ - else { - u_memset(&output->str[outputPos], info->fPadChar, paddingLeft); - output->available -= paddingLeft; - written += u_sprintf_write(output, result, resultLen); - } - } - /* just write the formatted output */ - else { - written = u_sprintf_write(output, result, resultLen); - } - - if (written >= 0 && lengthOfResult > written) { - return lengthOfResult; - } - - return written; -} - -U_CAPI int32_t U_EXPORT2 -u_sprintf(UChar *buffer, - const char *patternSpecification, - ... ) -{ - va_list ap; - int32_t written; - - va_start(ap, patternSpecification); - written = u_vsnprintf(buffer, INT32_MAX, patternSpecification, ap); - va_end(ap); - - return written; -} - -U_CAPI int32_t U_EXPORT2 -u_sprintf_u(UChar *buffer, - const UChar *patternSpecification, - ... ) -{ - va_list ap; - int32_t written; - - va_start(ap, patternSpecification); - written = u_vsnprintf_u(buffer, INT32_MAX, patternSpecification, ap); - va_end(ap); - - return written; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vsprintf(UChar *buffer, - const char *patternSpecification, - va_list ap) -{ - return u_vsnprintf(buffer, INT32_MAX, patternSpecification, ap); -} - -U_CAPI int32_t U_EXPORT2 -u_snprintf(UChar *buffer, - int32_t count, - const char *patternSpecification, - ... 
) -{ - va_list ap; - int32_t written; - - va_start(ap, patternSpecification); - written = u_vsnprintf(buffer, count, patternSpecification, ap); - va_end(ap); - - return written; -} - -U_CAPI int32_t U_EXPORT2 -u_snprintf_u(UChar *buffer, - int32_t count, - const UChar *patternSpecification, - ... ) -{ - va_list ap; - int32_t written; - - va_start(ap, patternSpecification); - written = u_vsnprintf_u(buffer, count, patternSpecification, ap); - va_end(ap); - - return written; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vsnprintf(UChar *buffer, - int32_t count, - const char *patternSpecification, - va_list ap) -{ - int32_t written; - UChar *pattern; - UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; - int32_t size = (int32_t)strlen(patternSpecification) + 1; - - /* convert from the default codepage to Unicode */ - if (size >= (int32_t)MAX_UCHAR_BUFFER_SIZE(patBuffer)) { - pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); - if(pattern == 0) { - return 0; - } - } - else { - pattern = patBuffer; - } - u_charsToUChars(patternSpecification, pattern, size); - - /* do the work */ - written = u_vsnprintf_u(buffer, count, pattern, ap); - - /* clean up */ - if (pattern != patBuffer) { - uprv_free(pattern); - } - - return written; -} - -U_CAPI int32_t U_EXPORT2 -u_vsprintf_u(UChar *buffer, - const UChar *patternSpecification, - va_list ap) -{ - return u_vsnprintf_u(buffer, INT32_MAX, patternSpecification, ap); -} - -static const u_printf_stream_handler g_sprintf_stream_handler = { - u_sprintf_write, - u_sprintf_pad_and_justify -}; - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vsnprintf_u(UChar *buffer, - int32_t count, - const UChar *patternSpecification, - va_list ap) -{ - int32_t written = 0; /* haven't written anything yet */ - int32_t result = 0; /* test the return value of u_printf_parse */ - - u_localized_print_string outStr; - - if (count < 0) { - count = INT32_MAX; - } - - outStr.str = buffer; - outStr.len = count; - outStr.available = count; - - if(u_locbund_init(&outStr.fBundle, "en_US_POSIX") == 0) { - return 0; - } - - /* parse and print the whole format string */ - result = u_printf_parse(&g_sprintf_stream_handler, patternSpecification, &outStr, &outStr, &outStr.fBundle, &written, ap); - - /* Terminate the buffer, if there's room. */ - if (outStr.available > 0) { - buffer[outStr.len - outStr.available] = 0x0000; - } - - /* Release the cloned bundle, if we cloned it. */ - u_locbund_close(&outStr.fBundle); - - /* parsing error */ - if (result < 0) { - return result; - } - /* return # of UChars written */ - return written; -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/sprintf.cpp b/deps/icu-small/source/io/sprintf.cpp new file mode 100644 index 0000000000..20b9e52a21 --- /dev/null +++ b/deps/icu-small/source/io/sprintf.cpp @@ -0,0 +1,261 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File sprintf.c +* +* Modification History: +* +* Date Name Description +* 02/08/2001 george Creation. Copied from uprintf.c +* 03/27/2002 Mark Schneckloth Many fixes regarding alignment, null termination +* (mschneckloth@atomz.com) and other various problems. 
+* 08/07/2003 george Reunify printf implementations +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "unicode/putil.h" + +#include "uprintf.h" +#include "locbund.h" + +#include "cmemory.h" +#include + +/* u_minstrncpy copies the minimum number of code units of (count or output->available) */ +static int32_t +u_sprintf_write(void *context, + const UChar *str, + int32_t count) +{ + u_localized_print_string *output = (u_localized_print_string *)context; + int32_t size = ufmt_min(count, output->available); + + u_strncpy(output->str + (output->len - output->available), str, size); + output->available -= size; + return size; +} + +static int32_t +u_sprintf_pad_and_justify(void *context, + const u_printf_spec_info *info, + const UChar *result, + int32_t resultLen) +{ + u_localized_print_string *output = (u_localized_print_string *)context; + int32_t written = 0; + int32_t lengthOfResult = resultLen; + + resultLen = ufmt_min(resultLen, output->available); + + /* pad and justify, if needed */ + if(info->fWidth != -1 && resultLen < info->fWidth) { + int32_t paddingLeft = info->fWidth - resultLen; + int32_t outputPos = output->len - output->available; + + if (paddingLeft + resultLen > output->available) { + paddingLeft = output->available - resultLen; + if (paddingLeft < 0) { + paddingLeft = 0; + } + /* paddingLeft = output->available - resultLen;*/ + } + written += paddingLeft; + + /* left justify */ + if(info->fLeft) { + written += u_sprintf_write(output, result, resultLen); + u_memset(&output->str[outputPos + resultLen], info->fPadChar, paddingLeft); + output->available -= paddingLeft; + } + /* right justify */ + else { + u_memset(&output->str[outputPos], info->fPadChar, paddingLeft); + output->available -= paddingLeft; + written += u_sprintf_write(output, result, resultLen); + } + 
} + /* just write the formatted output */ + else { + written = u_sprintf_write(output, result, resultLen); + } + + if (written >= 0 && lengthOfResult > written) { + return lengthOfResult; + } + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_sprintf(UChar *buffer, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf(buffer, INT32_MAX, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_sprintf_u(UChar *buffer, + const UChar *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf_u(buffer, INT32_MAX, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsprintf(UChar *buffer, + const char *patternSpecification, + va_list ap) +{ + return u_vsnprintf(buffer, INT32_MAX, patternSpecification, ap); +} + +U_CAPI int32_t U_EXPORT2 +u_snprintf(UChar *buffer, + int32_t count, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf(buffer, count, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_snprintf_u(UChar *buffer, + int32_t count, + const UChar *patternSpecification, + ... ) +{ + va_list ap; + int32_t written; + + va_start(ap, patternSpecification); + written = u_vsnprintf_u(buffer, count, patternSpecification, ap); + va_end(ap); + + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsnprintf(UChar *buffer, + int32_t count, + const char *patternSpecification, + va_list ap) +{ + int32_t written; + UChar *pattern; + UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= (int32_t)MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = patBuffer; + } + u_charsToUChars(patternSpecification, pattern, size); + + /* do the work */ + written = u_vsnprintf_u(buffer, count, pattern, ap); + + /* clean up */ + if (pattern != patBuffer) { + uprv_free(pattern); + } + + return written; +} + +U_CAPI int32_t U_EXPORT2 +u_vsprintf_u(UChar *buffer, + const UChar *patternSpecification, + va_list ap) +{ + return u_vsnprintf_u(buffer, INT32_MAX, patternSpecification, ap); +} + +static const u_printf_stream_handler g_sprintf_stream_handler = { + u_sprintf_write, + u_sprintf_pad_and_justify +}; + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsnprintf_u(UChar *buffer, + int32_t count, + const UChar *patternSpecification, + va_list ap) +{ + int32_t written = 0; /* haven't written anything yet */ + int32_t result = 0; /* test the return value of u_printf_parse */ + + u_localized_print_string outStr; + + if (count < 0) { + count = INT32_MAX; + } + + outStr.str = buffer; + outStr.len = count; + outStr.available = count; + + if(u_locbund_init(&outStr.fBundle, "en_US_POSIX") == 0) { + return 0; + } + + /* parse and print the whole format string */ + result = u_printf_parse(&g_sprintf_stream_handler, patternSpecification, &outStr, &outStr, &outStr.fBundle, &written, ap); + + /* Terminate the buffer, if there's room. */ + if (outStr.available > 0) { + buffer[outStr.len - outStr.available] = 0x0000; + } + + /* Release the cloned bundle, if we cloned it. 
*/ + u_locbund_close(&outStr.fBundle); + + /* parsing error */ + if (result < 0) { + return result; + } + /* return # of UChars written */ + return written; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/sscanf.c b/deps/icu-small/source/io/sscanf.c deleted file mode 100644 index 2e14cdbcb6..0000000000 --- a/deps/icu-small/source/io/sscanf.c +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File sscanf.c -* -* Modification History: -* -* Date Name Description -* 02/08/00 george Creation. Copied from uscanf.c -****************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION - -#include "unicode/putil.h" -#include "unicode/ustdio.h" -#include "unicode/ustring.h" -#include "uscanf.h" -#include "ufile.h" -#include "ufmt_cmn.h" - -#include "cmemory.h" -#include "cstring.h" - - -U_CAPI int32_t U_EXPORT2 -u_sscanf(const UChar *buffer, - const char *patternSpecification, - ... ) -{ - va_list ap; - int32_t converted; - - va_start(ap, patternSpecification); - converted = u_vsscanf(buffer, patternSpecification, ap); - va_end(ap); - - return converted; -} - -U_CAPI int32_t U_EXPORT2 -u_sscanf_u(const UChar *buffer, - const UChar *patternSpecification, - ... ) -{ - va_list ap; - int32_t converted; - - va_start(ap, patternSpecification); - converted = u_vsscanf_u(buffer, patternSpecification, ap); - va_end(ap); - - return converted; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vsscanf(const UChar *buffer, - const char *patternSpecification, - va_list ap) -{ - int32_t converted; - UChar *pattern; - UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; - int32_t size = (int32_t)uprv_strlen(patternSpecification) + 1; - - /* convert from the default codepage to Unicode */ - if (size >= (int32_t)MAX_UCHAR_BUFFER_SIZE(patBuffer)) { - pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); - if(pattern == 0) { - return 0; - } - } - else { - pattern = patBuffer; - } - u_charsToUChars(patternSpecification, pattern, size); - - /* do the work */ - converted = u_vsscanf_u(buffer, pattern, ap); - - /* clean up */ - if (pattern != patBuffer) { - uprv_free(pattern); - } - - return converted; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vsscanf_u(const UChar *buffer, - const UChar *patternSpecification, - va_list ap) -{ - int32_t converted; - UFILE inStr; - - inStr.fConverter = NULL; - inStr.fFile = NULL; - inStr.fOwnFile = FALSE; -#if !UCONFIG_NO_TRANSLITERATION - inStr.fTranslit = NULL; -#endif - inStr.fUCBuffer[0] = 0; - inStr.str.fBuffer = (UChar *)buffer; - inStr.str.fPos = (UChar *)buffer; - inStr.str.fLimit = buffer + u_strlen(buffer); - - if(u_locbund_init(&inStr.str.fBundle, "en_US_POSIX") == 0) { - return 0; - } - - converted = u_scanf_parse(&inStr, patternSpecification, ap); - - u_locbund_close(&inStr.str.fBundle); - - /* return # of items converted */ - return converted; -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/sscanf.cpp b/deps/icu-small/source/io/sscanf.cpp new file mode 100644 index 0000000000..5409ebb716 --- /dev/null +++ b/deps/icu-small/source/io/sscanf.cpp @@ -0,0 +1,129 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File sscanf.c +* +* Modification History: +* +* Date Name Description +* 02/08/00 george Creation. Copied from uscanf.c +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "uscanf.h" +#include "ufile.h" +#include "ufmt_cmn.h" + +#include "cmemory.h" +#include "cstring.h" + + +U_CAPI int32_t U_EXPORT2 +u_sscanf(const UChar *buffer, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vsscanf(buffer, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 +u_sscanf_u(const UChar *buffer, + const UChar *patternSpecification, + ... ) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vsscanf_u(buffer, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsscanf(const UChar *buffer, + const char *patternSpecification, + va_list ap) +{ + int32_t converted; + UChar *pattern; + UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)uprv_strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= (int32_t)MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = patBuffer; + } + u_charsToUChars(patternSpecification, pattern, size); + + /* do the work */ + converted = u_vsscanf_u(buffer, pattern, ap); + + /* clean up */ + if (pattern != patBuffer) { + uprv_free(pattern); + } + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vsscanf_u(const UChar *buffer, + const UChar *patternSpecification, + va_list ap) +{ + int32_t converted; + UFILE inStr; + + inStr.fConverter = NULL; + inStr.fFile = NULL; + inStr.fOwnFile = FALSE; +#if !UCONFIG_NO_TRANSLITERATION + inStr.fTranslit = NULL; +#endif + inStr.fUCBuffer[0] = 0; + inStr.str.fBuffer = (UChar *)buffer; + inStr.str.fPos = (UChar *)buffer; + inStr.str.fLimit = buffer + u_strlen(buffer); + + if(u_locbund_init(&inStr.str.fBundle, "en_US_POSIX") == 0) { + return 0; + } + + converted = u_scanf_parse(&inStr, patternSpecification, ap); + + u_locbund_close(&inStr.str.fBundle); + + /* return # of items converted */ + return converted; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/ucln_io.cpp b/deps/icu-small/source/io/ucln_io.cpp index 5822d22be1..388abbb4e9 100644 --- a/deps/icu-small/source/io/ucln_io.cpp +++ b/deps/icu-small/source/io/ucln_io.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * * ****************************************************************************** * file name: ucln_io.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/io/ucln_io.h b/deps/icu-small/source/io/ucln_io.h index 97d06a12ff..20dcb88ac7 100644 --- a/deps/icu-small/source/io/ucln_io.h +++ b/deps/icu-small/source/io/ucln_io.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -8,7 +8,7 @@ * * ****************************************************************************** * file name: ucln_io.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/io/ufile.c b/deps/icu-small/source/io/ufile.c deleted file mode 100644 index 71d27a5e25..0000000000 --- a/deps/icu-small/source/io/ufile.c +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File ufile.c -* -* Modification History: -* -* Date Name Description -* 11/19/98 stephen Creation. -* 03/12/99 stephen Modified for new C API. -* 06/16/99 stephen Changed T_LocaleBundle to u_locbund -* 07/19/99 stephen Fixed to use ucnv's default codepage. 
-****************************************************************************** -*/ - -/* - * fileno is not declared when building with GCC in strict mode. - */ -#if defined(__GNUC__) && defined(__STRICT_ANSI__) -#undef __STRICT_ANSI__ -#endif - -#include "locmap.h" -#include "unicode/ustdio.h" - -#if !UCONFIG_NO_CONVERSION - -#include "ufile.h" -#include "unicode/uloc.h" -#include "unicode/ures.h" -#include "unicode/ucnv.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cmemory.h" - -#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(fileno) -/* Windows likes to rename Unix-like functions */ -#define fileno _fileno -#endif - -static UFILE* -finit_owner(FILE *f, - const char *locale, - const char *codepage, - UBool takeOwnership - ) -{ - UErrorCode status = U_ZERO_ERROR; - UFILE *result; - if(f == NULL) { - return 0; - } - result = (UFILE*) uprv_malloc(sizeof(UFILE)); - if(result == NULL) { - return 0; - } - - uprv_memset(result, 0, sizeof(UFILE)); - result->fFileno = fileno(f); - -#if U_PLATFORM_USES_ONLY_WIN32_API && _MSC_VER < 1900 - /* - * Below is a very old workaround (ICU ticket:231). - * - * Previously, 'FILE*' from inside and outside ICU's DLL - * were different, because they pointed into local copies - * of the io block. At least by VS 2015 the implementation - * is something like: - * stdio = _acrt_iob_func(0) - * .. which is a function call, so should return the same pointer - * regardless of call site. - * As of _MSC_VER 1900 this patch is retired, at 16 years old. 
- */ - if (0 <= result->fFileno && result->fFileno <= 2) { - /* stdin, stdout and stderr need to be special cased for Windows 98 */ -#if _MSC_VER >= 1400 - result->fFile = &__iob_func()[_fileno(f)]; -#else - result->fFile = &_iob[_fileno(f)]; -#endif - } - else -#endif - { - result->fFile = f; - } - - result->str.fBuffer = result->fUCBuffer; - result->str.fPos = result->fUCBuffer; - result->str.fLimit = result->fUCBuffer; - -#if !UCONFIG_NO_FORMATTING - /* if locale is 0, use the default */ - if(u_locbund_init(&result->str.fBundle, locale) == 0) { - /* DO NOT FCLOSE HERE! */ - uprv_free(result); - return 0; - } -#endif - - /* If the codepage is not "" use the ucnv_open default behavior */ - if(codepage == NULL || *codepage != '\0') { - result->fConverter = ucnv_open(codepage, &status); - } - /* else result->fConverter is already memset'd to NULL. */ - - if(U_SUCCESS(status)) { - result->fOwnFile = takeOwnership; - } - else { -#if !UCONFIG_NO_FORMATTING - u_locbund_close(&result->str.fBundle); -#endif - /* DO NOT fclose here!!!!!! */ - uprv_free(result); - result = NULL; - } - - return result; -} - -U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_finit(FILE *f, - const char *locale, - const char *codepage) -{ - return finit_owner(f, locale, codepage, FALSE); -} - -U_CAPI UFILE* U_EXPORT2 -u_fadopt(FILE *f, - const char *locale, - const char *codepage) -{ - return finit_owner(f, locale, codepage, TRUE); -} - -U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fopen(const char *filename, - const char *perm, - const char *locale, - const char *codepage) -{ - UFILE *result; - FILE *systemFile = fopen(filename, perm); - if(systemFile == 0) { - return 0; - } - - result = finit_owner(systemFile, locale, codepage, TRUE); - - if (!result) { - /* Something bad happened. - Maybe the converter couldn't be opened. 
*/ - fclose(systemFile); - } - - return result; /* not a file leak */ -} - -U_CAPI UFILE* U_EXPORT2 -u_fopen_u(const UChar *filename, - const char *perm, - const char *locale, - const char *codepage) -{ - UFILE *result; - char buffer[256]; - - u_austrcpy(buffer, filename); - - result = u_fopen(buffer, perm, locale, codepage); -#if U_PLATFORM_USES_ONLY_WIN32_API - /* Try Windows API _wfopen if the above fails. */ - if (!result) { - FILE *systemFile = _wfopen(filename, (UChar*)perm); - if (systemFile) { - result = finit_owner(systemFile, locale, codepage, TRUE); - } - if (!result) { - /* Something bad happened. - Maybe the converter couldn't be opened. */ - fclose(systemFile); - } - } -#endif - return result; /* not a file leak */ -} - -U_CAPI UFILE* U_EXPORT2 -u_fstropen(UChar *stringBuf, - int32_t capacity, - const char *locale) -{ - UFILE *result; - - if (capacity < 0) { - return NULL; - } - - result = (UFILE*) uprv_malloc(sizeof(UFILE)); - /* Null pointer test */ - if (result == NULL) { - return NULL; /* Just get out. */ - } - uprv_memset(result, 0, sizeof(UFILE)); - result->str.fBuffer = stringBuf; - result->str.fPos = stringBuf; - result->str.fLimit = stringBuf+capacity; - -#if !UCONFIG_NO_FORMATTING - /* if locale is 0, use the default */ - if(u_locbund_init(&result->str.fBundle, locale) == 0) { - /* DO NOT FCLOSE HERE! 
*/ - uprv_free(result); - return 0; - } -#endif - - return result; -} - -U_CAPI UBool U_EXPORT2 -u_feof(UFILE *f) -{ - UBool endOfBuffer; - if (f == NULL) { - return TRUE; - } - endOfBuffer = (UBool)(f->str.fPos >= f->str.fLimit); - if (f->fFile != NULL) { - return endOfBuffer && feof(f->fFile); - } - return endOfBuffer; -} - -U_CAPI void U_EXPORT2 -u_fflush(UFILE *file) -{ - ufile_flush_translit(file); - ufile_flush_io(file); - if (file->fFile) { - fflush(file->fFile); - } - else if (file->str.fPos < file->str.fLimit) { - *(file->str.fPos++) = 0; - } - /* TODO: flush input */ -} - -U_CAPI void -u_frewind(UFILE *file) -{ - u_fflush(file); - ucnv_reset(file->fConverter); - if (file->fFile) { - rewind(file->fFile); - file->str.fLimit = file->fUCBuffer; - file->str.fPos = file->fUCBuffer; - } - else { - file->str.fPos = file->str.fBuffer; - } -} - -U_CAPI void U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fclose(UFILE *file) -{ - if (file) { - u_fflush(file); - ufile_close_translit(file); - - if(file->fOwnFile) - fclose(file->fFile); - -#if !UCONFIG_NO_FORMATTING - u_locbund_close(&file->str.fBundle); -#endif - - ucnv_close(file->fConverter); - uprv_free(file); - } -} - -U_CAPI FILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetfile( UFILE *f) -{ - return f->fFile; -} - -#if !UCONFIG_NO_FORMATTING - -U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetlocale( UFILE *file) -{ - return file->str.fBundle.fLocale; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fsetlocale(UFILE *file, - const char *locale) -{ - u_locbund_close(&file->str.fBundle); - - return u_locbund_init(&file->str.fBundle, locale) == 0 ? -1 : 0; -} - -#endif - -U_CAPI const char* U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetcodepage(UFILE *file) -{ - UErrorCode status = U_ZERO_ERROR; - const char *codepage = NULL; - - if (file->fConverter) { - codepage = ucnv_getName(file->fConverter, &status); - if(U_FAILURE(status)) - return 0; - } - return codepage; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fsetcodepage( const char *codepage, - UFILE *file) -{ - UErrorCode status = U_ZERO_ERROR; - int32_t retVal = -1; - - /* We use the normal default codepage for this system, and not the one for the locale. */ - if ((file->str.fPos == file->str.fBuffer) && (file->str.fLimit == file->str.fBuffer)) { - ucnv_close(file->fConverter); - file->fConverter = ucnv_open(codepage, &status); - if(U_SUCCESS(status)) { - retVal = 0; - } - } - return retVal; -} - - -U_CAPI UConverter * U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetConverter(UFILE *file) -{ - return file->fConverter; -} -#if !UCONFIG_NO_FORMATTING -U_CAPI const UNumberFormat* U_EXPORT2 u_fgetNumberFormat(UFILE *file) -{ - return u_locbund_getNumberFormat(&file->str.fBundle, UNUM_DECIMAL); -} -#endif - -#endif diff --git a/deps/icu-small/source/io/ufile.cpp b/deps/icu-small/source/io/ufile.cpp new file mode 100644 index 0000000000..6cbb897555 --- /dev/null +++ b/deps/icu-small/source/io/ufile.cpp @@ -0,0 +1,343 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ufile.cpp +* +* Modification History: +* +* Date Name Description +* 11/19/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. 
+* 06/16/99 stephen Changed T_LocaleBundle to u_locbund +* 07/19/99 stephen Fixed to use ucnv's default codepage. +****************************************************************************** +*/ + +#include "unicode/platform.h" +#if defined(__GNUC__) && !defined(__clang__) && defined(__STRICT_ANSI__) +// g++, fileno isn't defined if __STRICT_ANSI__ is defined. +// clang fails to compile the header unless __STRICT_ANSI__ is defined. +// __GNUC__ is set by both gcc and clang. +#undef __STRICT_ANSI__ +#endif + +#include "locmap.h" +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include + +#include "ufile.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "cmemory.h" + +#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(fileno) +/* Windows likes to rename Unix-like functions */ +#define fileno _fileno +#endif + +static UFILE* +finit_owner(FILE *f, + const char *locale, + const char *codepage, + UBool takeOwnership + ) +{ + UErrorCode status = U_ZERO_ERROR; + UFILE *result; + if(f == NULL) { + return 0; + } + result = (UFILE*) uprv_malloc(sizeof(UFILE)); + if(result == NULL) { + return 0; + } + + uprv_memset(result, 0, sizeof(UFILE)); + result->fFileno = fileno(f); + result->fFile = f; + + result->str.fBuffer = result->fUCBuffer; + result->str.fPos = result->fUCBuffer; + result->str.fLimit = result->fUCBuffer; + +#if !UCONFIG_NO_FORMATTING + /* if locale is 0, use the default */ + if(u_locbund_init(&result->str.fBundle, locale) == 0) { + /* DO NOT FCLOSE HERE! */ + uprv_free(result); + return 0; + } +#endif + + /* If the codepage is not "" use the ucnv_open default behavior */ + if(codepage == NULL || *codepage != '\0') { + result->fConverter = ucnv_open(codepage, &status); + } + /* else result->fConverter is already memset'd to NULL. 
*/ + + if(U_SUCCESS(status)) { + result->fOwnFile = takeOwnership; + } + else { +#if !UCONFIG_NO_FORMATTING + u_locbund_close(&result->str.fBundle); +#endif + /* DO NOT fclose here!!!!!! */ + uprv_free(result); + result = NULL; + } + + return result; +} + +U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_finit(FILE *f, + const char *locale, + const char *codepage) +{ + return finit_owner(f, locale, codepage, FALSE); +} + +U_CAPI UFILE* U_EXPORT2 +u_fadopt(FILE *f, + const char *locale, + const char *codepage) +{ + return finit_owner(f, locale, codepage, TRUE); +} + +U_CAPI UFILE* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fopen(const char *filename, + const char *perm, + const char *locale, + const char *codepage) +{ + UFILE *result; + FILE *systemFile = fopen(filename, perm); + if(systemFile == 0) { + return 0; + } + + result = finit_owner(systemFile, locale, codepage, TRUE); + + if (!result) { + /* Something bad happened. + Maybe the converter couldn't be opened. */ + fclose(systemFile); + } + + return result; /* not a file leak */ +} + +U_CAPI UFILE* U_EXPORT2 +u_fopen_u(const UChar *filename, + const char *perm, + const char *locale, + const char *codepage) +{ + UFILE *result; + char buffer[256]; + + u_austrcpy(buffer, filename); + + result = u_fopen(buffer, perm, locale, codepage); +#if U_PLATFORM_USES_ONLY_WIN32_API + /* Try Windows API _wfopen if the above fails. */ + if (!result) { + // TODO: test this code path, including wperm. + wchar_t wperm[40] = {}; + size_t retVal; + mbstowcs_s(&retVal, wperm, perm, _TRUNCATE); + FILE *systemFile = _wfopen((const wchar_t *)filename, wperm); + if (systemFile) { + result = finit_owner(systemFile, locale, codepage, TRUE); + } + if (!result) { + /* Something bad happened. + Maybe the converter couldn't be opened. 
*/ + fclose(systemFile); + } + } +#endif + return result; /* not a file leak */ +} + +U_CAPI UFILE* U_EXPORT2 +u_fstropen(UChar *stringBuf, + int32_t capacity, + const char *locale) +{ + UFILE *result; + + if (capacity < 0) { + return NULL; + } + + result = (UFILE*) uprv_malloc(sizeof(UFILE)); + /* Null pointer test */ + if (result == NULL) { + return NULL; /* Just get out. */ + } + uprv_memset(result, 0, sizeof(UFILE)); + result->str.fBuffer = stringBuf; + result->str.fPos = stringBuf; + result->str.fLimit = stringBuf+capacity; + +#if !UCONFIG_NO_FORMATTING + /* if locale is 0, use the default */ + if(u_locbund_init(&result->str.fBundle, locale) == 0) { + /* DO NOT FCLOSE HERE! */ + uprv_free(result); + return 0; + } +#endif + + return result; +} + +U_CAPI UBool U_EXPORT2 +u_feof(UFILE *f) +{ + UBool endOfBuffer; + if (f == NULL) { + return TRUE; + } + endOfBuffer = (UBool)(f->str.fPos >= f->str.fLimit); + if (f->fFile != NULL) { + return endOfBuffer && feof(f->fFile); + } + return endOfBuffer; +} + +U_CAPI void U_EXPORT2 +u_fflush(UFILE *file) +{ + ufile_flush_translit(file); + ufile_flush_io(file); + if (file->fFile) { + fflush(file->fFile); + } + else if (file->str.fPos < file->str.fLimit) { + *(file->str.fPos++) = 0; + } + /* TODO: flush input */ +} + +U_CAPI void +u_frewind(UFILE *file) +{ + u_fflush(file); + ucnv_reset(file->fConverter); + if (file->fFile) { + rewind(file->fFile); + file->str.fLimit = file->fUCBuffer; + file->str.fPos = file->fUCBuffer; + } + else { + file->str.fPos = file->str.fBuffer; + } +} + +U_CAPI void U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fclose(UFILE *file) +{ + if (file) { + u_fflush(file); + ufile_close_translit(file); + + if(file->fOwnFile) + fclose(file->fFile); + +#if !UCONFIG_NO_FORMATTING + u_locbund_close(&file->str.fBundle); +#endif + + ucnv_close(file->fConverter); + uprv_free(file); + } +} + +U_CAPI FILE* U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetfile( UFILE *f) +{ + return f->fFile; +} + +#if !UCONFIG_NO_FORMATTING + +U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetlocale( UFILE *file) +{ + return file->str.fBundle.fLocale; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fsetlocale(UFILE *file, + const char *locale) +{ + u_locbund_close(&file->str.fBundle); + + return u_locbund_init(&file->str.fBundle, locale) == 0 ? -1 : 0; +} + +#endif + +U_CAPI const char* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcodepage(UFILE *file) +{ + UErrorCode status = U_ZERO_ERROR; + const char *codepage = NULL; + + if (file->fConverter) { + codepage = ucnv_getName(file->fConverter, &status); + if(U_FAILURE(status)) + return 0; + } + return codepage; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fsetcodepage( const char *codepage, + UFILE *file) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t retVal = -1; + + /* We use the normal default codepage for this system, and not the one for the locale. */ + if ((file->str.fPos == file->str.fBuffer) && (file->str.fLimit == file->str.fBuffer)) { + ucnv_close(file->fConverter); + file->fConverter = ucnv_open(codepage, &status); + if(U_SUCCESS(status)) { + retVal = 0; + } + } + return retVal; +} + + +U_CAPI UConverter * U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetConverter(UFILE *file) +{ + return file->fConverter; +} +#if !UCONFIG_NO_FORMATTING +U_CAPI const UNumberFormat* U_EXPORT2 u_fgetNumberFormat(UFILE *file) +{ + return u_locbund_getNumberFormat(&file->str.fBundle, UNUM_DECIMAL); +} +#endif + +#endif diff --git a/deps/icu-small/source/io/ufile.h b/deps/icu-small/source/io/ufile.h index b2562747e6..ed89727543 100644 --- a/deps/icu-small/source/io/ufile.h +++ b/deps/icu-small/source/io/ufile.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -25,6 +25,8 @@ #if !UCONFIG_NO_CONVERSION +#include + #include "unicode/ucnv.h" #include "unicode/utrans.h" #include "locbund.h" diff --git a/deps/icu-small/source/io/ufmt_cmn.c b/deps/icu-small/source/io/ufmt_cmn.c deleted file mode 100644 index e896bc560d..0000000000 --- a/deps/icu-small/source/io/ufmt_cmn.c +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File ufmt_cmn.c -* -* Modification History: -* -* Date Name Description -* 12/02/98 stephen Creation. -* 03/12/99 stephen Modified for new C API. 
-* 03/15/99 stephen Added defaultCPToUnicode, unicodeToDefaultCP -* 07/19/99 stephen Fixed bug in defaultCPToUnicode -****************************************************************************** -*/ - -#include "cstring.h" -#include "cmemory.h" -#include "ufmt_cmn.h" -#include "unicode/uchar.h" -#include "unicode/ucnv.h" -#include "ustr_cnv.h" - -#if !UCONFIG_NO_CONVERSION - - -#define DIGIT_0 0x0030 -#define DIGIT_9 0x0039 -#define LOWERCASE_A 0x0061 -#define UPPERCASE_A 0x0041 -#define LOWERCASE_Z 0x007A -#define UPPERCASE_Z 0x005A - -int -ufmt_digitvalue(UChar c) -{ - if( ((c>=DIGIT_0)&&(c<=DIGIT_9)) || - ((c>=LOWERCASE_A)&&(c<=LOWERCASE_Z)) || - ((c>=UPPERCASE_A)&&(c<=UPPERCASE_Z)) ) - { - return c - DIGIT_0 - (c >= 0x0041 ? (c >= 0x0061 ? 39 : 7) : 0); - } - else - { - return -1; - } -} - -UBool -ufmt_isdigit(UChar c, - int32_t radix) -{ - int digitVal = ufmt_digitvalue(c); - - return (UBool)(digitVal < radix && digitVal >= 0); -} - -#define TO_UC_DIGIT(a) a <= 9 ? (DIGIT_0 + a) : (0x0037 + a) -#define TO_LC_DIGIT(a) a <= 9 ? (DIGIT_0 + a) : (0x0057 + a) - -void -ufmt_64tou(UChar *buffer, - int32_t *len, - uint64_t value, - uint8_t radix, - UBool uselower, - int32_t minDigits) -{ - int32_t length = 0; - uint32_t digit; - UChar *left, *right, temp; - - do { - digit = (uint32_t)(value % radix); - value = value / radix; - buffer[length++] = (UChar)(uselower ? 
TO_LC_DIGIT(digit) - : TO_UC_DIGIT(digit)); - } while(value); - - /* pad with zeroes to make it minDigits long */ - if(minDigits != -1 && length < minDigits) { - while(length < minDigits && length < *len) - buffer[length++] = DIGIT_0; /*zero padding */ - } - - /* reverse the buffer */ - left = buffer; - right = buffer + length; - while(left < --right) { - temp = *left; - *left++ = *right; - *right = temp; - } - - *len = length; -} - -void -ufmt_ptou(UChar *buffer, - int32_t *len, - void *value, - UBool uselower) -{ - int32_t i; - int32_t length = 0; - uint8_t *ptrIdx = (uint8_t *)&value; - -#if U_IS_BIG_ENDIAN - for (i = 0; i < (int32_t)sizeof(void *); i++) -#else - for (i = (int32_t)sizeof(void *)-1; i >= 0 ; i--) -#endif - { - uint8_t byteVal = ptrIdx[i]; - uint16_t firstNibble = (uint16_t)(byteVal>>4); - uint16_t secondNibble = (uint16_t)(byteVal&0xF); - if (uselower) { - buffer[length++]=TO_LC_DIGIT(firstNibble); - buffer[length++]=TO_LC_DIGIT(secondNibble); - } - else { - buffer[length++]=TO_UC_DIGIT(firstNibble); - buffer[length++]=TO_UC_DIGIT(secondNibble); - } - } - - *len = length; -} - -int64_t -ufmt_uto64(const UChar *buffer, - int32_t *len, - int8_t radix) -{ - const UChar *limit; - int32_t count; - int64_t result; - - - /* intialize parameters */ - limit = buffer + *len; - count = 0; - result = 0; - - /* iterate through buffer */ - while(ufmt_isdigit(*buffer, radix) && buffer < limit) { - - /* read the next digit */ - result *= radix; - result += ufmt_digitvalue(*buffer++); - - /* increment our count */ - ++count; - } - - *len = count; - return result; -} - -#define NIBBLE_PER_BYTE 2 -void * -ufmt_utop(const UChar *buffer, - int32_t *len) -{ - int32_t count, resultIdx, incVal, offset; - /* This union allows the pointer to be written as an array. 
*/ - union { - void *ptr; - uint8_t bytes[sizeof(void*)]; - } result; - - /* intialize variables */ - count = 0; - offset = 0; - result.ptr = NULL; - - /* Skip the leading zeros */ - while(buffer[count] == DIGIT_0 || u_isspace(buffer[count])) { - count++; - offset++; - } - - /* iterate through buffer, stop when you hit the end */ - while(ufmt_isdigit(buffer[count], 16) && count < *len) { - /* increment the count consumed */ - ++count; - } - - /* detect overflow */ - if (count - offset > (int32_t)(sizeof(void*)*NIBBLE_PER_BYTE)) { - offset = count - (int32_t)(sizeof(void*)*NIBBLE_PER_BYTE); - } - - /* Initialize the direction of the input */ -#if U_IS_BIG_ENDIAN - incVal = -1; - resultIdx = (int32_t)(sizeof(void*) - 1); -#else - incVal = 1; - resultIdx = 0; -#endif - /* Write how much was consumed. */ - *len = count; - while(--count >= offset) { - /* Get the first nibble of the byte */ - uint8_t byte = (uint8_t)ufmt_digitvalue(buffer[count]); - - if (count > offset) { - /* Get the second nibble of the byte when available */ - byte = (uint8_t)(byte + (ufmt_digitvalue(buffer[--count]) << 4)); - } - /* Write the byte into the array */ - result.bytes[resultIdx] = byte; - resultIdx += incVal; - } - - return result.ptr; -} - -UChar* -ufmt_defaultCPToUnicode(const char *s, int32_t sSize, - UChar *target, int32_t tSize) -{ - UChar *alias; - UErrorCode status = U_ZERO_ERROR; - UConverter *defConverter = u_getDefaultConverter(&status); - - if(U_FAILURE(status) || defConverter == 0) - return 0; - - if(sSize <= 0) { - sSize = uprv_strlen(s) + 1; - } - - /* perform the conversion in one swoop */ - if(target != 0) { - - alias = target; - ucnv_toUnicode(defConverter, &alias, alias + tSize, &s, s + sSize - 1, - NULL, TRUE, &status); - - - /* add the null terminator */ - *alias = 0x0000; - } - - u_releaseDefaultConverter(defConverter); - - return target; -} - - -#endif diff --git a/deps/icu-small/source/io/ufmt_cmn.cpp b/deps/icu-small/source/io/ufmt_cmn.cpp new file mode 100644 
index 0000000000..760d2711bd --- /dev/null +++ b/deps/icu-small/source/io/ufmt_cmn.cpp @@ -0,0 +1,259 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ufmt_cmn.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. +* 03/15/99 stephen Added defaultCPToUnicode, unicodeToDefaultCP +* 07/19/99 stephen Fixed bug in defaultCPToUnicode +****************************************************************************** +*/ + +#include "cstring.h" +#include "cmemory.h" +#include "ufmt_cmn.h" +#include "unicode/uchar.h" +#include "unicode/ucnv.h" +#include "ustr_cnv.h" + +#if !UCONFIG_NO_CONVERSION + + +#define DIGIT_0 0x0030 +#define DIGIT_9 0x0039 +#define LOWERCASE_A 0x0061 +#define UPPERCASE_A 0x0041 +#define LOWERCASE_Z 0x007A +#define UPPERCASE_Z 0x005A + +int +ufmt_digitvalue(UChar c) +{ + if( ((c>=DIGIT_0)&&(c<=DIGIT_9)) || + ((c>=LOWERCASE_A)&&(c<=LOWERCASE_Z)) || + ((c>=UPPERCASE_A)&&(c<=UPPERCASE_Z)) ) + { + return c - DIGIT_0 - (c >= 0x0041 ? (c >= 0x0061 ? 39 : 7) : 0); + } + else + { + return -1; + } +} + +UBool +ufmt_isdigit(UChar c, + int32_t radix) +{ + int digitVal = ufmt_digitvalue(c); + + return (UBool)(digitVal < radix && digitVal >= 0); +} + +#define TO_UC_DIGIT(a) a <= 9 ? (DIGIT_0 + a) : (0x0037 + a) +#define TO_LC_DIGIT(a) a <= 9 ? 
(DIGIT_0 + a) : (0x0057 + a) + +void +ufmt_64tou(UChar *buffer, + int32_t *len, + uint64_t value, + uint8_t radix, + UBool uselower, + int32_t minDigits) +{ + int32_t length = 0; + uint32_t digit; + UChar *left, *right, temp; + + do { + digit = (uint32_t)(value % radix); + value = value / radix; + buffer[length++] = (UChar)(uselower ? TO_LC_DIGIT(digit) + : TO_UC_DIGIT(digit)); + } while(value); + + /* pad with zeroes to make it minDigits long */ + if(minDigits != -1 && length < minDigits) { + while(length < minDigits && length < *len) + buffer[length++] = DIGIT_0; /*zero padding */ + } + + /* reverse the buffer */ + left = buffer; + right = buffer + length; + while(left < --right) { + temp = *left; + *left++ = *right; + *right = temp; + } + + *len = length; +} + +void +ufmt_ptou(UChar *buffer, + int32_t *len, + void *value, + UBool uselower) +{ + int32_t i; + int32_t length = 0; + uint8_t *ptrIdx = (uint8_t *)&value; + +#if U_IS_BIG_ENDIAN + for (i = 0; i < (int32_t)sizeof(void *); i++) +#else + for (i = (int32_t)sizeof(void *)-1; i >= 0 ; i--) +#endif + { + uint8_t byteVal = ptrIdx[i]; + uint16_t firstNibble = (uint16_t)(byteVal>>4); + uint16_t secondNibble = (uint16_t)(byteVal&0xF); + if (uselower) { + buffer[length++]=TO_LC_DIGIT(firstNibble); + buffer[length++]=TO_LC_DIGIT(secondNibble); + } + else { + buffer[length++]=TO_UC_DIGIT(firstNibble); + buffer[length++]=TO_UC_DIGIT(secondNibble); + } + } + + *len = length; +} + +int64_t +ufmt_uto64(const UChar *buffer, + int32_t *len, + int8_t radix) +{ + const UChar *limit; + int32_t count; + int64_t result; + + + /* intialize parameters */ + limit = buffer + *len; + count = 0; + result = 0; + + /* iterate through buffer */ + while(ufmt_isdigit(*buffer, radix) && buffer < limit) { + + /* read the next digit */ + result *= radix; + result += ufmt_digitvalue(*buffer++); + + /* increment our count */ + ++count; + } + + *len = count; + return result; +} + +#define NIBBLE_PER_BYTE 2 +void * +ufmt_utop(const UChar 
*buffer, + int32_t *len) +{ + int32_t count, resultIdx, incVal, offset; + /* This union allows the pointer to be written as an array. */ + union { + void *ptr; + uint8_t bytes[sizeof(void*)]; + } result; + + /* intialize variables */ + count = 0; + offset = 0; + result.ptr = NULL; + + /* Skip the leading zeros */ + while(buffer[count] == DIGIT_0 || u_isspace(buffer[count])) { + count++; + offset++; + } + + /* iterate through buffer, stop when you hit the end */ + while(ufmt_isdigit(buffer[count], 16) && count < *len) { + /* increment the count consumed */ + ++count; + } + + /* detect overflow */ + if (count - offset > (int32_t)(sizeof(void*)*NIBBLE_PER_BYTE)) { + offset = count - (int32_t)(sizeof(void*)*NIBBLE_PER_BYTE); + } + + /* Initialize the direction of the input */ +#if U_IS_BIG_ENDIAN + incVal = -1; + resultIdx = (int32_t)(sizeof(void*) - 1); +#else + incVal = 1; + resultIdx = 0; +#endif + /* Write how much was consumed. */ + *len = count; + while(--count >= offset) { + /* Get the first nibble of the byte */ + uint8_t byte = (uint8_t)ufmt_digitvalue(buffer[count]); + + if (count > offset) { + /* Get the second nibble of the byte when available */ + byte = (uint8_t)(byte + (ufmt_digitvalue(buffer[--count]) << 4)); + } + /* Write the byte into the array */ + result.bytes[resultIdx] = byte; + resultIdx += incVal; + } + + return result.ptr; +} + +UChar* +ufmt_defaultCPToUnicode(const char *s, int32_t sSize, + UChar *target, int32_t tSize) +{ + UChar *alias; + UErrorCode status = U_ZERO_ERROR; + UConverter *defConverter = u_getDefaultConverter(&status); + + if(U_FAILURE(status) || defConverter == 0) + return 0; + + if(sSize <= 0) { + sSize = uprv_strlen(s) + 1; + } + + /* perform the conversion in one swoop */ + if(target != 0) { + + alias = target; + ucnv_toUnicode(defConverter, &alias, alias + tSize, &s, s + sSize - 1, + NULL, TRUE, &status); + + + /* add the null terminator */ + *alias = 0x0000; + } + + u_releaseDefaultConverter(defConverter); + + return 
target; +} + + +#endif diff --git a/deps/icu-small/source/io/ufmt_cmn.h b/deps/icu-small/source/io/ufmt_cmn.h index 7daeee8813..d9cfd6a5f3 100644 --- a/deps/icu-small/source/io/ufmt_cmn.h +++ b/deps/icu-small/source/io/ufmt_cmn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** @@ -26,7 +26,7 @@ #include "unicode/utf16.h" #define UFMT_DEFAULT_BUFFER_SIZE 128 -#define MAX_UCHAR_BUFFER_SIZE(buffer) (sizeof(buffer)/(U16_MAX_LENGTH*sizeof(UChar))) +#define MAX_UCHAR_BUFFER_SIZE(buffer) ((int32_t)(sizeof(buffer)/(U16_MAX_LENGTH*sizeof(UChar)))) #define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*U16_MAX_LENGTH*sizeof(UChar)) /** diff --git a/deps/icu-small/source/io/unicode/ustdio.h b/deps/icu-small/source/io/unicode/ustdio.h index 5e11bb6766..565b5b3fc5 100644 --- a/deps/icu-small/source/io/unicode/ustdio.h +++ b/deps/icu-small/source/io/unicode/ustdio.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/io/unicode/ustream.h b/deps/icu-small/source/io/unicode/ustream.h index ab52f6a28a..df1506ebfb 100644 --- a/deps/icu-small/source/io/unicode/ustream.h +++ b/deps/icu-small/source/io/unicode/ustream.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** @@ -30,7 +30,7 @@ * C++ I/O stream API. 
*/ -#if !defined(_MSC_VER) +#if defined(__GLIBCXX__) namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364 #endif diff --git a/deps/icu-small/source/io/uprintf.cpp b/deps/icu-small/source/io/uprintf.cpp index e6062ade29..316c794498 100644 --- a/deps/icu-small/source/io/uprintf.cpp +++ b/deps/icu-small/source/io/uprintf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/io/uprintf.h b/deps/icu-small/source/io/uprintf.h index 0a07e6b7d6..0fd6066e56 100644 --- a/deps/icu-small/source/io/uprintf.h +++ b/deps/icu-small/source/io/uprintf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/io/uprntf_p.c b/deps/icu-small/source/io/uprntf_p.c deleted file mode 100644 index 81f5269658..0000000000 --- a/deps/icu-small/source/io/uprntf_p.c +++ /dev/null @@ -1,1593 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File uprntf_p.c -* -* Modification History: -* -* Date Name Description -* 11/23/98 stephen Creation. -* 03/12/99 stephen Modified for new C API. 
-* 08/07/2003 george Reunify printf implementations -****************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION - -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "uprintf.h" -#include "ufmt_cmn.h" -#include "cmemory.h" -#include "putilimp.h" - -/* ANSI style formatting */ -/* Use US-ASCII characters only for formatting */ - -/* % */ -#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_printf_simple_percent_handler} -/* s */ -#define UFMT_STRING {ufmt_string, u_printf_string_handler} -/* c */ -#define UFMT_CHAR {ufmt_char, u_printf_char_handler} -/* d, i */ -#define UFMT_INT {ufmt_int, u_printf_integer_handler} -/* u */ -#define UFMT_UINT {ufmt_int, u_printf_uinteger_handler} -/* o */ -#define UFMT_OCTAL {ufmt_int, u_printf_octal_handler} -/* x, X */ -#define UFMT_HEX {ufmt_int, u_printf_hex_handler} -/* f */ -#define UFMT_DOUBLE {ufmt_double, u_printf_double_handler} -/* e, E */ -#define UFMT_SCIENTIFIC {ufmt_double, u_printf_scientific_handler} -/* g, G */ -#define UFMT_SCIDBL {ufmt_double, u_printf_scidbl_handler} -/* n */ -#define UFMT_COUNT {ufmt_count, u_printf_count_handler} - -/* non-ANSI extensions */ -/* Use US-ASCII characters only for formatting */ - -/* p */ -#define UFMT_POINTER {ufmt_pointer, u_printf_pointer_handler} -/* V */ -#define UFMT_SPELLOUT {ufmt_double, u_printf_spellout_handler} -/* P */ -#define UFMT_PERCENT {ufmt_double, u_printf_percent_handler} -/* C K is old format */ -#define UFMT_UCHAR {ufmt_uchar, u_printf_uchar_handler} -/* S U is old format */ -#define UFMT_USTRING {ufmt_ustring, u_printf_ustring_handler} - - -#define UFMT_EMPTY {ufmt_empty, NULL} - -/** - * A u_printf handler function. - * A u_printf handler is responsible for handling a single u_printf - * format specification, for example 'd' or 's'. - * @param stream The UFILE to which to write output. 
- * @param info A pointer to a u_printf_spec_info struct containing - * information on the format specification. - * @param args A pointer to the argument data - * @return The number of Unicode characters written to stream. - */ -typedef int32_t U_EXPORT2 -u_printf_handler(const u_printf_stream_handler *handler, - - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args); - -typedef struct u_printf_info { - ufmt_type_info info; - u_printf_handler *handler; -} u_printf_info; - -/** - * Struct encapsulating a single uprintf format specification. - */ -typedef struct u_printf_spec { - u_printf_spec_info fInfo; /* Information on this spec */ - int32_t fWidthPos; /* Position of width in arg list */ - int32_t fPrecisionPos; /* Position of precision in arg list */ - int32_t fArgPos; /* Position of data in arg list */ -} u_printf_spec; - -#define UPRINTF_NUM_FMT_HANDLERS 108 - -/* We do not use handlers for 0-0x1f */ -#define UPRINTF_BASE_FMT_HANDLERS 0x20 - -/* buffer size for formatting */ -#define UPRINTF_BUFFER_SIZE 1024 -#define UPRINTF_SYMBOL_BUFFER_SIZE 8 - -static const UChar gNullStr[] = {0x28, 0x6E, 0x75, 0x6C, 0x6C, 0x29, 0}; /* "(null)" */ -static const UChar gSpaceStr[] = {0x20, 0}; /* " " */ - -/* Sets the sign of a format based on u_printf_spec_info */ -/* TODO: Is setting the prefix symbol to a positive sign a good idea in all locales? */ -static void -u_printf_set_sign(UNumberFormat *format, - const u_printf_spec_info *info, - UChar *prefixBuffer, - int32_t *prefixBufLen, - UErrorCode *status) -{ - if(info->fShowSign) { - *prefixBufLen = unum_getTextAttribute(format, - UNUM_POSITIVE_PREFIX, - prefixBuffer, - *prefixBufLen, - status); - if (info->fSpace) { - /* Setting UNUM_PLUS_SIGN_SYMBOL affects the exponent too. 
*/ - /* unum_setSymbol(format, UNUM_PLUS_SIGN_SYMBOL, gSpaceStr, 1, &status); */ - unum_setTextAttribute(format, UNUM_POSITIVE_PREFIX, gSpaceStr, 1, status); - } - else { - UChar plusSymbol[UPRINTF_SYMBOL_BUFFER_SIZE]; - int32_t symbolLen; - - symbolLen = unum_getSymbol(format, - UNUM_PLUS_SIGN_SYMBOL, - plusSymbol, - UPRV_LENGTHOF(plusSymbol), - status); - unum_setTextAttribute(format, - UNUM_POSITIVE_PREFIX, - plusSymbol, - symbolLen, - status); - } - } - else { - *prefixBufLen = 0; - } -} - -static void -u_printf_reset_sign(UNumberFormat *format, - const u_printf_spec_info *info, - UChar *prefixBuffer, - int32_t *prefixBufLen, - UErrorCode *status) -{ - if(info->fShowSign) { - unum_setTextAttribute(format, - UNUM_POSITIVE_PREFIX, - prefixBuffer, - *prefixBufLen, - status); - } -} - - -/* handle a '%' */ -static int32_t -u_printf_simple_percent_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - static const UChar PERCENT[] = { UP_PERCENT }; - - /* put a single '%' onto the output */ - return handler->write(context, PERCENT, 1); -} - -/* handle 's' */ -static int32_t -u_printf_string_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - UChar *s; - UChar buffer[UFMT_DEFAULT_BUFFER_SIZE]; - int32_t len, written; - int32_t argSize; - const char *arg = (const char*)(args[0].ptrValue); - - /* convert from the default codepage to Unicode */ - if (arg) { - argSize = (int32_t)strlen(arg) + 1; - if (argSize >= MAX_UCHAR_BUFFER_SIZE(buffer)) { - s = ufmt_defaultCPToUnicode(arg, argSize, - (UChar *)uprv_malloc(MAX_UCHAR_BUFFER_NEEDED(argSize)), - MAX_UCHAR_BUFFER_NEEDED(argSize)); - if(s == NULL) { - return 0; - } - } - else { - s = ufmt_defaultCPToUnicode(arg, argSize, buffer, - UPRV_LENGTHOF(buffer)); - } - } - else { - s = (UChar *)gNullStr; - } - len = 
u_strlen(s); - - /* width = minimum # of characters to write */ - /* precision = maximum # of characters to write */ - if (info->fPrecision != -1 && info->fPrecision < len) { - len = info->fPrecision; - } - - written = handler->pad_and_justify(context, info, s, len); - - /* clean up */ - if (gNullStr != s && buffer != s) { - uprv_free(s); - } - - return written; -} - -static int32_t -u_printf_char_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - UChar s[U16_MAX_LENGTH+1]; - int32_t len = 1, written; - unsigned char arg = (unsigned char)(args[0].int64Value); - - /* convert from default codepage to Unicode */ - ufmt_defaultCPToUnicode((const char *)&arg, 2, s, UPRV_LENGTHOF(s)); - - /* Remember that this may be an MBCS character */ - if (arg != 0) { - len = u_strlen(s); - } - - /* width = minimum # of characters to write */ - /* precision = maximum # of characters to write */ - /* precision is ignored when handling a char */ - - written = handler->pad_and_justify(context, info, s, len); - - return written; -} - -static int32_t -u_printf_double_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - double num = (double) (args[0].doubleValue); - UNumberFormat *format; - UChar result[UPRINTF_BUFFER_SIZE]; - UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; - int32_t prefixBufferLen = sizeof(prefixBuffer); - int32_t minDecimalDigits; - int32_t maxDecimalDigits; - int32_t resultLen; - UErrorCode status = U_ZERO_ERROR; - - prefixBuffer[0] = 0; - - /* mask off any necessary bits */ - /* if(! 
info->fIsLongDouble) - num &= DBL_MAX;*/ - - /* get the formatter */ - format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* save the formatter's state */ - minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); - maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); - - /* set the appropriate flags and number of decimal digits on the formatter */ - if(info->fPrecision != -1) { - /* set the # of decimal digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); - } - else if(info->fAlt) { - /* '#' means always show decimal point */ - /* copy of printf behavior on Solaris - '#' shows 6 digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - else { - /* # of decimal digits is 6 if precision not specified regardless of locale */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - - /* set whether to show the sign */ - if (info->fShowSign) { - u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); - } - - /* format the number */ - resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); - - if (U_FAILURE(status)) { - resultLen = 0; - } - - /* restore the number format */ - /* TODO: Is this needed? 
*/ - unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); - unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); - - if (info->fShowSign) { - /* Reset back to original value regardless of what the error was */ - UErrorCode localStatus = U_ZERO_ERROR; - u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); - } - - return handler->pad_and_justify(context, info, result, resultLen); -} - -/* HSYS */ -static int32_t -u_printf_integer_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int64_t num = args[0].int64Value; - UNumberFormat *format; - UChar result[UPRINTF_BUFFER_SIZE]; - UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; - int32_t prefixBufferLen = sizeof(prefixBuffer); - int32_t minDigits = -1; - int32_t resultLen; - UErrorCode status = U_ZERO_ERROR; - - prefixBuffer[0] = 0; - - /* mask off any necessary bits */ - if (info->fIsShort) - num = (int16_t)num; - else if (!info->fIsLongLong) - num = (int32_t)num; - - /* get the formatter */ - format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* set the appropriate flags on the formatter */ - - /* set the minimum integer digits */ - if(info->fPrecision != -1) { - /* set the minimum # of digits */ - minDigits = unum_getAttribute(format, UNUM_MIN_INTEGER_DIGITS); - unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, info->fPrecision); - } - - /* set whether to show the sign */ - if(info->fShowSign) { - u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); - } - - /* format the number */ - resultLen = unum_formatInt64(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); - - if (U_FAILURE(status)) { - resultLen = 0; - } - - /* restore the number format */ - if (minDigits != -1) { - unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, minDigits); - } - - if (info->fShowSign) 
{ - /* Reset back to original value regardless of what the error was */ - UErrorCode localStatus = U_ZERO_ERROR; - u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); - } - - return handler->pad_and_justify(context, info, result, resultLen); -} - -static int32_t -u_printf_hex_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int64_t num = args[0].int64Value; - UChar result[UPRINTF_BUFFER_SIZE]; - int32_t len = UPRINTF_BUFFER_SIZE; - - - /* mask off any necessary bits */ - if (info->fIsShort) - num &= UINT16_MAX; - else if (!info->fIsLongLong) - num &= UINT32_MAX; - - /* format the number, preserving the minimum # of digits */ - ufmt_64tou(result, &len, num, 16, - (UBool)(info->fSpec == 0x0078), - (info->fPrecision == -1 && info->fZero) ? info->fWidth : info->fPrecision); - - /* convert to alt form, if desired */ - if(num != 0 && info->fAlt && len < UPRINTF_BUFFER_SIZE - 2) { - /* shift the formatted string right by 2 chars */ - memmove(result + 2, result, len * sizeof(UChar)); - result[0] = 0x0030; - result[1] = info->fSpec; - len += 2; - } - - return handler->pad_and_justify(context, info, result, len); -} - -static int32_t -u_printf_octal_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int64_t num = args[0].int64Value; - UChar result[UPRINTF_BUFFER_SIZE]; - int32_t len = UPRINTF_BUFFER_SIZE; - - - /* mask off any necessary bits */ - if (info->fIsShort) - num &= UINT16_MAX; - else if (!info->fIsLongLong) - num &= UINT32_MAX; - - /* format the number, preserving the minimum # of digits */ - ufmt_64tou(result, &len, num, 8, - FALSE, /* doesn't matter for octal */ - info->fPrecision == -1 && info->fZero ? 
info->fWidth : info->fPrecision); - - /* convert to alt form, if desired */ - if(info->fAlt && result[0] != 0x0030 && len < UPRINTF_BUFFER_SIZE - 1) { - /* shift the formatted string right by 1 char */ - memmove(result + 1, result, len * sizeof(UChar)); - result[0] = 0x0030; - len += 1; - } - - return handler->pad_and_justify(context, info, result, len); -} - -static int32_t -u_printf_uinteger_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int64_t num = args[0].int64Value; - UNumberFormat *format; - UChar result[UPRINTF_BUFFER_SIZE]; - int32_t minDigits = -1; - int32_t resultLen; - UErrorCode status = U_ZERO_ERROR; - - /* TODO: Fix this once uint64_t can be formatted. */ - if (info->fIsShort) - num &= UINT16_MAX; - else if (!info->fIsLongLong) - num &= UINT32_MAX; - - /* get the formatter */ - format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* set the appropriate flags on the formatter */ - - /* set the minimum integer digits */ - if(info->fPrecision != -1) { - /* set the minimum # of digits */ - minDigits = unum_getAttribute(format, UNUM_MIN_INTEGER_DIGITS); - unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, info->fPrecision); - } - - /* To mirror other stdio implementations, we ignore the sign argument */ - - /* format the number */ - resultLen = unum_formatInt64(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); - - if (U_FAILURE(status)) { - resultLen = 0; - } - - /* restore the number format */ - if (minDigits != -1) { - unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, minDigits); - } - - return handler->pad_and_justify(context, info, result, resultLen); -} - -static int32_t -u_printf_pointer_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - UChar 
result[UPRINTF_BUFFER_SIZE]; - int32_t len = UPRINTF_BUFFER_SIZE; - - /* format the pointer in hex */ - ufmt_ptou(result, &len, args[0].ptrValue, TRUE/*, info->fPrecision*/); - - return handler->pad_and_justify(context, info, result, len); -} - -static int32_t -u_printf_scientific_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - double num = (double) (args[0].doubleValue); - UNumberFormat *format; - UChar result[UPRINTF_BUFFER_SIZE]; - UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; - int32_t prefixBufferLen = sizeof(prefixBuffer); - int32_t minDecimalDigits; - int32_t maxDecimalDigits; - UErrorCode status = U_ZERO_ERROR; - UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; - int32_t srcLen, expLen; - int32_t resultLen; - UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; - - prefixBuffer[0] = 0; - - /* mask off any necessary bits */ - /* if(! info->fIsLongDouble) - num &= DBL_MAX;*/ - - /* get the formatter */ - format = u_locbund_getNumberFormat(formatBundle, UNUM_SCIENTIFIC); - - /* handle error */ - if(format == 0) - return 0; - - /* set the appropriate flags on the formatter */ - - srcLen = unum_getSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - srcExpBuf, - sizeof(srcExpBuf), - &status); - - /* Upper/lower case the e */ - if (info->fSpec == (UChar)0x65 /* e */) { - expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), - srcExpBuf, srcLen, - formatBundle->fLocale, - &status); - } - else { - expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), - srcExpBuf, srcLen, - formatBundle->fLocale, - &status); - } - - unum_setSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - expBuf, - expLen, - &status); - - /* save the formatter's state */ - minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); - maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); - - /* set the appropriate flags and number of decimal digits on the formatter */ - if(info->fPrecision != 
-1) { - /* set the # of decimal digits */ - if (info->fOrigSpec == (UChar)0x65 /* e */ || info->fOrigSpec == (UChar)0x45 /* E */) { - unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); - } - else { - unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, 1); - unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, info->fPrecision); - } - } - else if(info->fAlt) { - /* '#' means always show decimal point */ - /* copy of printf behavior on Solaris - '#' shows 6 digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - else { - /* # of decimal digits is 6 if precision not specified */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - - /* set whether to show the sign */ - if (info->fShowSign) { - u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); - } - - /* format the number */ - resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); - - if (U_FAILURE(status)) { - resultLen = 0; - } - - /* restore the number format */ - /* TODO: Is this needed? */ - unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); - unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); - - /* Since we're the only one using the scientific - format, we don't need to save the old exponent value. 
*/ - /*unum_setSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - srcExpBuf, - srcLen, - &status);*/ - - if (info->fShowSign) { - /* Reset back to original value regardless of what the error was */ - UErrorCode localStatus = U_ZERO_ERROR; - u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); - } - - return handler->pad_and_justify(context, info, result, resultLen); -} - -static int32_t -u_printf_percent_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - double num = (double) (args[0].doubleValue); - UNumberFormat *format; - UChar result[UPRINTF_BUFFER_SIZE]; - UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; - int32_t prefixBufferLen = sizeof(prefixBuffer); - int32_t minDecimalDigits; - int32_t maxDecimalDigits; - int32_t resultLen; - UErrorCode status = U_ZERO_ERROR; - - prefixBuffer[0] = 0; - - /* mask off any necessary bits */ - /* if(! info->fIsLongDouble) - num &= DBL_MAX;*/ - - /* get the formatter */ - format = u_locbund_getNumberFormat(formatBundle, UNUM_PERCENT); - - /* handle error */ - if(format == 0) - return 0; - - /* save the formatter's state */ - minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); - maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); - - /* set the appropriate flags and number of decimal digits on the formatter */ - if(info->fPrecision != -1) { - /* set the # of decimal digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); - } - else if(info->fAlt) { - /* '#' means always show decimal point */ - /* copy of printf behavior on Solaris - '#' shows 6 digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - else { - /* # of decimal digits is 6 if precision not specified */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - - /* set whether to show the sign */ - if (info->fShowSign) { - u_printf_set_sign(format, info, prefixBuffer, 
&prefixBufferLen, &status); - } - - /* format the number */ - resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); - - if (U_FAILURE(status)) { - resultLen = 0; - } - - /* restore the number format */ - /* TODO: Is this needed? */ - unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); - unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); - - if (info->fShowSign) { - /* Reset back to original value regardless of what the error was */ - UErrorCode localStatus = U_ZERO_ERROR; - u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); - } - - return handler->pad_and_justify(context, info, result, resultLen); -} - -static int32_t -u_printf_ustring_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int32_t len, written; - const UChar *arg = (const UChar*)(args[0].ptrValue); - - /* allocate enough space for the buffer */ - if (arg == NULL) { - arg = gNullStr; - } - len = u_strlen(arg); - - /* width = minimum # of characters to write */ - /* precision = maximum # of characters to write */ - if (info->fPrecision != -1 && info->fPrecision < len) { - len = info->fPrecision; - } - - /* determine if the string should be padded */ - written = handler->pad_and_justify(context, info, arg, len); - - return written; -} - -static int32_t -u_printf_uchar_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int32_t written = 0; - UChar arg = (UChar)(args[0].int64Value); - - /* width = minimum # of characters to write */ - /* precision = maximum # of characters to write */ - /* precision is ignored when handling a uchar */ - - /* determine if the string should be padded */ - written = handler->pad_and_justify(context, info, &arg, 1); - - return written; -} - -static int32_t 
-u_printf_scidbl_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - u_printf_spec_info scidbl_info; - double num = args[0].doubleValue; - int32_t retVal; - UNumberFormat *format; - int32_t maxSigDecimalDigits, significantDigits; - - memcpy(&scidbl_info, info, sizeof(u_printf_spec_info)); - - /* determine whether to use 'd', 'e' or 'f' notation */ - if (scidbl_info.fPrecision == -1 && num == uprv_trunc(num)) - { - /* use 'f' notation */ - scidbl_info.fSpec = 0x0066; - scidbl_info.fPrecision = 0; - /* call the double handler */ - retVal = u_printf_double_handler(handler, context, formatBundle, &scidbl_info, args); - } - else if(num < 0.0001 || (scidbl_info.fPrecision < 1 && 1000000.0 <= num) - || (scidbl_info.fPrecision != -1 && num > uprv_pow10(scidbl_info.fPrecision))) - { - /* use 'e' or 'E' notation */ - scidbl_info.fSpec = scidbl_info.fSpec - 2; - if (scidbl_info.fPrecision == -1) { - scidbl_info.fPrecision = 5; - } - /* call the scientific handler */ - retVal = u_printf_scientific_handler(handler, context, formatBundle, &scidbl_info, args); - } - else { - format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); - /* Check for null pointer */ - if (format == NULL) { - return 0; - } - maxSigDecimalDigits = unum_getAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS); - significantDigits = scidbl_info.fPrecision; - - /* use 'f' notation */ - scidbl_info.fSpec = 0x0066; - if (significantDigits == -1) { - significantDigits = 6; - } - unum_setAttribute(format, UNUM_SIGNIFICANT_DIGITS_USED, TRUE); - unum_setAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS, significantDigits); - /* call the double handler */ - retVal = u_printf_double_handler(handler, context, formatBundle, &scidbl_info, args); - unum_setAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS, maxSigDecimalDigits); - unum_setAttribute(format, UNUM_SIGNIFICANT_DIGITS_USED, FALSE); - } - return retVal; -} - 
-static int32_t -u_printf_count_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - int32_t *count = (int32_t*)(args[0].ptrValue); - - /* in the special case of count, the u_printf_spec_info's width */ - /* will contain the # of chars written thus far */ - *count = info->fWidth; - - return 0; -} - -static int32_t -u_printf_spellout_handler(const u_printf_stream_handler *handler, - void *context, - ULocaleBundle *formatBundle, - const u_printf_spec_info *info, - const ufmt_args *args) -{ - double num = (double) (args[0].doubleValue); - UNumberFormat *format; - UChar result[UPRINTF_BUFFER_SIZE]; - UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; - int32_t prefixBufferLen = sizeof(prefixBuffer); - int32_t minDecimalDigits; - int32_t maxDecimalDigits; - int32_t resultLen; - UErrorCode status = U_ZERO_ERROR; - - prefixBuffer[0] = 0; - - /* mask off any necessary bits */ - /* if(! info->fIsLongDouble) - num &= DBL_MAX;*/ - - /* get the formatter */ - format = u_locbund_getNumberFormat(formatBundle, UNUM_SPELLOUT); - - /* handle error */ - if(format == 0) - return 0; - - /* save the formatter's state */ - minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); - maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); - - /* set the appropriate flags and number of decimal digits on the formatter */ - if(info->fPrecision != -1) { - /* set the # of decimal digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); - } - else if(info->fAlt) { - /* '#' means always show decimal point */ - /* copy of printf behavior on Solaris - '#' shows 6 digits */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - else { - /* # of decimal digits is 6 if precision not specified */ - unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); - } - - /* set whether to show the sign */ - if (info->fShowSign) { - u_printf_set_sign(format, info, 
prefixBuffer, &prefixBufferLen, &status); - } - - /* format the number */ - resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); - - if (U_FAILURE(status)) { - resultLen = 0; - } - - /* restore the number format */ - /* TODO: Is this needed? */ - unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); - unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); - - if (info->fShowSign) { - /* Reset back to original value regardless of what the error was */ - UErrorCode localStatus = U_ZERO_ERROR; - u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); - } - - return handler->pad_and_justify(context, info, result, resultLen); -} - -/* Use US-ASCII characters only for formatting. Most codepages have - characters 20-7F from Unicode. Using any other codepage specific - characters will make it very difficult to format the string on - non-Unicode machines */ -static const u_printf_info g_u_printf_infos[UPRINTF_NUM_FMT_HANDLERS] = { -/* 0x20 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x30 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x40 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, - UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, -#ifdef U_USE_OBSOLETE_IO_FORMATTING - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, -#else - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, -#endif - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x50 */ - UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, -#ifdef U_USE_OBSOLETE_IO_FORMATTING - UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, -#else - UFMT_EMPTY, 
UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, -#endif - UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x60 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, - UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, - UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, - -/* 0x70 */ - UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, - UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, - UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, -}; - -/* flag characters for uprintf */ -#define FLAG_MINUS 0x002D -#define FLAG_PLUS 0x002B -#define FLAG_SPACE 0x0020 -#define FLAG_POUND 0x0023 -#define FLAG_ZERO 0x0030 -#define FLAG_PAREN 0x0028 - -#define ISFLAG(s) (s) == FLAG_MINUS || \ - (s) == FLAG_PLUS || \ - (s) == FLAG_SPACE || \ - (s) == FLAG_POUND || \ - (s) == FLAG_ZERO || \ - (s) == FLAG_PAREN - -/* special characters for uprintf */ -#define SPEC_ASTERISK 0x002A -#define SPEC_DOLLARSIGN 0x0024 -#define SPEC_PERIOD 0x002E -#define SPEC_PERCENT 0x0025 - -/* unicode digits */ -#define DIGIT_ZERO 0x0030 -#define DIGIT_ONE 0x0031 -#define DIGIT_TWO 0x0032 -#define DIGIT_THREE 0x0033 -#define DIGIT_FOUR 0x0034 -#define DIGIT_FIVE 0x0035 -#define DIGIT_SIX 0x0036 -#define DIGIT_SEVEN 0x0037 -#define DIGIT_EIGHT 0x0038 -#define DIGIT_NINE 0x0039 - -#define ISDIGIT(s) (s) == DIGIT_ZERO || \ - (s) == DIGIT_ONE || \ - (s) == DIGIT_TWO || \ - (s) == DIGIT_THREE || \ - (s) == DIGIT_FOUR || \ - (s) == DIGIT_FIVE || \ - (s) == DIGIT_SIX || \ - (s) == DIGIT_SEVEN || \ - (s) == DIGIT_EIGHT || \ - (s) == DIGIT_NINE - -/* u_printf modifiers */ -#define MOD_H 0x0068 -#define MOD_LOWERL 0x006C -#define MOD_L 0x004C - -#define ISMOD(s) (s) == MOD_H || \ - (s) == MOD_LOWERL || \ - (s) == MOD_L -/* Returns an array of the parsed argument type given in the format string. 
*/ -static ufmt_args* parseArguments(const UChar *alias, va_list ap, UErrorCode *status) { - ufmt_args *arglist = NULL; - ufmt_type_info *typelist = NULL; - UBool *islonglong = NULL; - int32_t size = 0; - int32_t pos = 0; - UChar type; - uint16_t handlerNum; - const UChar *aliasStart = alias; - - /* get maximum number of arguments */ - for(;;) { - /* find % */ - while(*alias != UP_PERCENT && *alias != 0x0000) { - alias++; - } - - if(*alias == 0x0000) { - break; - } - - alias++; - - /* handle the pos number */ - if(ISDIGIT(*alias)) { - - /* handle positional parameters */ - if(ISDIGIT(*alias)) { - pos = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - pos *= 10; - pos += (int) (*alias++ - DIGIT_ZERO); - } - } - - /* if there is no '$', don't read anything */ - if(*alias != SPEC_DOLLARSIGN) { - return NULL; - } - } else { - return NULL; - } - - if (pos > size) { - size = pos; - } - } - - /* create the parsed argument list */ - typelist = (ufmt_type_info*)uprv_malloc(sizeof(ufmt_type_info) * size); - islonglong = (UBool*)uprv_malloc(sizeof(UBool) * size); - arglist = (ufmt_args*)uprv_malloc(sizeof(ufmt_args) * size); - - /* If malloc failed, return NULL */ - if (!typelist || !islonglong || !arglist) { - if (typelist) { - uprv_free(typelist); - } - - if (islonglong) { - uprv_free(islonglong); - } - - if (arglist) { - uprv_free(arglist); - } - - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* reset alias back to the beginning */ - alias = aliasStart; - - for(;;) { - /* find % */ - while(*alias != UP_PERCENT && *alias != 0x0000) { - alias++; - } - - if(*alias == 0x0000) { - break; - } - - alias++; - - /* handle positional parameters */ - if(ISDIGIT(*alias)) { - pos = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - pos *= 10; - pos += (int) (*alias++ - DIGIT_ZERO); - } - } - /* offset position by 1 */ - pos--; - - /* skip over everything except for the type */ - while (ISMOD(*alias) || ISFLAG(*alias) || ISDIGIT(*alias) || - *alias 
== SPEC_ASTERISK || *alias == SPEC_PERIOD || *alias == SPEC_DOLLARSIGN) { - islonglong[pos] = FALSE; - if (ISMOD(*alias)) { - alias++; - if (*alias == MOD_LOWERL) { - islonglong[pos] = TRUE; - } - } - alias++; - } - type = *alias; - - /* store the argument type in the correct position of the parsed argument list */ - handlerNum = (uint16_t)(type - UPRINTF_BASE_FMT_HANDLERS); - if (handlerNum < UPRINTF_NUM_FMT_HANDLERS) { - typelist[pos] = g_u_printf_infos[ handlerNum ].info; - } else { - typelist[pos] = ufmt_empty; - } - } - - /* store argument in arglist */ - for (pos = 0; pos < size; pos++) { - switch (typelist[pos]) { - case ufmt_string: - case ufmt_ustring: - case ufmt_pointer: - arglist[pos].ptrValue = va_arg(ap, void*); - break; - case ufmt_char: - case ufmt_uchar: - case ufmt_int: - if (islonglong[pos]) { - arglist[pos].int64Value = va_arg(ap, int64_t); - } - else { - arglist[pos].int64Value = va_arg(ap, int32_t); - } - break; - case ufmt_float: - arglist[pos].floatValue = (float) va_arg(ap, double); - break; - case ufmt_double: - arglist[pos].doubleValue = va_arg(ap, double); - break; - default: - /* else args is ignored */ - arglist[pos].ptrValue = NULL; - break; - } - } - - uprv_free(typelist); - uprv_free(islonglong); - - return arglist; -} - -/* We parse the argument list in Unicode */ -U_CFUNC int32_t -u_printf_parse(const u_printf_stream_handler *streamHandler, - const UChar *fmt, - void *context, - u_localized_print_string *locStringContext, - ULocaleBundle *formatBundle, - int32_t *written, - va_list ap) -{ - uint16_t handlerNum; - ufmt_args args; - ufmt_type_info argType; - u_printf_handler *handler; - u_printf_spec spec; - u_printf_spec_info *info = &(spec.fInfo); - - const UChar *alias = fmt; - const UChar *backup; - const UChar *lastAlias; - const UChar *orgAlias = fmt; - /* parsed argument list */ - ufmt_args *arglist = NULL; /* initialized it to avoid compiler warnings */ - UErrorCode status = U_ZERO_ERROR; - if (!locStringContext || 
locStringContext->available >= 0) { - /* get the parsed list of argument types */ - arglist = parseArguments(orgAlias, ap, &status); - - /* Return error if parsing failed. */ - if (U_FAILURE(status)) { - return -1; - } - } - - /* iterate through the pattern */ - while(!locStringContext || locStringContext->available >= 0) { - - /* find the next '%' */ - lastAlias = alias; - while(*alias != UP_PERCENT && *alias != 0x0000) { - alias++; - } - - /* write any characters before the '%' */ - if(alias > lastAlias) { - *written += (streamHandler->write)(context, lastAlias, (int32_t)(alias - lastAlias)); - } - - /* break if at end of string */ - if(*alias == 0x0000) { - break; - } - - /* initialize spec to default values */ - spec.fWidthPos = -1; - spec.fPrecisionPos = -1; - spec.fArgPos = -1; - - uprv_memset(info, 0, sizeof(*info)); - info->fPrecision = -1; - info->fWidth = -1; - info->fPadChar = 0x0020; - - /* skip over the initial '%' */ - alias++; - - /* Check for positional argument */ - if(ISDIGIT(*alias)) { - - /* Save the current position */ - backup = alias; - - /* handle positional parameters */ - if(ISDIGIT(*alias)) { - spec.fArgPos = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - spec.fArgPos *= 10; - spec.fArgPos += (int) (*alias++ - DIGIT_ZERO); - } - } - - /* if there is no '$', don't read anything */ - if(*alias != SPEC_DOLLARSIGN) { - spec.fArgPos = -1; - alias = backup; - } - /* munge the '$' */ - else - alias++; - } - - /* Get any format flags */ - while(ISFLAG(*alias)) { - switch(*alias++) { - - /* left justify */ - case FLAG_MINUS: - info->fLeft = TRUE; - break; - - /* always show sign */ - case FLAG_PLUS: - info->fShowSign = TRUE; - break; - - /* use space if no sign present */ - case FLAG_SPACE: - info->fShowSign = TRUE; - info->fSpace = TRUE; - break; - - /* use alternate form */ - case FLAG_POUND: - info->fAlt = TRUE; - break; - - /* pad with leading zeroes */ - case FLAG_ZERO: - info->fZero = TRUE; - info->fPadChar = 0x0030; - break; 
- - /* pad character specified */ - case FLAG_PAREN: - - /* TODO test that all four are numbers */ - /* first four characters are hex values for pad char */ - info->fPadChar = (UChar)ufmt_digitvalue(*alias++); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); - - /* final character is ignored */ - alias++; - - break; - } - } - - /* Get the width */ - - /* width is specified out of line */ - if(*alias == SPEC_ASTERISK) { - - info->fWidth = -2; - - /* Skip the '*' */ - alias++; - - /* Save the current position */ - backup = alias; - - /* handle positional parameters */ - if(ISDIGIT(*alias)) { - spec.fWidthPos = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - spec.fWidthPos *= 10; - spec.fWidthPos += (int) (*alias++ - DIGIT_ZERO); - } - } - - /* if there is no '$', don't read anything */ - if(*alias != SPEC_DOLLARSIGN) { - spec.fWidthPos = -1; - alias = backup; - } - /* munge the '$' */ - else - alias++; - } - /* read the width, if present */ - else if(ISDIGIT(*alias)){ - info->fWidth = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - info->fWidth *= 10; - info->fWidth += (int) (*alias++ - DIGIT_ZERO); - } - } - - /* Get the precision */ - - if(*alias == SPEC_PERIOD) { - - /* eat up the '.' 
*/ - alias++; - - /* precision is specified out of line */ - if(*alias == SPEC_ASTERISK) { - - info->fPrecision = -2; - - /* Skip the '*' */ - alias++; - - /* save the current position */ - backup = alias; - - /* handle positional parameters */ - if(ISDIGIT(*alias)) { - spec.fPrecisionPos = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - spec.fPrecisionPos *= 10; - spec.fPrecisionPos += (int) (*alias++ - DIGIT_ZERO); - } - - /* if there is no '$', don't read anything */ - if(*alias != SPEC_DOLLARSIGN) { - spec.fPrecisionPos = -1; - alias = backup; - } - else { - /* munge the '$' */ - alias++; - } - } - } - /* read the precision */ - else if(ISDIGIT(*alias)){ - info->fPrecision = (int) (*alias++ - DIGIT_ZERO); - - while(ISDIGIT(*alias)) { - info->fPrecision *= 10; - info->fPrecision += (int) (*alias++ - DIGIT_ZERO); - } - } - } - - /* Get any modifiers */ - if(ISMOD(*alias)) { - switch(*alias++) { - - /* short */ - case MOD_H: - info->fIsShort = TRUE; - break; - - /* long or long long */ - case MOD_LOWERL: - if(*alias == MOD_LOWERL) { - info->fIsLongLong = TRUE; - /* skip over the next 'l' */ - alias++; - } - else - info->fIsLong = TRUE; - break; - - /* long double */ - case MOD_L: - info->fIsLongDouble = TRUE; - break; - } - } - - /* finally, get the specifier letter */ - info->fSpec = *alias++; - info->fOrigSpec = info->fSpec; - - /* fill in the precision and width, if specified out of line */ - - /* width specified out of line */ - if(spec.fInfo.fWidth == -2) { - if(spec.fWidthPos == -1) { - /* read the width from the argument list */ - info->fWidth = va_arg(ap, int32_t); - } - /* else handle positional parameter */ - - /* if it's negative, take the absolute value and set left alignment */ - if(info->fWidth < 0) { - info->fWidth *= -1; /* Make positive */ - info->fLeft = TRUE; - } - } - - /* precision specified out of line */ - if(info->fPrecision == -2) { - if(spec.fPrecisionPos == -1) { - /* read the precision from the argument list */ - 
info->fPrecision = va_arg(ap, int32_t); - } - /* else handle positional parameter */ - - /* if it's negative, set it to zero */ - if(info->fPrecision < 0) - info->fPrecision = 0; - } - - handlerNum = (uint16_t)(info->fSpec - UPRINTF_BASE_FMT_HANDLERS); - if (handlerNum < UPRINTF_NUM_FMT_HANDLERS) { - /* query the info function for argument information */ - argType = g_u_printf_infos[ handlerNum ].info; - - /* goto the correct argument on arg_list if position is specified */ - if (spec.fArgPos > 0) { - /* offset position by 1 */ - spec.fArgPos--; - switch(argType) { - case ufmt_count: - /* set the spec's width to the # of chars written */ - info->fWidth = *written; - /* fall through to set the pointer */ - U_FALLTHROUGH; - case ufmt_string: - case ufmt_ustring: - case ufmt_pointer: - args.ptrValue = arglist[spec.fArgPos].ptrValue; - break; - case ufmt_char: - case ufmt_uchar: - case ufmt_int: - args.int64Value = arglist[spec.fArgPos].int64Value; - break; - case ufmt_float: - args.floatValue = arglist[spec.fArgPos].floatValue; - break; - case ufmt_double: - args.doubleValue = arglist[spec.fArgPos].doubleValue; - break; - default: - /* else args is ignored */ - args.ptrValue = NULL; - break; - } - } else { /* no positional argument specified */ - switch(argType) { - case ufmt_count: - /* set the spec's width to the # of chars written */ - info->fWidth = *written; - /* fall through to set the pointer */ - U_FALLTHROUGH; - case ufmt_string: - case ufmt_ustring: - case ufmt_pointer: - args.ptrValue = va_arg(ap, void*); - break; - case ufmt_char: - case ufmt_uchar: - case ufmt_int: - if (info->fIsLongLong) { - args.int64Value = va_arg(ap, int64_t); - } - else { - args.int64Value = va_arg(ap, int32_t); - } - break; - case ufmt_float: - args.floatValue = (float) va_arg(ap, double); - break; - case ufmt_double: - args.doubleValue = va_arg(ap, double); - break; - default: - /* else args is ignored */ - args.ptrValue = NULL; - break; - } - } - - /* call the handler function */ 
- handler = g_u_printf_infos[ handlerNum ].handler; - if(handler != 0) { - *written += (*handler)(streamHandler, context, formatBundle, info, &args); - } - else { - /* just echo unknown tags */ - *written += (streamHandler->write)(context, fmt, (int32_t)(alias - lastAlias)); - } - } - else { - /* just echo unknown tags */ - *written += (streamHandler->write)(context, fmt, (int32_t)(alias - lastAlias)); - } - } - /* delete parsed argument list */ - if (arglist != NULL) { - uprv_free(arglist); - } - /* return # of characters in this format that have been parsed. */ - return (int32_t)(alias - fmt); -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/uprntf_p.cpp b/deps/icu-small/source/io/uprntf_p.cpp new file mode 100644 index 0000000000..ecf2e67235 --- /dev/null +++ b/deps/icu-small/source/io/uprntf_p.cpp @@ -0,0 +1,1606 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File uprntf_p.c +* +* Modification History: +* +* Date Name Description +* 11/23/98 stephen Creation. +* 03/12/99 stephen Modified for new C API. 
+* 08/07/2003 george Reunify printf implementations +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "uprintf.h" +#include "ufmt_cmn.h" +#include "cmemory.h" +#include "putilimp.h" + +/* ANSI style formatting */ +/* Use US-ASCII characters only for formatting */ + +/* % */ +#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_printf_simple_percent_handler} +/* s */ +#define UFMT_STRING {ufmt_string, u_printf_string_handler} +/* c */ +#define UFMT_CHAR {ufmt_char, u_printf_char_handler} +/* d, i */ +#define UFMT_INT {ufmt_int, u_printf_integer_handler} +/* u */ +#define UFMT_UINT {ufmt_int, u_printf_uinteger_handler} +/* o */ +#define UFMT_OCTAL {ufmt_int, u_printf_octal_handler} +/* x, X */ +#define UFMT_HEX {ufmt_int, u_printf_hex_handler} +/* f */ +#define UFMT_DOUBLE {ufmt_double, u_printf_double_handler} +/* e, E */ +#define UFMT_SCIENTIFIC {ufmt_double, u_printf_scientific_handler} +/* g, G */ +#define UFMT_SCIDBL {ufmt_double, u_printf_scidbl_handler} +/* n */ +#define UFMT_COUNT {ufmt_count, u_printf_count_handler} + +/* non-ANSI extensions */ +/* Use US-ASCII characters only for formatting */ + +/* p */ +#define UFMT_POINTER {ufmt_pointer, u_printf_pointer_handler} +/* V */ +#define UFMT_SPELLOUT {ufmt_double, u_printf_spellout_handler} +/* P */ +#define UFMT_PERCENT {ufmt_double, u_printf_percent_handler} +/* C K is old format */ +#define UFMT_UCHAR {ufmt_uchar, u_printf_uchar_handler} +/* S U is old format */ +#define UFMT_USTRING {ufmt_ustring, u_printf_ustring_handler} + + +#define UFMT_EMPTY {ufmt_empty, NULL} + +/** + * A u_printf handler function. + * A u_printf handler is responsible for handling a single u_printf + * format specification, for example 'd' or 's'. + * @param stream The UFILE to which to write output. 
+ * @param info A pointer to a u_printf_spec_info struct containing + * information on the format specification. + * @param args A pointer to the argument data + * @return The number of Unicode characters written to stream. + */ +typedef int32_t U_EXPORT2 +u_printf_handler(const u_printf_stream_handler *handler, + + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args); + +typedef struct u_printf_info { + ufmt_type_info info; + u_printf_handler *handler; +} u_printf_info; + +/** + * Struct encapsulating a single uprintf format specification. + */ +typedef struct u_printf_spec { + u_printf_spec_info fInfo; /* Information on this spec */ + int32_t fWidthPos; /* Position of width in arg list */ + int32_t fPrecisionPos; /* Position of precision in arg list */ + int32_t fArgPos; /* Position of data in arg list */ +} u_printf_spec; + +#define UPRINTF_NUM_FMT_HANDLERS 108 + +/* We do not use handlers for 0-0x1f */ +#define UPRINTF_BASE_FMT_HANDLERS 0x20 + +/* buffer size for formatting */ +#define UPRINTF_BUFFER_SIZE 1024 +#define UPRINTF_SYMBOL_BUFFER_SIZE 8 + +static const UChar gNullStr[] = {0x28, 0x6E, 0x75, 0x6C, 0x6C, 0x29, 0}; /* "(null)" */ +static const UChar gSpaceStr[] = {0x20, 0}; /* " " */ + +/* Sets the sign of a format based on u_printf_spec_info */ +/* TODO: Is setting the prefix symbol to a positive sign a good idea in all locales? */ +static void +u_printf_set_sign(UNumberFormat *format, + const u_printf_spec_info *info, + UChar *prefixBuffer, + int32_t *prefixBufLen, + UErrorCode *status) +{ + if(info->fShowSign) { + *prefixBufLen = unum_getTextAttribute(format, + UNUM_POSITIVE_PREFIX, + prefixBuffer, + *prefixBufLen, + status); + if (info->fSpace) { + /* Setting UNUM_PLUS_SIGN_SYMBOL affects the exponent too. 
*/ + /* unum_setSymbol(format, UNUM_PLUS_SIGN_SYMBOL, gSpaceStr, 1, &status); */ + unum_setTextAttribute(format, UNUM_POSITIVE_PREFIX, gSpaceStr, 1, status); + } + else { + UChar plusSymbol[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t symbolLen; + + symbolLen = unum_getSymbol(format, + UNUM_PLUS_SIGN_SYMBOL, + plusSymbol, + UPRV_LENGTHOF(plusSymbol), + status); + unum_setTextAttribute(format, + UNUM_POSITIVE_PREFIX, + plusSymbol, + symbolLen, + status); + } + } + else { + *prefixBufLen = 0; + } +} + +static void +u_printf_reset_sign(UNumberFormat *format, + const u_printf_spec_info *info, + UChar *prefixBuffer, + int32_t *prefixBufLen, + UErrorCode *status) +{ + if(info->fShowSign) { + unum_setTextAttribute(format, + UNUM_POSITIVE_PREFIX, + prefixBuffer, + *prefixBufLen, + status); + } +} + + +/* handle a '%' */ +static int32_t +u_printf_simple_percent_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + (void)info; + (void)args; + static const UChar PERCENT[] = { UP_PERCENT }; + + /* put a single '%' onto the output */ + return handler->write(context, PERCENT, 1); +} + +/* handle 's' */ +static int32_t +u_printf_string_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + UChar *s; + UChar buffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t len, written; + int32_t argSize; + const char *arg = (const char*)(args[0].ptrValue); + + /* convert from the default codepage to Unicode */ + if (arg) { + argSize = (int32_t)strlen(arg) + 1; + if (argSize >= MAX_UCHAR_BUFFER_SIZE(buffer)) { + s = ufmt_defaultCPToUnicode(arg, argSize, + (UChar *)uprv_malloc(MAX_UCHAR_BUFFER_NEEDED(argSize)), + MAX_UCHAR_BUFFER_NEEDED(argSize)); + if(s == NULL) { + return 0; + } + } + else { + s = ufmt_defaultCPToUnicode(arg, argSize, buffer, + 
UPRV_LENGTHOF(buffer)); + } + } + else { + s = (UChar *)gNullStr; + } + len = u_strlen(s); + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + if (info->fPrecision != -1 && info->fPrecision < len) { + len = info->fPrecision; + } + + written = handler->pad_and_justify(context, info, s, len); + + /* clean up */ + if (gNullStr != s && buffer != s) { + uprv_free(s); + } + + return written; +} + +static int32_t +u_printf_char_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + UChar s[U16_MAX_LENGTH+1]; + int32_t len = 1, written; + unsigned char arg = (unsigned char)(args[0].int64Value); + + /* convert from default codepage to Unicode */ + ufmt_defaultCPToUnicode((const char *)&arg, 2, s, UPRV_LENGTHOF(s)); + + /* Remember that this may be an MBCS character */ + if (arg != 0) { + len = u_strlen(s); + } + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + /* precision is ignored when handling a char */ + + written = handler->pad_and_justify(context, info, s, len); + + return written; +} + +static int32_t +u_printf_double_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + UChar result[UPRINTF_BUFFER_SIZE]; + UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(! 
info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified regardless of locale */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? 
*/ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +/* HSYS */ +static int32_t +u_printf_integer_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + int64_t num = args[0].int64Value; + UNumberFormat *format; + UChar result[UPRINTF_BUFFER_SIZE]; + UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDigits = -1; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + if (info->fIsShort) + num = (int16_t)num; + else if (!info->fIsLongLong) + num = (int32_t)num; + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + /* set the minimum integer digits */ + if(info->fPrecision != -1) { + /* set the minimum # of digits */ + minDigits = unum_getAttribute(format, UNUM_MIN_INTEGER_DIGITS); + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, info->fPrecision); + } + + /* set whether to show the sign */ + if(info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatInt64(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + if (minDigits != -1) { + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, minDigits); + } + + if (info->fShowSign) 
{ + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_hex_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int64_t num = args[0].int64Value; + UChar result[UPRINTF_BUFFER_SIZE]; + int32_t len = UPRINTF_BUFFER_SIZE; + + + /* mask off any necessary bits */ + if (info->fIsShort) + num &= UINT16_MAX; + else if (!info->fIsLongLong) + num &= UINT32_MAX; + + /* format the number, preserving the minimum # of digits */ + ufmt_64tou(result, &len, num, 16, + (UBool)(info->fSpec == 0x0078), + (info->fPrecision == -1 && info->fZero) ? info->fWidth : info->fPrecision); + + /* convert to alt form, if desired */ + if(num != 0 && info->fAlt && len < UPRINTF_BUFFER_SIZE - 2) { + /* shift the formatted string right by 2 chars */ + memmove(result + 2, result, len * sizeof(UChar)); + result[0] = 0x0030; + result[1] = info->fSpec; + len += 2; + } + + return handler->pad_and_justify(context, info, result, len); +} + +static int32_t +u_printf_octal_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int64_t num = args[0].int64Value; + UChar result[UPRINTF_BUFFER_SIZE]; + int32_t len = UPRINTF_BUFFER_SIZE; + + + /* mask off any necessary bits */ + if (info->fIsShort) + num &= UINT16_MAX; + else if (!info->fIsLongLong) + num &= UINT32_MAX; + + /* format the number, preserving the minimum # of digits */ + ufmt_64tou(result, &len, num, 8, + FALSE, /* doesn't matter for octal */ + info->fPrecision == -1 && info->fZero ? 
info->fWidth : info->fPrecision); + + /* convert to alt form, if desired */ + if(info->fAlt && result[0] != 0x0030 && len < UPRINTF_BUFFER_SIZE - 1) { + /* shift the formatted string right by 1 char */ + memmove(result + 1, result, len * sizeof(UChar)); + result[0] = 0x0030; + len += 1; + } + + return handler->pad_and_justify(context, info, result, len); +} + +static int32_t +u_printf_uinteger_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + int64_t num = args[0].int64Value; + UNumberFormat *format; + UChar result[UPRINTF_BUFFER_SIZE]; + int32_t minDigits = -1; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + /* TODO: Fix this once uint64_t can be formatted. */ + if (info->fIsShort) + num &= UINT16_MAX; + else if (!info->fIsLongLong) + num &= UINT32_MAX; + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + /* set the minimum integer digits */ + if(info->fPrecision != -1) { + /* set the minimum # of digits */ + minDigits = unum_getAttribute(format, UNUM_MIN_INTEGER_DIGITS); + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, info->fPrecision); + } + + /* To mirror other stdio implementations, we ignore the sign argument */ + + /* format the number */ + resultLen = unum_formatInt64(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + if (minDigits != -1) { + unum_setAttribute(format, UNUM_MIN_INTEGER_DIGITS, minDigits); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_pointer_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; 
+ UChar result[UPRINTF_BUFFER_SIZE]; + int32_t len = UPRINTF_BUFFER_SIZE; + + /* format the pointer in hex */ + ufmt_ptou(result, &len, args[0].ptrValue, TRUE/*, info->fPrecision*/); + + return handler->pad_and_justify(context, info, result, len); +} + +static int32_t +u_printf_scientific_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + UChar result[UPRINTF_BUFFER_SIZE]; + UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + UErrorCode status = U_ZERO_ERROR; + UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t srcLen, expLen; + int32_t resultLen; + UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(! info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_SCIENTIFIC); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + srcLen = unum_getSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + sizeof(srcExpBuf), + &status); + + /* Upper/lower case the e */ + if (info->fSpec == (UChar)0x65 /* e */) { + expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + formatBundle->fLocale, + &status); + } + else { + expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + formatBundle->fLocale, + &status); + } + + unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + expBuf, + expLen, + &status); + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + 
if(info->fPrecision != -1) { + /* set the # of decimal digits */ + if (info->fOrigSpec == (UChar)0x65 /* e */ || info->fOrigSpec == (UChar)0x45 /* E */) { + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else { + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, 1); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, info->fPrecision); + } + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? */ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + /* Since we're the only one using the scientific + format, we don't need to save the old exponent value. 
*/ + /*unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + srcLen, + &status);*/ + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_percent_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + UChar result[UPRINTF_BUFFER_SIZE]; + UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(! info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_PERCENT); + + /* handle error */ + if(format == 0) + return 0; + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, 
&prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? */ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +static int32_t +u_printf_ustring_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int32_t len, written; + const UChar *arg = (const UChar*)(args[0].ptrValue); + + /* allocate enough space for the buffer */ + if (arg == NULL) { + arg = gNullStr; + } + len = u_strlen(arg); + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + if (info->fPrecision != -1 && info->fPrecision < len) { + len = info->fPrecision; + } + + /* determine if the string should be padded */ + written = handler->pad_and_justify(context, info, arg, len); + + return written; +} + +static int32_t +u_printf_uchar_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)formatBundle; + int32_t written = 0; + UChar arg = (UChar)(args[0].int64Value); + + /* width = minimum # of characters to write */ + /* precision = maximum # of characters to write */ + /* precision is ignored when handling a uchar */ + + /* determine if the string should be padded */ + written = handler->pad_and_justify(context, info, &arg, 1); + + 
return written; +} + +static int32_t +u_printf_scidbl_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + u_printf_spec_info scidbl_info; + double num = args[0].doubleValue; + int32_t retVal; + UNumberFormat *format; + int32_t maxSigDecimalDigits, significantDigits; + + memcpy(&scidbl_info, info, sizeof(u_printf_spec_info)); + + /* determine whether to use 'd', 'e' or 'f' notation */ + if (scidbl_info.fPrecision == -1 && num == uprv_trunc(num)) + { + /* use 'f' notation */ + scidbl_info.fSpec = 0x0066; + scidbl_info.fPrecision = 0; + /* call the double handler */ + retVal = u_printf_double_handler(handler, context, formatBundle, &scidbl_info, args); + } + else if(num < 0.0001 || (scidbl_info.fPrecision < 1 && 1000000.0 <= num) + || (scidbl_info.fPrecision != -1 && num > uprv_pow10(scidbl_info.fPrecision))) + { + /* use 'e' or 'E' notation */ + scidbl_info.fSpec = scidbl_info.fSpec - 2; + if (scidbl_info.fPrecision == -1) { + scidbl_info.fPrecision = 5; + } + /* call the scientific handler */ + retVal = u_printf_scientific_handler(handler, context, formatBundle, &scidbl_info, args); + } + else { + format = u_locbund_getNumberFormat(formatBundle, UNUM_DECIMAL); + /* Check for null pointer */ + if (format == NULL) { + return 0; + } + maxSigDecimalDigits = unum_getAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS); + significantDigits = scidbl_info.fPrecision; + + /* use 'f' notation */ + scidbl_info.fSpec = 0x0066; + if (significantDigits == -1) { + significantDigits = 6; + } + unum_setAttribute(format, UNUM_SIGNIFICANT_DIGITS_USED, TRUE); + unum_setAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS, significantDigits); + /* call the double handler */ + retVal = u_printf_double_handler(handler, context, formatBundle, &scidbl_info, args); + unum_setAttribute(format, UNUM_MAX_SIGNIFICANT_DIGITS, maxSigDecimalDigits); + unum_setAttribute(format, 
UNUM_SIGNIFICANT_DIGITS_USED, FALSE); + } + return retVal; +} + +static int32_t +u_printf_count_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + (void)handler; + (void)context; + (void)formatBundle; + int32_t *count = (int32_t*)(args[0].ptrValue); + + /* in the special case of count, the u_printf_spec_info's width */ + /* will contain the # of chars written thus far */ + *count = info->fWidth; + + return 0; +} + +static int32_t +u_printf_spellout_handler(const u_printf_stream_handler *handler, + void *context, + ULocaleBundle *formatBundle, + const u_printf_spec_info *info, + const ufmt_args *args) +{ + double num = (double) (args[0].doubleValue); + UNumberFormat *format; + UChar result[UPRINTF_BUFFER_SIZE]; + UChar prefixBuffer[UPRINTF_BUFFER_SIZE]; + int32_t prefixBufferLen = sizeof(prefixBuffer); + int32_t minDecimalDigits; + int32_t maxDecimalDigits; + int32_t resultLen; + UErrorCode status = U_ZERO_ERROR; + + prefixBuffer[0] = 0; + + /* mask off any necessary bits */ + /* if(! 
info->fIsLongDouble) + num &= DBL_MAX;*/ + + /* get the formatter */ + format = u_locbund_getNumberFormat(formatBundle, UNUM_SPELLOUT); + + /* handle error */ + if(format == 0) + return 0; + + /* save the formatter's state */ + minDecimalDigits = unum_getAttribute(format, UNUM_MIN_FRACTION_DIGITS); + maxDecimalDigits = unum_getAttribute(format, UNUM_MAX_FRACTION_DIGITS); + + /* set the appropriate flags and number of decimal digits on the formatter */ + if(info->fPrecision != -1) { + /* set the # of decimal digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, info->fPrecision); + } + else if(info->fAlt) { + /* '#' means always show decimal point */ + /* copy of printf behavior on Solaris - '#' shows 6 digits */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + else { + /* # of decimal digits is 6 if precision not specified */ + unum_setAttribute(format, UNUM_FRACTION_DIGITS, 6); + } + + /* set whether to show the sign */ + if (info->fShowSign) { + u_printf_set_sign(format, info, prefixBuffer, &prefixBufferLen, &status); + } + + /* format the number */ + resultLen = unum_formatDouble(format, num, result, UPRINTF_BUFFER_SIZE, 0, &status); + + if (U_FAILURE(status)) { + resultLen = 0; + } + + /* restore the number format */ + /* TODO: Is this needed? */ + unum_setAttribute(format, UNUM_MIN_FRACTION_DIGITS, minDecimalDigits); + unum_setAttribute(format, UNUM_MAX_FRACTION_DIGITS, maxDecimalDigits); + + if (info->fShowSign) { + /* Reset back to original value regardless of what the error was */ + UErrorCode localStatus = U_ZERO_ERROR; + u_printf_reset_sign(format, info, prefixBuffer, &prefixBufferLen, &localStatus); + } + + return handler->pad_and_justify(context, info, result, resultLen); +} + +/* Use US-ASCII characters only for formatting. Most codepages have + characters 20-7F from Unicode. 
Using any other codepage specific + characters will make it very difficult to format the string on + non-Unicode machines */ +static const u_printf_info g_u_printf_infos[UPRINTF_NUM_FMT_HANDLERS] = { +/* 0x20 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x30 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x40 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, + UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +#endif + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x50 */ + UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, +#endif + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x60 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, + UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, + UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, + +/* 0x70 */ + UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, + UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +}; + +/* flag characters for uprintf */ +#define FLAG_MINUS 0x002D +#define FLAG_PLUS 0x002B +#define FLAG_SPACE 0x0020 +#define FLAG_POUND 0x0023 +#define FLAG_ZERO 0x0030 +#define FLAG_PAREN 0x0028 + +#define ISFLAG(s) (s) == FLAG_MINUS || \ + (s) == 
FLAG_PLUS || \ + (s) == FLAG_SPACE || \ + (s) == FLAG_POUND || \ + (s) == FLAG_ZERO || \ + (s) == FLAG_PAREN + +/* special characters for uprintf */ +#define SPEC_ASTERISK 0x002A +#define SPEC_DOLLARSIGN 0x0024 +#define SPEC_PERIOD 0x002E +#define SPEC_PERCENT 0x0025 + +/* unicode digits */ +#define DIGIT_ZERO 0x0030 +#define DIGIT_ONE 0x0031 +#define DIGIT_TWO 0x0032 +#define DIGIT_THREE 0x0033 +#define DIGIT_FOUR 0x0034 +#define DIGIT_FIVE 0x0035 +#define DIGIT_SIX 0x0036 +#define DIGIT_SEVEN 0x0037 +#define DIGIT_EIGHT 0x0038 +#define DIGIT_NINE 0x0039 + +#define ISDIGIT(s) (s) == DIGIT_ZERO || \ + (s) == DIGIT_ONE || \ + (s) == DIGIT_TWO || \ + (s) == DIGIT_THREE || \ + (s) == DIGIT_FOUR || \ + (s) == DIGIT_FIVE || \ + (s) == DIGIT_SIX || \ + (s) == DIGIT_SEVEN || \ + (s) == DIGIT_EIGHT || \ + (s) == DIGIT_NINE + +/* u_printf modifiers */ +#define MOD_H 0x0068 +#define MOD_LOWERL 0x006C +#define MOD_L 0x004C + +#define ISMOD(s) (s) == MOD_H || \ + (s) == MOD_LOWERL || \ + (s) == MOD_L +/* Returns an array of the parsed argument type given in the format string. 
*/ +static ufmt_args* parseArguments(const UChar *alias, va_list ap, UErrorCode *status) { + ufmt_args *arglist = NULL; + ufmt_type_info *typelist = NULL; + UBool *islonglong = NULL; + int32_t size = 0; + int32_t pos = 0; + UChar type; + uint16_t handlerNum; + const UChar *aliasStart = alias; + + /* get maximum number of arguments */ + for(;;) { + /* find % */ + while(*alias != UP_PERCENT && *alias != 0x0000) { + alias++; + } + + if(*alias == 0x0000) { + break; + } + + alias++; + + /* handle the pos number */ + if(ISDIGIT(*alias)) { + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + pos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + pos *= 10; + pos += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + return NULL; + } + } else { + return NULL; + } + + if (pos > size) { + size = pos; + } + } + + /* create the parsed argument list */ + typelist = (ufmt_type_info*)uprv_malloc(sizeof(ufmt_type_info) * size); + islonglong = (UBool*)uprv_malloc(sizeof(UBool) * size); + arglist = (ufmt_args*)uprv_malloc(sizeof(ufmt_args) * size); + + /* If malloc failed, return NULL */ + if (!typelist || !islonglong || !arglist) { + if (typelist) { + uprv_free(typelist); + } + + if (islonglong) { + uprv_free(islonglong); + } + + if (arglist) { + uprv_free(arglist); + } + + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* reset alias back to the beginning */ + alias = aliasStart; + + for(;;) { + /* find % */ + while(*alias != UP_PERCENT && *alias != 0x0000) { + alias++; + } + + if(*alias == 0x0000) { + break; + } + + alias++; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + pos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + pos *= 10; + pos += (int) (*alias++ - DIGIT_ZERO); + } + } + /* offset position by 1 */ + pos--; + + /* skip over everything except for the type */ + while (ISMOD(*alias) || ISFLAG(*alias) || ISDIGIT(*alias) || + *alias 
== SPEC_ASTERISK || *alias == SPEC_PERIOD || *alias == SPEC_DOLLARSIGN) { + islonglong[pos] = FALSE; + if (ISMOD(*alias)) { + alias++; + if (*alias == MOD_LOWERL) { + islonglong[pos] = TRUE; + } + } + alias++; + } + type = *alias; + + /* store the argument type in the correct position of the parsed argument list */ + handlerNum = (uint16_t)(type - UPRINTF_BASE_FMT_HANDLERS); + if (handlerNum < UPRINTF_NUM_FMT_HANDLERS) { + typelist[pos] = g_u_printf_infos[ handlerNum ].info; + } else { + typelist[pos] = ufmt_empty; + } + } + + /* store argument in arglist */ + for (pos = 0; pos < size; pos++) { + switch (typelist[pos]) { + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + arglist[pos].ptrValue = va_arg(ap, void*); + break; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + if (islonglong[pos]) { + arglist[pos].int64Value = va_arg(ap, int64_t); + } + else { + arglist[pos].int64Value = va_arg(ap, int32_t); + } + break; + case ufmt_float: + arglist[pos].floatValue = (float) va_arg(ap, double); + break; + case ufmt_double: + arglist[pos].doubleValue = va_arg(ap, double); + break; + default: + /* else args is ignored */ + arglist[pos].ptrValue = NULL; + break; + } + } + + uprv_free(typelist); + uprv_free(islonglong); + + return arglist; +} + +/* We parse the argument list in Unicode */ +U_CFUNC int32_t +u_printf_parse(const u_printf_stream_handler *streamHandler, + const UChar *fmt, + void *context, + u_localized_print_string *locStringContext, + ULocaleBundle *formatBundle, + int32_t *written, + va_list ap) +{ + uint16_t handlerNum; + ufmt_args args; + ufmt_type_info argType; + u_printf_handler *handler; + u_printf_spec spec; + u_printf_spec_info *info = &(spec.fInfo); + + const UChar *alias = fmt; + const UChar *backup; + const UChar *lastAlias; + const UChar *orgAlias = fmt; + /* parsed argument list */ + ufmt_args *arglist = NULL; /* initialized it to avoid compiler warnings */ + UErrorCode status = U_ZERO_ERROR; + if (!locStringContext || 
locStringContext->available >= 0) { + /* get the parsed list of argument types */ + arglist = parseArguments(orgAlias, ap, &status); + + /* Return error if parsing failed. */ + if (U_FAILURE(status)) { + return -1; + } + } + + /* iterate through the pattern */ + while(!locStringContext || locStringContext->available >= 0) { + + /* find the next '%' */ + lastAlias = alias; + while(*alias != UP_PERCENT && *alias != 0x0000) { + alias++; + } + + /* write any characters before the '%' */ + if(alias > lastAlias) { + *written += (streamHandler->write)(context, lastAlias, (int32_t)(alias - lastAlias)); + } + + /* break if at end of string */ + if(*alias == 0x0000) { + break; + } + + /* initialize spec to default values */ + spec.fWidthPos = -1; + spec.fPrecisionPos = -1; + spec.fArgPos = -1; + + uprv_memset(info, 0, sizeof(*info)); + info->fPrecision = -1; + info->fWidth = -1; + info->fPadChar = 0x0020; + + /* skip over the initial '%' */ + alias++; + + /* Check for positional argument */ + if(ISDIGIT(*alias)) { + + /* Save the current position */ + backup = alias; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + spec.fArgPos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + spec.fArgPos *= 10; + spec.fArgPos += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + spec.fArgPos = -1; + alias = backup; + } + /* munge the '$' */ + else + alias++; + } + + /* Get any format flags */ + while(ISFLAG(*alias)) { + switch(*alias++) { + + /* left justify */ + case FLAG_MINUS: + info->fLeft = TRUE; + break; + + /* always show sign */ + case FLAG_PLUS: + info->fShowSign = TRUE; + break; + + /* use space if no sign present */ + case FLAG_SPACE: + info->fShowSign = TRUE; + info->fSpace = TRUE; + break; + + /* use alternate form */ + case FLAG_POUND: + info->fAlt = TRUE; + break; + + /* pad with leading zeroes */ + case FLAG_ZERO: + info->fZero = TRUE; + info->fPadChar = 0x0030; + break; 
+ + /* pad character specified */ + case FLAG_PAREN: + + /* TODO test that all four are numbers */ + /* first four characters are hex values for pad char */ + info->fPadChar = (UChar)ufmt_digitvalue(*alias++); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*alias++)); + + /* final character is ignored */ + alias++; + + break; + } + } + + /* Get the width */ + + /* width is specified out of line */ + if(*alias == SPEC_ASTERISK) { + + info->fWidth = -2; + + /* Skip the '*' */ + alias++; + + /* Save the current position */ + backup = alias; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + spec.fWidthPos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + spec.fWidthPos *= 10; + spec.fWidthPos += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + spec.fWidthPos = -1; + alias = backup; + } + /* munge the '$' */ + else + alias++; + } + /* read the width, if present */ + else if(ISDIGIT(*alias)){ + info->fWidth = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + info->fWidth *= 10; + info->fWidth += (int) (*alias++ - DIGIT_ZERO); + } + } + + /* Get the precision */ + + if(*alias == SPEC_PERIOD) { + + /* eat up the '.' 
*/ + alias++; + + /* precision is specified out of line */ + if(*alias == SPEC_ASTERISK) { + + info->fPrecision = -2; + + /* Skip the '*' */ + alias++; + + /* save the current position */ + backup = alias; + + /* handle positional parameters */ + if(ISDIGIT(*alias)) { + spec.fPrecisionPos = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + spec.fPrecisionPos *= 10; + spec.fPrecisionPos += (int) (*alias++ - DIGIT_ZERO); + } + + /* if there is no '$', don't read anything */ + if(*alias != SPEC_DOLLARSIGN) { + spec.fPrecisionPos = -1; + alias = backup; + } + else { + /* munge the '$' */ + alias++; + } + } + } + /* read the precision */ + else if(ISDIGIT(*alias)){ + info->fPrecision = (int) (*alias++ - DIGIT_ZERO); + + while(ISDIGIT(*alias)) { + info->fPrecision *= 10; + info->fPrecision += (int) (*alias++ - DIGIT_ZERO); + } + } + } + + /* Get any modifiers */ + if(ISMOD(*alias)) { + switch(*alias++) { + + /* short */ + case MOD_H: + info->fIsShort = TRUE; + break; + + /* long or long long */ + case MOD_LOWERL: + if(*alias == MOD_LOWERL) { + info->fIsLongLong = TRUE; + /* skip over the next 'l' */ + alias++; + } + else + info->fIsLong = TRUE; + break; + + /* long double */ + case MOD_L: + info->fIsLongDouble = TRUE; + break; + } + } + + /* finally, get the specifier letter */ + info->fSpec = *alias++; + info->fOrigSpec = info->fSpec; + + /* fill in the precision and width, if specified out of line */ + + /* width specified out of line */ + if(spec.fInfo.fWidth == -2) { + if(spec.fWidthPos == -1) { + /* read the width from the argument list */ + info->fWidth = va_arg(ap, int32_t); + } + /* else handle positional parameter */ + + /* if it's negative, take the absolute value and set left alignment */ + if(info->fWidth < 0) { + info->fWidth *= -1; /* Make positive */ + info->fLeft = TRUE; + } + } + + /* precision specified out of line */ + if(info->fPrecision == -2) { + if(spec.fPrecisionPos == -1) { + /* read the precision from the argument list */ + 
info->fPrecision = va_arg(ap, int32_t); + } + /* else handle positional parameter */ + + /* if it's negative, set it to zero */ + if(info->fPrecision < 0) + info->fPrecision = 0; + } + + handlerNum = (uint16_t)(info->fSpec - UPRINTF_BASE_FMT_HANDLERS); + if (handlerNum < UPRINTF_NUM_FMT_HANDLERS) { + /* query the info function for argument information */ + argType = g_u_printf_infos[ handlerNum ].info; + + /* goto the correct argument on arg_list if position is specified */ + if (spec.fArgPos > 0) { + /* offset position by 1 */ + spec.fArgPos--; + switch(argType) { + case ufmt_count: + /* set the spec's width to the # of chars written */ + info->fWidth = *written; + /* fall through to set the pointer */ + U_FALLTHROUGH; + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + args.ptrValue = arglist[spec.fArgPos].ptrValue; + break; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + args.int64Value = arglist[spec.fArgPos].int64Value; + break; + case ufmt_float: + args.floatValue = arglist[spec.fArgPos].floatValue; + break; + case ufmt_double: + args.doubleValue = arglist[spec.fArgPos].doubleValue; + break; + default: + /* else args is ignored */ + args.ptrValue = NULL; + break; + } + } else { /* no positional argument specified */ + switch(argType) { + case ufmt_count: + /* set the spec's width to the # of chars written */ + info->fWidth = *written; + /* fall through to set the pointer */ + U_FALLTHROUGH; + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + args.ptrValue = va_arg(ap, void*); + break; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + if (info->fIsLongLong) { + args.int64Value = va_arg(ap, int64_t); + } + else { + args.int64Value = va_arg(ap, int32_t); + } + break; + case ufmt_float: + args.floatValue = (float) va_arg(ap, double); + break; + case ufmt_double: + args.doubleValue = va_arg(ap, double); + break; + default: + /* else args is ignored */ + args.ptrValue = NULL; + break; + } + } + + /* call the handler function */ 
+ handler = g_u_printf_infos[ handlerNum ].handler; + if(handler != 0) { + *written += (*handler)(streamHandler, context, formatBundle, info, &args); + } + else { + /* just echo unknown tags */ + *written += (streamHandler->write)(context, fmt, (int32_t)(alias - lastAlias)); + } + } + else { + /* just echo unknown tags */ + *written += (streamHandler->write)(context, fmt, (int32_t)(alias - lastAlias)); + } + } + /* delete parsed argument list */ + if (arglist != NULL) { + uprv_free(arglist); + } + /* return # of characters in this format that have been parsed. */ + return (int32_t)(alias - fmt); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/uscanf.c b/deps/icu-small/source/io/uscanf.c deleted file mode 100644 index 9866963201..0000000000 --- a/deps/icu-small/source/io/uscanf.c +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File uscanf.c -* -* Modification History: -* -* Date Name Description -* 12/02/98 stephen Creation. -* 03/13/99 stephen Modified for new C API. -****************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION - -#include "unicode/putil.h" -#include "unicode/ustdio.h" -#include "unicode/ustring.h" -#include "uscanf.h" -#include "ufile.h" -#include "ufmt_cmn.h" - -#include "cmemory.h" -#include "cstring.h" - - -U_CAPI int32_t U_EXPORT2 -u_fscanf(UFILE *f, - const char *patternSpecification, - ... 
) -{ - va_list ap; - int32_t converted; - - va_start(ap, patternSpecification); - converted = u_vfscanf(f, patternSpecification, ap); - va_end(ap); - - return converted; -} - -U_CAPI int32_t U_EXPORT2 -u_fscanf_u(UFILE *f, - const UChar *patternSpecification, - ... ) -{ - va_list ap; - int32_t converted; - - va_start(ap, patternSpecification); - converted = u_vfscanf_u(f, patternSpecification, ap); - va_end(ap); - - return converted; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vfscanf(UFILE *f, - const char *patternSpecification, - va_list ap) -{ - int32_t converted; - UChar *pattern; - UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; - int32_t size = (int32_t)uprv_strlen(patternSpecification) + 1; - - /* convert from the default codepage to Unicode */ - if (size >= MAX_UCHAR_BUFFER_SIZE(patBuffer)) { - pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); - if(pattern == 0) { - return 0; - } - } - else { - pattern = patBuffer; - } - u_charsToUChars(patternSpecification, pattern, size); - - /* do the work */ - converted = u_vfscanf_u(f, pattern, ap); - - /* clean up */ - if (pattern != patBuffer) { - uprv_free(pattern); - } - - return converted; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_vfscanf_u(UFILE *f, - const UChar *patternSpecification, - va_list ap) -{ - return u_scanf_parse(f, patternSpecification, ap); -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/uscanf.cpp b/deps/icu-small/source/io/uscanf.cpp new file mode 100644 index 0000000000..0febd21398 --- /dev/null +++ b/deps/icu-small/source/io/uscanf.cpp @@ -0,0 +1,107 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +****************************************************************************** +* +* File uscanf.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/13/99 stephen Modified for new C API. +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "unicode/ustdio.h" +#include "unicode/ustring.h" +#include "uscanf.h" +#include "ufile.h" +#include "ufmt_cmn.h" + +#include "cmemory.h" +#include "cstring.h" + + +U_CAPI int32_t U_EXPORT2 +u_fscanf(UFILE *f, + const char *patternSpecification, + ... ) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vfscanf(f, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 +u_fscanf_u(UFILE *f, + const UChar *patternSpecification, + ... ) +{ + va_list ap; + int32_t converted; + + va_start(ap, patternSpecification); + converted = u_vfscanf_u(f, patternSpecification, ap); + va_end(ap); + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vfscanf(UFILE *f, + const char *patternSpecification, + va_list ap) +{ + int32_t converted; + UChar *pattern; + UChar patBuffer[UFMT_DEFAULT_BUFFER_SIZE]; + int32_t size = (int32_t)uprv_strlen(patternSpecification) + 1; + + /* convert from the default codepage to Unicode */ + if (size >= MAX_UCHAR_BUFFER_SIZE(patBuffer)) { + pattern = (UChar *)uprv_malloc(size * sizeof(UChar)); + if(pattern == 0) { + return 0; + } + } + else { + pattern = patBuffer; + } + u_charsToUChars(patternSpecification, pattern, size); + + /* do the work */ + converted = u_vfscanf_u(f, pattern, ap); + + /* clean up */ + if (pattern != patBuffer) { + uprv_free(pattern); + } + + return converted; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_vfscanf_u(UFILE *f, + const UChar *patternSpecification, + va_list ap) +{ + return u_scanf_parse(f, patternSpecification, ap); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/uscanf.h b/deps/icu-small/source/io/uscanf.h index bbe84d9e2b..760691cb0a 100644 --- a/deps/icu-small/source/io/uscanf.h +++ b/deps/icu-small/source/io/uscanf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/io/uscanf_p.c b/deps/icu-small/source/io/uscanf_p.c deleted file mode 100644 index f17502038a..0000000000 --- a/deps/icu-small/source/io/uscanf_p.c +++ /dev/null @@ -1,1408 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* -* File uscnnf_p.c -* -* Modification History: -* -* Date Name Description -* 12/02/98 stephen Creation. -* 03/13/99 stephen Modified for new C API. 
-******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION - -#include "unicode/uchar.h" -#include "unicode/ustring.h" -#include "unicode/unum.h" -#include "unicode/udat.h" -#include "unicode/uset.h" -#include "uscanf.h" -#include "ufmt_cmn.h" -#include "ufile.h" -#include "locbund.h" - -#include "cmemory.h" -#include "ustr_cnv.h" - -/* flag characters for u_scanf */ -#define FLAG_ASTERISK 0x002A -#define FLAG_PAREN 0x0028 - -#define ISFLAG(s) (s) == FLAG_ASTERISK || \ - (s) == FLAG_PAREN - -/* special characters for u_scanf */ -#define SPEC_DOLLARSIGN 0x0024 - -/* unicode digits */ -#define DIGIT_ZERO 0x0030 -#define DIGIT_ONE 0x0031 -#define DIGIT_TWO 0x0032 -#define DIGIT_THREE 0x0033 -#define DIGIT_FOUR 0x0034 -#define DIGIT_FIVE 0x0035 -#define DIGIT_SIX 0x0036 -#define DIGIT_SEVEN 0x0037 -#define DIGIT_EIGHT 0x0038 -#define DIGIT_NINE 0x0039 - -#define ISDIGIT(s) (s) == DIGIT_ZERO || \ - (s) == DIGIT_ONE || \ - (s) == DIGIT_TWO || \ - (s) == DIGIT_THREE || \ - (s) == DIGIT_FOUR || \ - (s) == DIGIT_FIVE || \ - (s) == DIGIT_SIX || \ - (s) == DIGIT_SEVEN || \ - (s) == DIGIT_EIGHT || \ - (s) == DIGIT_NINE - -/* u_scanf modifiers */ -#define MOD_H 0x0068 -#define MOD_LOWERL 0x006C -#define MOD_L 0x004C - -#define ISMOD(s) (s) == MOD_H || \ - (s) == MOD_LOWERL || \ - (s) == MOD_L - -/** - * Struct encapsulating a single uscanf format specification. - */ -typedef struct u_scanf_spec_info { - int32_t fWidth; /* Width */ - - UChar fSpec; /* Format specification */ - - UChar fPadChar; /* Padding character */ - - UBool fSkipArg; /* TRUE if arg should be skipped */ - UBool fIsLongDouble; /* L flag */ - UBool fIsShort; /* h flag */ - UBool fIsLong; /* l flag */ - UBool fIsLongLong; /* ll flag */ - UBool fIsString; /* TRUE if this is a NULL-terminated string. */ -} u_scanf_spec_info; - - -/** - * Struct encapsulating a single u_scanf format specification. 
- */ -typedef struct u_scanf_spec { - u_scanf_spec_info fInfo; /* Information on this spec */ - int32_t fArgPos; /* Position of data in arg list */ -} u_scanf_spec; - -/** - * Parse a single u_scanf format specifier in Unicode. - * @param fmt A pointer to a '%' character in a u_scanf format specification. - * @param spec A pointer to a u_scanf_spec to receive the parsed - * format specifier. - * @return The number of characters contained in this specifier. - */ -static int32_t -u_scanf_parse_spec (const UChar *fmt, - u_scanf_spec *spec) -{ - const UChar *s = fmt; - const UChar *backup; - u_scanf_spec_info *info = &(spec->fInfo); - - /* initialize spec to default values */ - spec->fArgPos = -1; - - info->fWidth = -1; - info->fSpec = 0x0000; - info->fPadChar = 0x0020; - info->fSkipArg = FALSE; - info->fIsLongDouble = FALSE; - info->fIsShort = FALSE; - info->fIsLong = FALSE; - info->fIsLongLong = FALSE; - info->fIsString = TRUE; - - - /* skip over the initial '%' */ - s++; - - /* Check for positional argument */ - if(ISDIGIT(*s)) { - - /* Save the current position */ - backup = s; - - /* handle positional parameters */ - if(ISDIGIT(*s)) { - spec->fArgPos = (int) (*s++ - DIGIT_ZERO); - - while(ISDIGIT(*s)) { - spec->fArgPos *= 10; - spec->fArgPos += (int) (*s++ - DIGIT_ZERO); - } - } - - /* if there is no '$', don't read anything */ - if(*s != SPEC_DOLLARSIGN) { - spec->fArgPos = -1; - s = backup; - } - /* munge the '$' */ - else - s++; - } - - /* Get any format flags */ - while(ISFLAG(*s)) { - switch(*s++) { - - /* skip argument */ - case FLAG_ASTERISK: - info->fSkipArg = TRUE; - break; - - /* pad character specified */ - case FLAG_PAREN: - - /* first four characters are hex values for pad char */ - info->fPadChar = (UChar)ufmt_digitvalue(*s++); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); - info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); - 
- /* final character is ignored */ - s++; - - break; - } - } - - /* Get the width */ - if(ISDIGIT(*s)){ - info->fWidth = (int) (*s++ - DIGIT_ZERO); - - while(ISDIGIT(*s)) { - info->fWidth *= 10; - info->fWidth += (int) (*s++ - DIGIT_ZERO); - } - } - - /* Get any modifiers */ - if(ISMOD(*s)) { - switch(*s++) { - - /* short */ - case MOD_H: - info->fIsShort = TRUE; - break; - - /* long or long long */ - case MOD_LOWERL: - if(*s == MOD_LOWERL) { - info->fIsLongLong = TRUE; - /* skip over the next 'l' */ - s++; - } - else - info->fIsLong = TRUE; - break; - - /* long double */ - case MOD_L: - info->fIsLongDouble = TRUE; - break; - } - } - - /* finally, get the specifier letter */ - info->fSpec = *s++; - - /* return # of characters in this specifier */ - return (int32_t)(s - fmt); -} - -#define UP_PERCENT 0x0025 - - -/* ANSI style formatting */ -/* Use US-ASCII characters only for formatting */ - -/* % */ -#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} -/* s */ -#define UFMT_STRING {ufmt_string, u_scanf_string_handler} -/* c */ -#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} -/* d, i */ -#define UFMT_INT {ufmt_int, u_scanf_integer_handler} -/* u */ -#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} -/* o */ -#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} -/* x, X */ -#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} -/* f */ -#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} -/* e, E */ -#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} -/* g, G */ -#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} -/* n */ -#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} -/* [ */ -#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} - -/* non-ANSI extensions */ -/* Use US-ASCII characters only for formatting */ - -/* p */ -#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} -/* V */ -#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} -/* P */ -#define UFMT_PERCENT 
{ufmt_double, u_scanf_percent_handler} -/* C K is old format */ -#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} -/* S U is old format */ -#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} - - -#define UFMT_EMPTY {ufmt_empty, NULL} - -/** - * A u_scanf handler function. - * A u_scanf handler is responsible for handling a single u_scanf - * format specification, for example 'd' or 's'. - * @param stream The UFILE to which to write output. - * @param info A pointer to a u_scanf_spec_info struct containing - * information on the format specification. - * @param args A pointer to the argument data - * @param fmt A pointer to the first character in the format string - * following the spec. - * @param fmtConsumed On output, set to the number of characters consumed - * in fmt. Do nothing, if the argument isn't variable width. - * @param argConverted The number of arguments converted and assigned, or -1 if an - * error occurred. - * @return The number of code points consumed during reading. - */ -typedef int32_t (*u_scanf_handler) (UFILE *stream, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted); - -typedef struct u_scanf_info { - ufmt_type_info info; - u_scanf_handler handler; -} u_scanf_info; - -#define USCANF_NUM_FMT_HANDLERS 108 -#define USCANF_SYMBOL_BUFFER_SIZE 8 - -/* We do not use handlers for 0-0x1f */ -#define USCANF_BASE_FMT_HANDLERS 0x20 - - -static int32_t -u_scanf_skip_leading_ws(UFILE *input, - UChar pad) -{ - UChar c; - int32_t count = 0; - UBool isNotEOF; - - /* skip all leading ws in the input */ - while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) - { - count++; - } - - /* put the final character back on the input */ - if(isNotEOF) - u_fungetc(c, input); - - return count; -} - -/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? 
*/ -static int32_t -u_scanf_skip_leading_positive_sign(UFILE *input, - UNumberFormat *format, - UErrorCode *status) -{ - UChar c; - int32_t count = 0; - UBool isNotEOF; - UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; - int32_t symbolLen; - UErrorCode localStatus = U_ZERO_ERROR; - - if (U_SUCCESS(*status)) { - symbolLen = unum_getSymbol(format, - UNUM_PLUS_SIGN_SYMBOL, - plusSymbol, - UPRV_LENGTHOF(plusSymbol), - &localStatus); - - if (U_SUCCESS(localStatus)) { - /* skip all leading ws in the input */ - while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) - { - count++; - } - - /* put the final character back on the input */ - if(isNotEOF) { - u_fungetc(c, input); - } - } - } - - return count; -} - -static int32_t -u_scanf_simple_percent_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - /* make sure the next character in the input is a percent */ - *argConverted = 0; - if(u_fgetc(input) != 0x0025) { - *argConverted = -1; - } - return 1; -} - -static int32_t -u_scanf_count_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - /* in the special case of count, the u_scanf_spec_info's width */ - /* will contain the # of items converted thus far */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); - else if (info->fIsLongLong) - *(int64_t*)(args[0].ptrValue) = info->fWidth; - else - *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); - } - *argConverted = 0; - - /* we converted 0 args */ - return 0; -} - -static int32_t -u_scanf_double_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - 
UErrorCode status = U_ZERO_ERROR; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - skipped += u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - if (info->fIsLong) - *(double*)(args[0].ptrValue) = num; - else if (info->fIsLongDouble) - *(long double*)(args[0].ptrValue) = num; - else - *(float*)(args[0].ptrValue) = (float)num; - } - - /* mask off any necessary bits */ - /* if(! 
info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -#define UPRINTF_SYMBOL_BUFFER_SIZE 8 - -static int32_t -u_scanf_scientific_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; - int32_t srcLen, expLen; - UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); - - /* handle error */ - if(format == 0) - return 0; - - /* set the appropriate flags on the formatter */ - - srcLen = unum_getSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - srcExpBuf, - sizeof(srcExpBuf), - &status); - - /* Upper/lower case the e */ - if (info->fSpec == (UChar)0x65 /* e */) { - expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), - srcExpBuf, srcLen, - input->str.fBundle.fLocale, - &status); - } - else { - expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), - srcExpBuf, srcLen, - input->str.fBundle.fLocale, - &status); - } - - unum_setSymbol(format, - UNUM_EXPONENTIAL_SYMBOL, - expBuf, - expLen, - &status); - - - - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ - skipped += u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - if (info->fIsLong) - *(double*)(args[0].ptrValue) = num; - else if (info->fIsLongDouble) - *(long double*)(args[0].ptrValue) = num; - else - *(float*)(args[0].ptrValue) = (float)num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_scidbl_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - double num; - UNumberFormat *scientificFormat, *genericFormat; - /*int32_t scientificResult, genericResult;*/ - double scientificResult, genericResult; - int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; - int32_t skipped; - UErrorCode scientificStatus = U_ZERO_ERROR; - UErrorCode genericStatus = U_ZERO_ERROR; - - - /* since we can't determine by scanning the characters whether */ - /* a number was formatted in the 'f' or 'g' styles, parse the */ - /* string with both formatters, and assume whichever one */ - /* parsed the most is the correct formatter to use */ - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatters */ - scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); - genericFormat = 
u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); - - /* handle error */ - if(scientificFormat == 0 || genericFormat == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); - - /* parse the number using each format*/ - - scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, - &scientificParsePos, &scientificStatus); - - genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, - &genericParsePos, &genericStatus); - - /* determine which parse made it farther */ - if(scientificParsePos > genericParsePos) { - /* stash the result in num */ - num = scientificResult; - /* update the input's position to reflect consumed data */ - parsePos += scientificParsePos; - } - else { - /* stash the result in num */ - num = genericResult; - /* update the input's position to reflect consumed data */ - parsePos += genericParsePos; - } - input->str.fPos += parsePos; - - if (!info->fSkipArg) { - if (info->fIsLong) - *(double*)(args[0].ptrValue) = num; - else if (info->fIsLongDouble) - *(long double*)(args[0].ptrValue) = num; - else - *(float*)(args[0].ptrValue) = (float)num; - } - - /* mask off any necessary bits */ - /* if(! 
info->fIsLong_double) - num &= DBL_MAX;*/ - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_integer_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - void *num = (void*) (args[0].ptrValue); - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - int64_t result; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ - skipped += u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); - - /* mask off any necessary bits */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)num = (int16_t)(UINT16_MAX & result); - else if (info->fIsLongLong) - *(int64_t*)num = result; - else - *(int32_t*)num = (int32_t)(UINT32_MAX & result); - } - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_uinteger_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - /* TODO Fix this when Numberformat handles uint64_t */ - return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); -} - -static int32_t -u_scanf_percent_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - UErrorCode status = U_ZERO_ERROR; - - - /* skip all ws in the input */ - u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ - u_scanf_skip_leading_positive_sign(input, format, &status); - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - *(double*)(args[0].ptrValue) = num; - } - - /* mask off any necessary bits */ - /* if(! info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos; -} - -static int32_t -u_scanf_string_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - const UChar *source; - UConverter *conv; - char *arg = (char*)(args[0].ptrValue); - char *alias = arg; - char *limit; - UErrorCode status = U_ZERO_ERROR; - int32_t count; - int32_t skipped = 0; - UChar c; - UBool isNotEOF = FALSE; - - /* skip all ws in the input */ - if (info->fIsString) { - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - } - - /* get the string one character at a time, truncating to the width */ - count = 0; - - /* open the default converter */ - conv = u_getDefaultConverter(&status); - - if(U_FAILURE(status)) - return -1; - - while( (info->fWidth == -1 || count < info->fWidth) - && (isNotEOF = ufile_getch(input, &c)) - && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) - { - - if (!info->fSkipArg) { - /* put the character from the input onto the target */ - source = &c; - /* Since we do this one character at a time, do it this way. 
*/ - if (info->fWidth > 0) { - limit = alias + info->fWidth - count; - } - else { - limit = alias + ucnv_getMaxCharSize(conv); - } - - /* convert the character to the default codepage */ - ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, - NULL, TRUE, &status); - - if(U_FAILURE(status)) { - /* clean up */ - u_releaseDefaultConverter(conv); - return -1; - } - } - - /* increment the count */ - ++count; - } - - /* put the final character we read back on the input */ - if (!info->fSkipArg) { - if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) - u_fungetc(c, input); - - /* add the terminator */ - if (info->fIsString) { - *alias = 0x00; - } - } - - /* clean up */ - u_releaseDefaultConverter(conv); - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return count + skipped; -} - -static int32_t -u_scanf_char_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - if (info->fWidth < 0) { - info->fWidth = 1; - } - info->fIsString = FALSE; - return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); -} - -static int32_t -u_scanf_ustring_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - UChar *arg = (UChar*)(args[0].ptrValue); - UChar *alias = arg; - int32_t count; - int32_t skipped = 0; - UChar c; - UBool isNotEOF = FALSE; - - /* skip all ws in the input */ - if (info->fIsString) { - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - } - - /* get the string one character at a time, truncating to the width */ - count = 0; - - while( (info->fWidth == -1 || count < info->fWidth) - && (isNotEOF = ufile_getch(input, &c)) - && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) - { - - /* put the character from the input onto the target */ - if (!info->fSkipArg) { - *alias++ = c; - } - - /* increment the count */ - ++count; - 
} - - /* put the final character we read back on the input */ - if (!info->fSkipArg) { - if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { - u_fungetc(c, input); - } - - /* add the terminator */ - if (info->fIsString) { - *alias = 0x0000; - } - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return count + skipped; -} - -static int32_t -u_scanf_uchar_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - if (info->fWidth < 0) { - info->fWidth = 1; - } - info->fIsString = FALSE; - return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); -} - -static int32_t -u_scanf_spellout_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - double num; - UNumberFormat *format; - int32_t parsePos = 0; - int32_t skipped; - UErrorCode status = U_ZERO_ERROR; - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* get the formatter */ - format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); - - /* handle error */ - if(format == 0) - return 0; - - /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ - /* This is not applicable to RBNF. */ - /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ - - /* parse the number */ - num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); - - if (!info->fSkipArg) { - *(double*)(args[0].ptrValue) = num; - } - - /* mask off any necessary bits */ - /* if(! 
info->fIsLong_double) - num &= DBL_MAX;*/ - - /* update the input's position to reflect consumed data */ - input->str.fPos += parsePos; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return parsePos + skipped; -} - -static int32_t -u_scanf_hex_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - int32_t skipped; - void *num = (void*) (args[0].ptrValue); - int64_t result; - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* check for alternate form */ - if( *(input->str.fPos) == 0x0030 && - (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { - - /* skip the '0' and 'x' or 'X' if present */ - input->str.fPos += 2; - len -= 2; - } - - /* parse the number */ - result = ufmt_uto64(input->str.fPos, &len, 16); - - /* update the input's position to reflect consumed data */ - input->str.fPos += len; - - /* mask off any necessary bits */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)num = (int16_t)(UINT16_MAX & result); - else if (info->fIsLongLong) - *(int64_t*)num = result; - else - *(int32_t*)num = (int32_t)(UINT32_MAX & result); - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return len + skipped; -} - -static int32_t -u_scanf_octal_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - int32_t skipped; - void *num = (void*) (args[0].ptrValue); - int64_t result; - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - 
- /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) - len = ufmt_min(len, info->fWidth); - - /* parse the number */ - result = ufmt_uto64(input->str.fPos, &len, 8); - - /* update the input's position to reflect consumed data */ - input->str.fPos += len; - - /* mask off any necessary bits */ - if (!info->fSkipArg) { - if (info->fIsShort) - *(int16_t*)num = (int16_t)(UINT16_MAX & result); - else if (info->fIsLongLong) - *(int64_t*)num = result; - else - *(int32_t*)num = (int32_t)(UINT32_MAX & result); - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return len + skipped; -} - -static int32_t -u_scanf_pointer_handler(UFILE *input, - u_scanf_spec_info *info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - int32_t len; - int32_t skipped; - void *result; - void **p = (void**)(args[0].ptrValue); - - - /* skip all ws in the input */ - skipped = u_scanf_skip_leading_ws(input, info->fPadChar); - - /* fill the input's internal buffer */ - ufile_fill_uchar_buffer(input); - - /* determine the size of the input's buffer */ - len = (int32_t)(input->str.fLimit - input->str.fPos); - - /* truncate to the width, if specified */ - if(info->fWidth != -1) { - len = ufmt_min(len, info->fWidth); - } - - /* Make sure that we don't consume too much */ - if (len > (int32_t)(sizeof(void*)*2)) { - len = (int32_t)(sizeof(void*)*2); - } - - /* parse the pointer - assign to temporary value */ - result = ufmt_utop(input->str.fPos, &len); - - if (!info->fSkipArg) { - *p = result; - } - - /* update the input's position to reflect consumed data */ - input->str.fPos += len; - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return len + skipped; -} - -static int32_t -u_scanf_scanset_handler(UFILE *input, - u_scanf_spec_info 
*info, - ufmt_args *args, - const UChar *fmt, - int32_t *fmtConsumed, - int32_t *argConverted) -{ - USet *scanset; - UErrorCode status = U_ZERO_ERROR; - int32_t chLeft = INT32_MAX; - UChar32 c; - UChar *alias = (UChar*) (args[0].ptrValue); - UBool isNotEOF = FALSE; - UBool readCharacter = FALSE; - - /* Create an empty set */ - scanset = uset_open(0, -1); - - /* Back up one to get the [ */ - fmt--; - - /* truncate to the width, if specified and alias the target */ - if(info->fWidth >= 0) { - chLeft = info->fWidth; - } - - /* parse the scanset from the fmt string */ - *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); - - /* verify that the parse was successful */ - if (U_SUCCESS(status)) { - c=0; - - /* grab characters one at a time and make sure they are in the scanset */ - while(chLeft > 0) { - if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { - readCharacter = TRUE; - if (!info->fSkipArg) { - int32_t idx = 0; - UBool isError = FALSE; - - U16_APPEND(alias, idx, chLeft, c, isError); - if (isError) { - break; - } - alias += idx; - } - chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); - } - else { - /* if the character's not in the scanset, break out */ - break; - } - } - - /* put the final character we read back on the input */ - if(isNotEOF && chLeft > 0) { - u_fungetc(c, input); - } - } - - uset_close(scanset); - - /* if we didn't match at least 1 character, fail */ - if(!readCharacter) - return -1; - /* otherwise, add the terminator */ - else if (!info->fSkipArg) { - *alias = 0x00; - } - - /* we converted 1 arg */ - *argConverted = !info->fSkipArg; - return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; -} - -/* Use US-ASCII characters only for formatting. Most codepages have - characters 20-7F from Unicode. 
Using any other codepage specific - characters will make it very difficult to format the string on - non-Unicode machines */ -static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { -/* 0x20 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x30 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x40 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, - UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, -#ifdef U_USE_OBSOLETE_IO_FORMATTING - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, -#else - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, -#endif - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x50 */ - UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, -#ifdef U_USE_OBSOLETE_IO_FORMATTING - UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, -#else - UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, -#endif - UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - -/* 0x60 */ - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, - UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, - UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, - -/* 0x70 */ - UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, - UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, - UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, - UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, -}; - -U_CFUNC int32_t -u_scanf_parse(UFILE *f, - const UChar *patternSpecification, - va_list ap) -{ - const UChar *alias; - int32_t count, converted, argConsumed, cpConsumed; - uint16_t handlerNum; - - ufmt_args args; - u_scanf_spec spec; - ufmt_type_info 
info; - u_scanf_handler handler; - - /* alias the pattern */ - alias = patternSpecification; - - /* haven't converted anything yet */ - argConsumed = 0; - converted = 0; - cpConsumed = 0; - - /* iterate through the pattern */ - for(;;) { - - /* match any characters up to the next '%' */ - while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { - alias++; - } - - /* if we aren't at a '%', or if we're at end of string, break*/ - if(*alias != UP_PERCENT || *alias == 0x0000) - break; - - /* parse the specifier */ - count = u_scanf_parse_spec(alias, &spec); - - /* update the pointer in pattern */ - alias += count; - - handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); - if (handlerNum < USCANF_NUM_FMT_HANDLERS) { - /* skip the argument, if necessary */ - /* query the info function for argument information */ - info = g_u_scanf_infos[ handlerNum ].info; - if (info != ufmt_count && u_feof(f)) { - break; - } - else if(spec.fInfo.fSkipArg) { - args.ptrValue = NULL; - } - else { - switch(info) { - case ufmt_count: - /* set the spec's width to the # of items converted */ - spec.fInfo.fWidth = cpConsumed; - U_FALLTHROUGH; - case ufmt_char: - case ufmt_uchar: - case ufmt_int: - case ufmt_string: - case ufmt_ustring: - case ufmt_pointer: - case ufmt_float: - case ufmt_double: - args.ptrValue = va_arg(ap, void*); - break; - - default: - /* else args is ignored */ - args.ptrValue = NULL; - break; - } - } - - /* call the handler function */ - handler = g_u_scanf_infos[ handlerNum ].handler; - if(handler != 0) { - - /* reset count to 1 so that += for alias works. 
*/ - count = 1; - - cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); - - /* if the handler encountered an error condition, break */ - if(argConsumed < 0) { - converted = -1; - break; - } - - /* add to the # of items converted */ - converted += argConsumed; - - /* update the pointer in pattern */ - alias += count-1; - } - /* else do nothing */ - } - /* else do nothing */ - - /* just ignore unknown tags */ - } - - /* return # of items converted */ - return converted; -} - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/uscanf_p.cpp b/deps/icu-small/source/io/uscanf_p.cpp new file mode 100644 index 0000000000..c08949d729 --- /dev/null +++ b/deps/icu-small/source/io/uscanf_p.cpp @@ -0,0 +1,1450 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* +* File uscnnf_p.c +* +* Modification History: +* +* Date Name Description +* 12/02/98 stephen Creation. +* 03/13/99 stephen Modified for new C API. 
+******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION + +#include "unicode/uchar.h" +#include "unicode/ustring.h" +#include "unicode/unum.h" +#include "unicode/udat.h" +#include "unicode/uset.h" +#include "uscanf.h" +#include "ufmt_cmn.h" +#include "ufile.h" +#include "locbund.h" + +#include "cmemory.h" +#include "ustr_cnv.h" + +/* flag characters for u_scanf */ +#define FLAG_ASTERISK 0x002A +#define FLAG_PAREN 0x0028 + +#define ISFLAG(s) (s) == FLAG_ASTERISK || \ + (s) == FLAG_PAREN + +/* special characters for u_scanf */ +#define SPEC_DOLLARSIGN 0x0024 + +/* unicode digits */ +#define DIGIT_ZERO 0x0030 +#define DIGIT_ONE 0x0031 +#define DIGIT_TWO 0x0032 +#define DIGIT_THREE 0x0033 +#define DIGIT_FOUR 0x0034 +#define DIGIT_FIVE 0x0035 +#define DIGIT_SIX 0x0036 +#define DIGIT_SEVEN 0x0037 +#define DIGIT_EIGHT 0x0038 +#define DIGIT_NINE 0x0039 + +#define ISDIGIT(s) (s) == DIGIT_ZERO || \ + (s) == DIGIT_ONE || \ + (s) == DIGIT_TWO || \ + (s) == DIGIT_THREE || \ + (s) == DIGIT_FOUR || \ + (s) == DIGIT_FIVE || \ + (s) == DIGIT_SIX || \ + (s) == DIGIT_SEVEN || \ + (s) == DIGIT_EIGHT || \ + (s) == DIGIT_NINE + +/* u_scanf modifiers */ +#define MOD_H 0x0068 +#define MOD_LOWERL 0x006C +#define MOD_L 0x004C + +#define ISMOD(s) (s) == MOD_H || \ + (s) == MOD_LOWERL || \ + (s) == MOD_L + +/** + * Struct encapsulating a single uscanf format specification. + */ +typedef struct u_scanf_spec_info { + int32_t fWidth; /* Width */ + + UChar fSpec; /* Format specification */ + + UChar fPadChar; /* Padding character */ + + UBool fSkipArg; /* TRUE if arg should be skipped */ + UBool fIsLongDouble; /* L flag */ + UBool fIsShort; /* h flag */ + UBool fIsLong; /* l flag */ + UBool fIsLongLong; /* ll flag */ + UBool fIsString; /* TRUE if this is a NULL-terminated string. */ +} u_scanf_spec_info; + + +/** + * Struct encapsulating a single u_scanf format specification. 
+ */ +typedef struct u_scanf_spec { + u_scanf_spec_info fInfo; /* Information on this spec */ + int32_t fArgPos; /* Position of data in arg list */ +} u_scanf_spec; + +/** + * Parse a single u_scanf format specifier in Unicode. + * @param fmt A pointer to a '%' character in a u_scanf format specification. + * @param spec A pointer to a u_scanf_spec to receive the parsed + * format specifier. + * @return The number of characters contained in this specifier. + */ +static int32_t +u_scanf_parse_spec (const UChar *fmt, + u_scanf_spec *spec) +{ + const UChar *s = fmt; + const UChar *backup; + u_scanf_spec_info *info = &(spec->fInfo); + + /* initialize spec to default values */ + spec->fArgPos = -1; + + info->fWidth = -1; + info->fSpec = 0x0000; + info->fPadChar = 0x0020; + info->fSkipArg = FALSE; + info->fIsLongDouble = FALSE; + info->fIsShort = FALSE; + info->fIsLong = FALSE; + info->fIsLongLong = FALSE; + info->fIsString = TRUE; + + + /* skip over the initial '%' */ + s++; + + /* Check for positional argument */ + if(ISDIGIT(*s)) { + + /* Save the current position */ + backup = s; + + /* handle positional parameters */ + if(ISDIGIT(*s)) { + spec->fArgPos = (int) (*s++ - DIGIT_ZERO); + + while(ISDIGIT(*s)) { + spec->fArgPos *= 10; + spec->fArgPos += (int) (*s++ - DIGIT_ZERO); + } + } + + /* if there is no '$', don't read anything */ + if(*s != SPEC_DOLLARSIGN) { + spec->fArgPos = -1; + s = backup; + } + /* munge the '$' */ + else + s++; + } + + /* Get any format flags */ + while(ISFLAG(*s)) { + switch(*s++) { + + /* skip argument */ + case FLAG_ASTERISK: + info->fSkipArg = TRUE; + break; + + /* pad character specified */ + case FLAG_PAREN: + + /* first four characters are hex values for pad char */ + info->fPadChar = (UChar)ufmt_digitvalue(*s++); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); + 
+ /* final character is ignored */ + s++; + + break; + } + } + + /* Get the width */ + if(ISDIGIT(*s)){ + info->fWidth = (int) (*s++ - DIGIT_ZERO); + + while(ISDIGIT(*s)) { + info->fWidth *= 10; + info->fWidth += (int) (*s++ - DIGIT_ZERO); + } + } + + /* Get any modifiers */ + if(ISMOD(*s)) { + switch(*s++) { + + /* short */ + case MOD_H: + info->fIsShort = TRUE; + break; + + /* long or long long */ + case MOD_LOWERL: + if(*s == MOD_LOWERL) { + info->fIsLongLong = TRUE; + /* skip over the next 'l' */ + s++; + } + else + info->fIsLong = TRUE; + break; + + /* long double */ + case MOD_L: + info->fIsLongDouble = TRUE; + break; + } + } + + /* finally, get the specifier letter */ + info->fSpec = *s++; + + /* return # of characters in this specifier */ + return (int32_t)(s - fmt); +} + +#define UP_PERCENT 0x0025 + + +/* ANSI style formatting */ +/* Use US-ASCII characters only for formatting */ + +/* % */ +#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} +/* s */ +#define UFMT_STRING {ufmt_string, u_scanf_string_handler} +/* c */ +#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} +/* d, i */ +#define UFMT_INT {ufmt_int, u_scanf_integer_handler} +/* u */ +#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} +/* o */ +#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} +/* x, X */ +#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} +/* f */ +#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} +/* e, E */ +#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} +/* g, G */ +#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} +/* n */ +#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} +/* [ */ +#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} + +/* non-ANSI extensions */ +/* Use US-ASCII characters only for formatting */ + +/* p */ +#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} +/* V */ +#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} +/* P */ +#define UFMT_PERCENT 
{ufmt_double, u_scanf_percent_handler} +/* C K is old format */ +#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} +/* S U is old format */ +#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} + + +#define UFMT_EMPTY {ufmt_empty, NULL} + +/** + * A u_scanf handler function. + * A u_scanf handler is responsible for handling a single u_scanf + * format specification, for example 'd' or 's'. + * @param stream The UFILE to which to write output. + * @param info A pointer to a u_scanf_spec_info struct containing + * information on the format specification. + * @param args A pointer to the argument data + * @param fmt A pointer to the first character in the format string + * following the spec. + * @param fmtConsumed On output, set to the number of characters consumed + * in fmt. Do nothing, if the argument isn't variable width. + * @param argConverted The number of arguments converted and assigned, or -1 if an + * error occurred. + * @return The number of code points consumed during reading. + */ +typedef int32_t (*u_scanf_handler) (UFILE *stream, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted); + +typedef struct u_scanf_info { + ufmt_type_info info; + u_scanf_handler handler; +} u_scanf_info; + +#define USCANF_NUM_FMT_HANDLERS 108 +#define USCANF_SYMBOL_BUFFER_SIZE 8 + +/* We do not use handlers for 0-0x1f */ +#define USCANF_BASE_FMT_HANDLERS 0x20 + + +static int32_t +u_scanf_skip_leading_ws(UFILE *input, + UChar pad) +{ + UChar c; + int32_t count = 0; + UBool isNotEOF; + + /* skip all leading ws in the input */ + while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) + { + count++; + } + + /* put the final character back on the input */ + if(isNotEOF) + u_fungetc(c, input); + + return count; +} + +/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? 
*/ +static int32_t +u_scanf_skip_leading_positive_sign(UFILE *input, + UNumberFormat *format, + UErrorCode *status) +{ + UChar c; + int32_t count = 0; + UBool isNotEOF; + UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; + int32_t symbolLen; + UErrorCode localStatus = U_ZERO_ERROR; + + if (U_SUCCESS(*status)) { + symbolLen = unum_getSymbol(format, + UNUM_PLUS_SIGN_SYMBOL, + plusSymbol, + UPRV_LENGTHOF(plusSymbol), + &localStatus); + + if (U_SUCCESS(localStatus)) { + /* skip all leading ws in the input */ + while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) + { + count++; + } + + /* put the final character back on the input */ + if(isNotEOF) { + u_fungetc(c, input); + } + } + } + + return count; +} + +static int32_t +u_scanf_simple_percent_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)info; + (void)args; + (void)fmt; + (void)fmtConsumed; + + /* make sure the next character in the input is a percent */ + *argConverted = 0; + if(u_fgetc(input) != 0x0025) { + *argConverted = -1; + } + return 1; +} + +static int32_t +u_scanf_count_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)input; + (void)fmt; + (void)fmtConsumed; + + /* in the special case of count, the u_scanf_spec_info's width */ + /* will contain the # of items converted thus far */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); + else if (info->fIsLongLong) + *(int64_t*)(args[0].ptrValue) = info->fWidth; + else + *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); + } + *argConverted = 0; + + /* we converted 0 args */ + return 0; +} + +static int32_t +u_scanf_double_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t 
*argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! 
info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +#define UPRINTF_SYMBOL_BUFFER_SIZE 8 + +static int32_t +u_scanf_scientific_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + int32_t srcLen, expLen; + UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); + + /* handle error */ + if(format == 0) + return 0; + + /* set the appropriate flags on the formatter */ + + srcLen = unum_getSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + srcExpBuf, + sizeof(srcExpBuf), + &status); + + /* Upper/lower case the e */ + if (info->fSpec == (UChar)0x65 /* e */) { + expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + input->str.fBundle.fLocale, + &status); + } + else { + expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), + srcExpBuf, srcLen, + input->str.fBundle.fLocale, + &status); + } + + unum_setSymbol(format, + UNUM_EXPONENTIAL_SYMBOL, + expBuf, + expLen, + &status); + + + + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_scidbl_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *scientificFormat, *genericFormat; + /*int32_t scientificResult, genericResult;*/ + double scientificResult, genericResult; + int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; + int32_t skipped; + UErrorCode scientificStatus = U_ZERO_ERROR; + UErrorCode genericStatus = U_ZERO_ERROR; + + + /* since we can't determine by scanning the characters whether */ + /* a number was formatted in the 'f' or 'g' styles, parse the */ + /* string with both formatters, and assume whichever one */ + /* parsed the most is the correct formatter to use */ + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatters */ + scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); + 
genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(scientificFormat == 0 || genericFormat == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); + + /* parse the number using each format*/ + + scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, + &scientificParsePos, &scientificStatus); + + genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, + &genericParsePos, &genericStatus); + + /* determine which parse made it farther */ + if(scientificParsePos > genericParsePos) { + /* stash the result in num */ + num = scientificResult; + /* update the input's position to reflect consumed data */ + parsePos += scientificParsePos; + } + else { + /* stash the result in num */ + num = genericResult; + /* update the input's position to reflect consumed data */ + parsePos += genericParsePos; + } + input->str.fPos += parsePos; + + if (!info->fSkipArg) { + if (info->fIsLong) + *(double*)(args[0].ptrValue) = num; + else if (info->fIsLongDouble) + *(long double*)(args[0].ptrValue) = num; + else + *(float*)(args[0].ptrValue) = (float)num; + } + + /* mask off any necessary bits */ + /* if(! 
info->fIsLong_double) + num &= DBL_MAX;*/ + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_integer_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + void *num = (void*) (args[0].ptrValue); + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + int64_t result; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ + skipped += u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_uinteger_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + /* TODO Fix this when Numberformat handles uint64_t */ + return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_percent_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. 
*/ + u_scanf_skip_leading_positive_sign(input, format, &status); + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + *(double*)(args[0].ptrValue) = num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos; +} + +static int32_t +u_scanf_string_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + const UChar *source; + UConverter *conv; + char *arg = (char*)(args[0].ptrValue); + char *alias = arg; + char *limit; + UErrorCode status = U_ZERO_ERROR; + int32_t count; + int32_t skipped = 0; + UChar c; + UBool isNotEOF = FALSE; + + /* skip all ws in the input */ + if (info->fIsString) { + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + } + + /* get the string one character at a time, truncating to the width */ + count = 0; + + /* open the default converter */ + conv = u_getDefaultConverter(&status); + + if(U_FAILURE(status)) + return -1; + + while( (info->fWidth == -1 || count < info->fWidth) + && (isNotEOF = ufile_getch(input, &c)) + && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) + { + + if (!info->fSkipArg) { + /* put the character from the input onto the target */ + source = &c; + /* Since we do this one character at a time, do it this way. 
*/ + if (info->fWidth > 0) { + limit = alias + info->fWidth - count; + } + else { + limit = alias + ucnv_getMaxCharSize(conv); + } + + /* convert the character to the default codepage */ + ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, + NULL, TRUE, &status); + + if(U_FAILURE(status)) { + /* clean up */ + u_releaseDefaultConverter(conv); + return -1; + } + } + + /* increment the count */ + ++count; + } + + /* put the final character we read back on the input */ + if (!info->fSkipArg) { + if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) + u_fungetc(c, input); + + /* add the terminator */ + if (info->fIsString) { + *alias = 0x00; + } + } + + /* clean up */ + u_releaseDefaultConverter(conv); + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return count + skipped; +} + +static int32_t +u_scanf_char_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + if (info->fWidth < 0) { + info->fWidth = 1; + } + info->fIsString = FALSE; + return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_ustring_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + UChar *arg = (UChar*)(args[0].ptrValue); + UChar *alias = arg; + int32_t count; + int32_t skipped = 0; + UChar c; + UBool isNotEOF = FALSE; + + /* skip all ws in the input */ + if (info->fIsString) { + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + } + + /* get the string one character at a time, truncating to the width */ + count = 0; + + while( (info->fWidth == -1 || count < info->fWidth) + && (isNotEOF = ufile_getch(input, &c)) + && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) + { + + /* put the character from the input onto the target */ + if (!info->fSkipArg) { + *alias++ = c; + } + + /* 
increment the count */ + ++count; + } + + /* put the final character we read back on the input */ + if (!info->fSkipArg) { + if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { + u_fungetc(c, input); + } + + /* add the terminator */ + if (info->fIsString) { + *alias = 0x0000; + } + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return count + skipped; +} + +static int32_t +u_scanf_uchar_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + if (info->fWidth < 0) { + info->fWidth = 1; + } + info->fIsString = FALSE; + return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); +} + +static int32_t +u_scanf_spellout_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + double num; + UNumberFormat *format; + int32_t parsePos = 0; + int32_t skipped; + UErrorCode status = U_ZERO_ERROR; + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* get the formatter */ + format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); + + /* handle error */ + if(format == 0) + return 0; + + /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ + /* This is not applicable to RBNF. 
*/ + /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ + + /* parse the number */ + num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); + + if (!info->fSkipArg) { + *(double*)(args[0].ptrValue) = num; + } + + /* mask off any necessary bits */ + /* if(! info->fIsLong_double) + num &= DBL_MAX;*/ + + /* update the input's position to reflect consumed data */ + input->str.fPos += parsePos; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return parsePos + skipped; +} + +static int32_t +u_scanf_hex_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *num = (void*) (args[0].ptrValue); + int64_t result; + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* check for alternate form */ + if( *(input->str.fPos) == 0x0030 && + (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { + + /* skip the '0' and 'x' or 'X' if present */ + input->str.fPos += 2; + len -= 2; + } + + /* parse the number */ + result = ufmt_uto64(input->str.fPos, &len, 16); + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t 
+u_scanf_octal_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *num = (void*) (args[0].ptrValue); + int64_t result; + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) + len = ufmt_min(len, info->fWidth); + + /* parse the number */ + result = ufmt_uto64(input->str.fPos, &len, 8); + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* mask off any necessary bits */ + if (!info->fSkipArg) { + if (info->fIsShort) + *(int16_t*)num = (int16_t)(UINT16_MAX & result); + else if (info->fIsLongLong) + *(int64_t*)num = result; + else + *(int32_t*)num = (int32_t)(UINT32_MAX & result); + } + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_pointer_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + (void)fmt; + (void)fmtConsumed; + + int32_t len; + int32_t skipped; + void *result; + void **p = (void**)(args[0].ptrValue); + + + /* skip all ws in the input */ + skipped = u_scanf_skip_leading_ws(input, info->fPadChar); + + /* fill the input's internal buffer */ + ufile_fill_uchar_buffer(input); + + /* determine the size of the input's buffer */ + len = (int32_t)(input->str.fLimit - input->str.fPos); + + /* truncate to the width, if specified */ + if(info->fWidth != -1) { + len = ufmt_min(len, info->fWidth); + } + + /* Make sure that we don't consume too much */ + if (len > (int32_t)(sizeof(void*)*2)) { + len = 
(int32_t)(sizeof(void*)*2); + } + + /* parse the pointer - assign to temporary value */ + result = ufmt_utop(input->str.fPos, &len); + + if (!info->fSkipArg) { + *p = result; + } + + /* update the input's position to reflect consumed data */ + input->str.fPos += len; + + /* we converted 1 arg */ + *argConverted = !info->fSkipArg; + return len + skipped; +} + +static int32_t +u_scanf_scanset_handler(UFILE *input, + u_scanf_spec_info *info, + ufmt_args *args, + const UChar *fmt, + int32_t *fmtConsumed, + int32_t *argConverted) +{ + USet *scanset; + UErrorCode status = U_ZERO_ERROR; + int32_t chLeft = INT32_MAX; + UChar32 c; + UChar *alias = (UChar*) (args[0].ptrValue); + UBool isNotEOF = FALSE; + UBool readCharacter = FALSE; + + /* Create an empty set */ + scanset = uset_open(0, -1); + + /* Back up one to get the [ */ + fmt--; + + /* truncate to the width, if specified and alias the target */ + if(info->fWidth >= 0) { + chLeft = info->fWidth; + } + + /* parse the scanset from the fmt string */ + *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); + + /* verify that the parse was successful */ + if (U_SUCCESS(status)) { + c=0; + + /* grab characters one at a time and make sure they are in the scanset */ + while(chLeft > 0) { + if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { + readCharacter = TRUE; + if (!info->fSkipArg) { + int32_t idx = 0; + UBool isError = FALSE; + + U16_APPEND(alias, idx, chLeft, c, isError); + if (isError) { + break; + } + alias += idx; + } + chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); + } + else { + /* if the character's not in the scanset, break out */ + break; + } + } + + /* put the final character we read back on the input */ + if(isNotEOF && chLeft > 0) { + u_fungetc(c, input); + } + } + + uset_close(scanset); + + /* if we didn't match at least 1 character, fail */ + if(!readCharacter) + return -1; + /* otherwise, add the terminator */ + else if (!info->fSkipArg) { + *alias = 0x00; + } + + /* we converted 1 
arg */ + *argConverted = !info->fSkipArg; + return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; +} + +/* Use US-ASCII characters only for formatting. Most codepages have + characters 20-7F from Unicode. Using any other codepage specific + characters will make it very difficult to format the string on + non-Unicode machines */ +static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { +/* 0x20 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x30 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x40 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, + UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +#endif + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x50 */ + UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, +#ifdef U_USE_OBSOLETE_IO_FORMATTING + UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, +#else + UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, +#endif + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + +/* 0x60 */ + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, + UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, + UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, + +/* 0x70 */ + UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, + UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, + UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, + UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, +}; + +U_CFUNC int32_t +u_scanf_parse(UFILE 
*f, + const UChar *patternSpecification, + va_list ap) +{ + const UChar *alias; + int32_t count, converted, argConsumed, cpConsumed; + uint16_t handlerNum; + + ufmt_args args; + u_scanf_spec spec; + ufmt_type_info info; + u_scanf_handler handler; + + /* alias the pattern */ + alias = patternSpecification; + + /* haven't converted anything yet */ + argConsumed = 0; + converted = 0; + cpConsumed = 0; + + /* iterate through the pattern */ + for(;;) { + + /* match any characters up to the next '%' */ + while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { + alias++; + } + + /* if we aren't at a '%', or if we're at end of string, break*/ + if(*alias != UP_PERCENT || *alias == 0x0000) + break; + + /* parse the specifier */ + count = u_scanf_parse_spec(alias, &spec); + + /* update the pointer in pattern */ + alias += count; + + handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); + if (handlerNum < USCANF_NUM_FMT_HANDLERS) { + /* skip the argument, if necessary */ + /* query the info function for argument information */ + info = g_u_scanf_infos[ handlerNum ].info; + if (info != ufmt_count && u_feof(f)) { + break; + } + else if(spec.fInfo.fSkipArg) { + args.ptrValue = NULL; + } + else { + switch(info) { + case ufmt_count: + /* set the spec's width to the # of items converted */ + spec.fInfo.fWidth = cpConsumed; + U_FALLTHROUGH; + case ufmt_char: + case ufmt_uchar: + case ufmt_int: + case ufmt_string: + case ufmt_ustring: + case ufmt_pointer: + case ufmt_float: + case ufmt_double: + args.ptrValue = va_arg(ap, void*); + break; + + default: + /* else args is ignored */ + args.ptrValue = NULL; + break; + } + } + + /* call the handler function */ + handler = g_u_scanf_infos[ handlerNum ].handler; + if(handler != 0) { + + /* reset count to 1 so that += for alias works. 
*/ + count = 1; + + cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); + + /* if the handler encountered an error condition, break */ + if(argConsumed < 0) { + converted = -1; + break; + } + + /* add to the # of items converted */ + converted += argConsumed; + + /* update the pointer in pattern */ + alias += count-1; + } + /* else do nothing */ + } + /* else do nothing */ + + /* just ignore unknown tags */ + } + + /* return # of items converted */ + return converted; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/io/ustdio.c b/deps/icu-small/source/io/ustdio.c deleted file mode 100644 index 4b156595bb..0000000000 --- a/deps/icu-small/source/io/ustdio.c +++ /dev/null @@ -1,732 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ****************************************************************************** - * - * Copyright (C) 1998-2016, International Business Machines - * Corporation and others. All Rights Reserved. - * - ****************************************************************************** - * - * File ustdio.c - * - * Modification History: - * - * Date Name Description - * 11/18/98 stephen Creation. - * 03/12/99 stephen Modified for new C API. - * 07/19/99 stephen Fixed read() and gets() - ****************************************************************************** - */ - -#include "unicode/ustdio.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "ufile.h" -#include "ufmt_cmn.h" -#include "unicode/ucnv.h" -#include "unicode/ustring.h" - -#include - -#define DELIM_LF 0x000A -#define DELIM_VT 0x000B -#define DELIM_FF 0x000C -#define DELIM_CR 0x000D -#define DELIM_NEL 0x0085 -#define DELIM_LS 0x2028 -#define DELIM_PS 0x2029 - -/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? 
*/ -#if U_PLATFORM_USES_ONLY_WIN32_API -static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; -static const uint32_t DELIMITERS_LEN = 2; -/* TODO: Default newline writing should be detected based upon the converter being used. */ -#else -static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; -static const uint32_t DELIMITERS_LEN = 1; -#endif - -#define IS_FIRST_STRING_DELIMITER(c1) \ - (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ - || (c1) == DELIM_NEL \ - || (c1) == DELIM_LS \ - || (c1) == DELIM_PS) -#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) -#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ - (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) - - -#if !UCONFIG_NO_TRANSLITERATION - -U_CAPI UTransliterator* U_EXPORT2 -u_fsettransliterator(UFILE *file, UFileDirection direction, - UTransliterator *adopt, UErrorCode *status) -{ - UTransliterator *old = NULL; - - if(U_FAILURE(*status)) - { - return adopt; - } - - if(!file) - { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return adopt; - } - - if(direction & U_READ) - { - /** TODO: implement */ - *status = U_UNSUPPORTED_ERROR; - return adopt; - } - - if(adopt == NULL) /* they are clearing it */ - { - if(file->fTranslit != NULL) - { - /* TODO: Check side */ - old = file->fTranslit->translit; - uprv_free(file->fTranslit->buffer); - file->fTranslit->buffer=NULL; - uprv_free(file->fTranslit); - file->fTranslit=NULL; - } - } - else - { - if(file->fTranslit == NULL) - { - file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); - if(!file->fTranslit) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return adopt; - } - file->fTranslit->capacity = 0; - file->fTranslit->length = 0; - file->fTranslit->pos = 0; - file->fTranslit->buffer = NULL; - } - else - { - old = file->fTranslit->translit; - ufile_flush_translit(file); - } - - file->fTranslit->translit = adopt; - } - - return old; -} - -static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, 
UBool flush) -{ - int32_t newlen; - int32_t junkCount = 0; - int32_t textLength; - int32_t textLimit; - UTransPosition pos; - UErrorCode status = U_ZERO_ERROR; - - if(count == NULL) - { - count = &junkCount; - } - - if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) - { - /* fast path */ - return src; - } - - /* First: slide over everything */ - if(f->fTranslit->length > f->fTranslit->pos) - { - memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, - (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); - } - f->fTranslit->length -= f->fTranslit->pos; /* always */ - f->fTranslit->pos = 0; - - /* Calculate new buffer size needed */ - newlen = (*count + f->fTranslit->length) * 4; - - if(newlen > f->fTranslit->capacity) - { - if(f->fTranslit->buffer == NULL) - { - f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); - } - else - { - f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); - } - /* Check for malloc/realloc failure. 
*/ - if (f->fTranslit->buffer == NULL) { - return NULL; - } - f->fTranslit->capacity = newlen; - } - - /* Now, copy any data over */ - u_strncpy(f->fTranslit->buffer + f->fTranslit->length, - src, - *count); - f->fTranslit->length += *count; - - /* Now, translit in place as much as we can */ - if(flush == FALSE) - { - textLength = f->fTranslit->length; - pos.contextStart = 0; - pos.contextLimit = textLength; - pos.start = 0; - pos.limit = textLength; - - utrans_transIncrementalUChars(f->fTranslit->translit, - f->fTranslit->buffer, /* because we shifted */ - &textLength, - f->fTranslit->capacity, - &pos, - &status); - - /* now: start/limit point to the transliterated text */ - /* Transliterated is [buffer..pos.start) */ - *count = pos.start; - f->fTranslit->pos = pos.start; - f->fTranslit->length = pos.limit; - - return f->fTranslit->buffer; - } - else - { - textLength = f->fTranslit->length; - textLimit = f->fTranslit->length; - - utrans_transUChars(f->fTranslit->translit, - f->fTranslit->buffer, - &textLength, - f->fTranslit->capacity, - 0, - &textLimit, - &status); - - /* out: converted len */ - *count = textLimit; - - /* Set pointers to 0 */ - f->fTranslit->pos = 0; - f->fTranslit->length = 0; - - return f->fTranslit->buffer; - } -} - -#endif - -void -ufile_flush_translit(UFILE *f) -{ -#if !UCONFIG_NO_TRANSLITERATION - if((!f)||(!f->fTranslit)) - return; -#endif - - u_file_write_flush(NULL, 0, f, FALSE, TRUE); -} - - -void -ufile_flush_io(UFILE *f) -{ - if((!f) || (!f->fFile)) { - return; /* skip if no file */ - } - - u_file_write_flush(NULL, 0, f, TRUE, FALSE); -} - - -void -ufile_close_translit(UFILE *f) -{ -#if !UCONFIG_NO_TRANSLITERATION - if((!f)||(!f->fTranslit)) - return; -#endif - - ufile_flush_translit(f); - -#if !UCONFIG_NO_TRANSLITERATION - if(f->fTranslit->translit) - utrans_close(f->fTranslit->translit); - - if(f->fTranslit->buffer) - { - uprv_free(f->fTranslit->buffer); - } - - uprv_free(f->fTranslit); - f->fTranslit = NULL; -#endif -} - - -/* 
Input/output */ - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fputs(const UChar *s, - UFILE *f) -{ - int32_t count = u_file_write(s, u_strlen(s), f); - count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); - return count; -} - -U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fputc(UChar32 uc, - UFILE *f) -{ - UChar buf[2]; - int32_t idx = 0; - UBool isError = FALSE; - - U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); - if (isError) { - return U_EOF; - } - return u_file_write(buf, idx, f) == idx ? uc : U_EOF; -} - - -U_CFUNC int32_t U_EXPORT2 -u_file_write_flush(const UChar *chars, - int32_t count, - UFILE *f, - UBool flushIO, - UBool flushTranslit) -{ - /* Set up conversion parameters */ - UErrorCode status = U_ZERO_ERROR; - const UChar *mySource = chars; - const UChar *mySourceBegin; - const UChar *mySourceEnd; - char charBuffer[UFILE_CHARBUFFER_SIZE]; - char *myTarget = charBuffer; - int32_t written = 0; - int32_t numConverted = 0; - - if (count < 0) { - count = u_strlen(chars); - } - -#if !UCONFIG_NO_TRANSLITERATION - if((f->fTranslit) && (f->fTranslit->translit)) - { - /* Do the transliteration */ - mySource = u_file_translit(f, chars, &count, flushTranslit); - } -#endif - - /* Write to a string. 
*/ - if (!f->fFile) { - int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); - if (flushIO && charsLeft > count) { - count++; - } - written = ufmt_min(count, charsLeft); - u_strncpy(f->str.fPos, mySource, written); - f->str.fPos += written; - return written; - } - - mySourceEnd = mySource + count; - - /* Perform the conversion in a loop */ - do { - mySourceBegin = mySource; /* beginning location for this loop */ - status = U_ZERO_ERROR; - if(f->fConverter != NULL) { /* We have a valid converter */ - ucnv_fromUnicode(f->fConverter, - &myTarget, - charBuffer + UFILE_CHARBUFFER_SIZE, - &mySource, - mySourceEnd, - NULL, - flushIO, - &status); - } else { /*weiv: do the invariant conversion */ - int32_t convertChars = (int32_t) (mySourceEnd - mySource); - if (convertChars > UFILE_CHARBUFFER_SIZE) { - convertChars = UFILE_CHARBUFFER_SIZE; - status = U_BUFFER_OVERFLOW_ERROR; - } - u_UCharsToChars(mySource, myTarget, convertChars); - mySource += convertChars; - myTarget += convertChars; - } - numConverted = (int32_t)(myTarget - charBuffer); - - if (numConverted > 0) { - /* write the converted bytes */ - fwrite(charBuffer, - sizeof(char), - numConverted, - f->fFile); - - written += (int32_t) (mySource - mySourceBegin); - } - myTarget = charBuffer; - } - while(status == U_BUFFER_OVERFLOW_ERROR); - - /* return # of chars written */ - return written; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_file_write( const UChar *chars, - int32_t count, - UFILE *f) -{ - return u_file_write_flush(chars,count,f,FALSE,FALSE); -} - - -/* private function used for buffering input */ -void -ufile_fill_uchar_buffer(UFILE *f) -{ - UErrorCode status; - const char *mySource; - const char *mySourceEnd; - UChar *myTarget; - int32_t bufferSize; - int32_t maxCPBytes; - int32_t bytesRead; - int32_t availLength; - int32_t dataSize; - char charBuffer[UFILE_CHARBUFFER_SIZE]; - u_localized_string *str; - - if (f->fFile == NULL) { - /* There is nothing to do. It's a string. */ - return; - } - - str = &f->str; - dataSize = (int32_t)(str->fLimit - str->fPos); - if (f->fFileno == 0 && dataSize > 0) { - /* Don't read from stdin too many times. There is still some data. */ - return; - } - - /* shift the buffer if it isn't empty */ - if(dataSize != 0) { - u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ - } - - - /* record how much buffer space is available */ - availLength = UFILE_UCHARBUFFER_SIZE - dataSize; - - /* Determine the # of codepage bytes needed to fill our UChar buffer */ - /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ - maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); - - /* Read in the data to convert */ - if (f->fFileno == 0) { - /* Special case. Read from stdin one line at a time. */ - char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); - bytesRead = (int32_t)(retStr ? 
uprv_strlen(charBuffer) : 0); - } - else { - /* A normal file */ - bytesRead = (int32_t)fread(charBuffer, - sizeof(char), - ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), - f->fFile); - } - - /* Set up conversion parameters */ - status = U_ZERO_ERROR; - mySource = charBuffer; - mySourceEnd = charBuffer + bytesRead; - myTarget = f->fUCBuffer + dataSize; - bufferSize = UFILE_UCHARBUFFER_SIZE; - - if(f->fConverter != NULL) { /* We have a valid converter */ - /* Perform the conversion */ - ucnv_toUnicode(f->fConverter, - &myTarget, - f->fUCBuffer + bufferSize, - &mySource, - mySourceEnd, - NULL, - (UBool)(feof(f->fFile) != 0), - &status); - - } else { /*weiv: do the invariant conversion */ - u_charsToUChars(mySource, myTarget, bytesRead); - myTarget += bytesRead; - } - - /* update the pointers into our array */ - str->fPos = str->fBuffer; - str->fLimit = myTarget; -} - -U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgets(UChar *s, - int32_t n, - UFILE *f) -{ - int32_t dataSize; - int32_t count; - UChar *alias; - const UChar *limit; - UChar *sItr; - UChar currDelim = 0; - u_localized_string *str; - - if (n <= 0) { - /* Caller screwed up. We need to write the null terminatior. 
*/ - return NULL; - } - - /* fill the buffer if needed */ - str = &f->str; - if (str->fPos >= str->fLimit) { - ufile_fill_uchar_buffer(f); - } - - /* subtract 1 from n to compensate for the terminator */ - --n; - - /* determine the amount of data in the buffer */ - dataSize = (int32_t)(str->fLimit - str->fPos); - - /* if 0 characters were left, return 0 */ - if (dataSize == 0) - return NULL; - - /* otherwise, iteratively fill the buffer and copy */ - count = 0; - sItr = s; - currDelim = 0; - while (dataSize > 0 && count < n) { - alias = str->fPos; - - /* Find how much to copy */ - if (dataSize < (n - count)) { - limit = str->fLimit; - } - else { - limit = alias + (n - count); - } - - if (!currDelim) { - /* Copy UChars until we find the first occurrence of a delimiter character */ - while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { - count++; - *(sItr++) = *(alias++); - } - /* Preserve the newline */ - if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { - if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { - currDelim = *alias; - } - else { - currDelim = 1; /* This isn't a newline, but it's used to say - that we should break later. We've checked all - possible newline combinations even across buffer - boundaries. */ - } - count++; - *(sItr++) = *(alias++); - } - } - /* If we have a CRLF combination, preserve that too. */ - if (alias < limit) { - if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { - count++; - *(sItr++) = *(alias++); - } - currDelim = 1; /* This isn't a newline, but it's used to say - that we should break later. We've checked all - possible newline combinations even across buffer - boundaries. 
*/ - } - - /* update the current buffer position */ - str->fPos = alias; - - /* if we found a delimiter */ - if (currDelim == 1) { - /* break out */ - break; - } - - /* refill the buffer */ - ufile_fill_uchar_buffer(f); - - /* determine the amount of data in the buffer */ - dataSize = (int32_t)(str->fLimit - str->fPos); - } - - /* add the terminator and return s */ - *sItr = 0x0000; - return s; -} - -U_CFUNC UBool U_EXPORT2 -ufile_getch(UFILE *f, UChar *ch) -{ - UBool isValidChar = FALSE; - - *ch = U_EOF; - /* if we have an available character in the buffer, return it */ - if(f->str.fPos < f->str.fLimit){ - *ch = *(f->str.fPos)++; - isValidChar = TRUE; - } - else { - /* otherwise, fill the buffer and return the next character */ - if(f->str.fPos >= f->str.fLimit) { - ufile_fill_uchar_buffer(f); - } - if(f->str.fPos < f->str.fLimit) { - *ch = *(f->str.fPos)++; - isValidChar = TRUE; - } - } - return isValidChar; -} - -U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetc(UFILE *f) -{ - UChar ch; - ufile_getch(f, &ch); - return ch; -} - -U_CFUNC UBool U_EXPORT2 -ufile_getch32(UFILE *f, UChar32 *c32) -{ - UBool isValidChar = FALSE; - u_localized_string *str; - - *c32 = U_EOF; - - /* Fill the buffer if it is empty */ - str = &f->str; - if (f && str->fPos + 1 >= str->fLimit) { - ufile_fill_uchar_buffer(f); - } - - /* Get the next character in the buffer */ - if (str->fPos < str->fLimit) { - *c32 = *(str->fPos)++; - if (U_IS_LEAD(*c32)) { - if (str->fPos < str->fLimit) { - UChar c16 = *(str->fPos)++; - *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); - isValidChar = TRUE; - } - else { - *c32 = U_EOF; - } - } - else { - isValidChar = TRUE; - } - } - - return isValidChar; -} - -U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetcx(UFILE *f) -{ - UChar32 ch; - ufile_getch32(f, &ch); - return ch; -} - -U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fungetc(UChar32 ch, - UFILE *f) -{ - u_localized_string *str; - - str = &f->str; - - /* if we're at the beginning of the buffer, sorry! */ - if (str->fPos == str->fBuffer - || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) - { - ch = U_EOF; - } - else { - /* otherwise, put the character back */ - /* Remember, read them back on in the reverse order. */ - if (U_IS_LEAD(ch)) { - if (*--(str->fPos) != U16_TRAIL(ch) - || *--(str->fPos) != U16_LEAD(ch)) - { - ch = U_EOF; - } - } - else if (*--(str->fPos) != ch) { - ch = U_EOF; - } - } - return ch; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_file_read( UChar *chars, - int32_t count, - UFILE *f) -{ - int32_t dataSize; - int32_t read = 0; - u_localized_string *str = &f->str; - - do { - - /* determine the amount of data in the buffer */ - dataSize = (int32_t)(str->fLimit - str->fPos); - if (dataSize <= 0) { - /* fill the buffer */ - ufile_fill_uchar_buffer(f); - dataSize = (int32_t)(str->fLimit - str->fPos); - } - - /* Make sure that we don't read too much */ - if (dataSize > (count - read)) { - dataSize = count - read; - } - - /* copy the current data in the buffer */ - memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); - - /* update number of items read */ - read += dataSize; - - /* update the current buffer position */ - str->fPos += dataSize; - } - while (dataSize != 0 && read < count); - - return read; -} -#endif diff --git a/deps/icu-small/source/io/ustdio.cpp b/deps/icu-small/source/io/ustdio.cpp new file mode 100644 index 0000000000..790a097980 --- /dev/null +++ b/deps/icu-small/source/io/ustdio.cpp @@ -0,0 +1,732 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * + * Copyright (C) 1998-2016, International Business Machines + * Corporation and others. 
All Rights Reserved. + * + ****************************************************************************** + * + * File ustdio.c + * + * Modification History: + * + * Date Name Description + * 11/18/98 stephen Creation. + * 03/12/99 stephen Modified for new C API. + * 07/19/99 stephen Fixed read() and gets() + ****************************************************************************** + */ + +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "ufile.h" +#include "ufmt_cmn.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" + +#include + +#define DELIM_LF 0x000A +#define DELIM_VT 0x000B +#define DELIM_FF 0x000C +#define DELIM_CR 0x000D +#define DELIM_NEL 0x0085 +#define DELIM_LS 0x2028 +#define DELIM_PS 0x2029 + +/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ +#if U_PLATFORM_USES_ONLY_WIN32_API +static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 2; +/* TODO: Default newline writing should be detected based upon the converter being used. 
*/ +#else +static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 1; +#endif + +#define IS_FIRST_STRING_DELIMITER(c1) \ + (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ + || (c1) == DELIM_NEL \ + || (c1) == DELIM_LS \ + || (c1) == DELIM_PS) +#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) +#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ + (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) + + +#if !UCONFIG_NO_TRANSLITERATION + +U_CAPI UTransliterator* U_EXPORT2 +u_fsettransliterator(UFILE *file, UFileDirection direction, + UTransliterator *adopt, UErrorCode *status) +{ + UTransliterator *old = NULL; + + if(U_FAILURE(*status)) + { + return adopt; + } + + if(!file) + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return adopt; + } + + if(direction & U_READ) + { + /** TODO: implement */ + *status = U_UNSUPPORTED_ERROR; + return adopt; + } + + if(adopt == NULL) /* they are clearing it */ + { + if(file->fTranslit != NULL) + { + /* TODO: Check side */ + old = file->fTranslit->translit; + uprv_free(file->fTranslit->buffer); + file->fTranslit->buffer=NULL; + uprv_free(file->fTranslit); + file->fTranslit=NULL; + } + } + else + { + if(file->fTranslit == NULL) + { + file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); + if(!file->fTranslit) + { + *status = U_MEMORY_ALLOCATION_ERROR; + return adopt; + } + file->fTranslit->capacity = 0; + file->fTranslit->length = 0; + file->fTranslit->pos = 0; + file->fTranslit->buffer = NULL; + } + else + { + old = file->fTranslit->translit; + ufile_flush_translit(file); + } + + file->fTranslit->translit = adopt; + } + + return old; +} + +static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) +{ + int32_t newlen; + int32_t junkCount = 0; + int32_t textLength; + int32_t textLimit; + UTransPosition pos; + UErrorCode status = U_ZERO_ERROR; + + if(count == NULL) + { + count = &junkCount; + } + + if 
((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) + { + /* fast path */ + return src; + } + + /* First: slide over everything */ + if(f->fTranslit->length > f->fTranslit->pos) + { + memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, + (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); + } + f->fTranslit->length -= f->fTranslit->pos; /* always */ + f->fTranslit->pos = 0; + + /* Calculate new buffer size needed */ + newlen = (*count + f->fTranslit->length) * 4; + + if(newlen > f->fTranslit->capacity) + { + if(f->fTranslit->buffer == NULL) + { + f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); + } + else + { + f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); + } + /* Check for malloc/realloc failure. */ + if (f->fTranslit->buffer == NULL) { + return NULL; + } + f->fTranslit->capacity = newlen; + } + + /* Now, copy any data over */ + u_strncpy(f->fTranslit->buffer + f->fTranslit->length, + src, + *count); + f->fTranslit->length += *count; + + /* Now, translit in place as much as we can */ + if(flush == FALSE) + { + textLength = f->fTranslit->length; + pos.contextStart = 0; + pos.contextLimit = textLength; + pos.start = 0; + pos.limit = textLength; + + utrans_transIncrementalUChars(f->fTranslit->translit, + f->fTranslit->buffer, /* because we shifted */ + &textLength, + f->fTranslit->capacity, + &pos, + &status); + + /* now: start/limit point to the transliterated text */ + /* Transliterated is [buffer..pos.start) */ + *count = pos.start; + f->fTranslit->pos = pos.start; + f->fTranslit->length = pos.limit; + + return f->fTranslit->buffer; + } + else + { + textLength = f->fTranslit->length; + textLimit = f->fTranslit->length; + + utrans_transUChars(f->fTranslit->translit, + f->fTranslit->buffer, + &textLength, + f->fTranslit->capacity, + 0, + &textLimit, + &status); + + /* out: converted len */ + *count = textLimit; + + /* Set pointers to 0 */ + f->fTranslit->pos = 0; + 
f->fTranslit->length = 0; + + return f->fTranslit->buffer; + } +} + +#endif + +void +ufile_flush_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + u_file_write_flush(NULL, 0, f, FALSE, TRUE); +} + + +void +ufile_flush_io(UFILE *f) +{ + if((!f) || (!f->fFile)) { + return; /* skip if no file */ + } + + u_file_write_flush(NULL, 0, f, TRUE, FALSE); +} + + +void +ufile_close_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + ufile_flush_translit(f); + +#if !UCONFIG_NO_TRANSLITERATION + if(f->fTranslit->translit) + utrans_close(f->fTranslit->translit); + + if(f->fTranslit->buffer) + { + uprv_free(f->fTranslit->buffer); + } + + uprv_free(f->fTranslit); + f->fTranslit = NULL; +#endif +} + + +/* Input/output */ + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputs(const UChar *s, + UFILE *f) +{ + int32_t count = u_file_write(s, u_strlen(s), f); + count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); + return count; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputc(UChar32 uc, + UFILE *f) +{ + UChar buf[2]; + int32_t idx = 0; + UBool isError = FALSE; + + U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); + if (isError) { + return U_EOF; + } + return u_file_write(buf, idx, f) == idx ? 
uc : U_EOF; +} + + +U_CFUNC int32_t U_EXPORT2 +u_file_write_flush(const UChar *chars, + int32_t count, + UFILE *f, + UBool flushIO, + UBool flushTranslit) +{ + /* Set up conversion parameters */ + UErrorCode status = U_ZERO_ERROR; + const UChar *mySource = chars; + const UChar *mySourceBegin; + const UChar *mySourceEnd; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + char *myTarget = charBuffer; + int32_t written = 0; + int32_t numConverted = 0; + + if (count < 0) { + count = u_strlen(chars); + } + +#if !UCONFIG_NO_TRANSLITERATION + if((f->fTranslit) && (f->fTranslit->translit)) + { + /* Do the transliteration */ + mySource = u_file_translit(f, chars, &count, flushTranslit); + } +#endif + + /* Write to a string. */ + if (!f->fFile) { + int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); + if (flushIO && charsLeft > count) { + count++; + } + written = ufmt_min(count, charsLeft); + u_strncpy(f->str.fPos, mySource, written); + f->str.fPos += written; + return written; + } + + mySourceEnd = mySource + count; + + /* Perform the conversion in a loop */ + do { + mySourceBegin = mySource; /* beginning location for this loop */ + status = U_ZERO_ERROR; + if(f->fConverter != NULL) { /* We have a valid converter */ + ucnv_fromUnicode(f->fConverter, + &myTarget, + charBuffer + UFILE_CHARBUFFER_SIZE, + &mySource, + mySourceEnd, + NULL, + flushIO, + &status); + } else { /*weiv: do the invariant conversion */ + int32_t convertChars = (int32_t) (mySourceEnd - mySource); + if (convertChars > UFILE_CHARBUFFER_SIZE) { + convertChars = UFILE_CHARBUFFER_SIZE; + status = U_BUFFER_OVERFLOW_ERROR; + } + u_UCharsToChars(mySource, myTarget, convertChars); + mySource += convertChars; + myTarget += convertChars; + } + numConverted = (int32_t)(myTarget - charBuffer); + + if (numConverted > 0) { + /* write the converted bytes */ + fwrite(charBuffer, + sizeof(char), + numConverted, + f->fFile); + + written += (int32_t) (mySource - mySourceBegin); + } + myTarget = charBuffer; + } + 
while(status == U_BUFFER_OVERFLOW_ERROR); + + /* return # of chars written */ + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_write( const UChar *chars, + int32_t count, + UFILE *f) +{ + return u_file_write_flush(chars,count,f,FALSE,FALSE); +} + + +/* private function used for buffering input */ +void +ufile_fill_uchar_buffer(UFILE *f) +{ + UErrorCode status; + const char *mySource; + const char *mySourceEnd; + UChar *myTarget; + int32_t bufferSize; + int32_t maxCPBytes; + int32_t bytesRead; + int32_t availLength; + int32_t dataSize; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + u_localized_string *str; + + if (f->fFile == NULL) { + /* There is nothing to do. It's a string. */ + return; + } + + str = &f->str; + dataSize = (int32_t)(str->fLimit - str->fPos); + if (f->fFileno == 0 && dataSize > 0) { + /* Don't read from stdin too many times. There is still some data. */ + return; + } + + /* shift the buffer if it isn't empty */ + if(dataSize != 0) { + u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ + } + + + /* record how much buffer space is available */ + availLength = UFILE_UCHARBUFFER_SIZE - dataSize; + + /* Determine the # of codepage bytes needed to fill our UChar buffer */ + /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ + maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); + + /* Read in the data to convert */ + if (f->fFileno == 0) { + /* Special case. Read from stdin one line at a time. */ + char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); + bytesRead = (int32_t)(retStr ? 
uprv_strlen(charBuffer) : 0); + } + else { + /* A normal file */ + bytesRead = (int32_t)fread(charBuffer, + sizeof(char), + ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), + f->fFile); + } + + /* Set up conversion parameters */ + status = U_ZERO_ERROR; + mySource = charBuffer; + mySourceEnd = charBuffer + bytesRead; + myTarget = f->fUCBuffer + dataSize; + bufferSize = UFILE_UCHARBUFFER_SIZE; + + if(f->fConverter != NULL) { /* We have a valid converter */ + /* Perform the conversion */ + ucnv_toUnicode(f->fConverter, + &myTarget, + f->fUCBuffer + bufferSize, + &mySource, + mySourceEnd, + NULL, + (UBool)(feof(f->fFile) != 0), + &status); + + } else { /*weiv: do the invariant conversion */ + u_charsToUChars(mySource, myTarget, bytesRead); + myTarget += bytesRead; + } + + /* update the pointers into our array */ + str->fPos = str->fBuffer; + str->fLimit = myTarget; +} + +U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgets(UChar *s, + int32_t n, + UFILE *f) +{ + int32_t dataSize; + int32_t count; + UChar *alias; + const UChar *limit; + UChar *sItr; + UChar currDelim = 0; + u_localized_string *str; + + if (n <= 0) { + /* Caller screwed up. We need to write the null terminatior. 
*/ + return NULL; + } + + /* fill the buffer if needed */ + str = &f->str; + if (str->fPos >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* subtract 1 from n to compensate for the terminator */ + --n; + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + + /* if 0 characters were left, return 0 */ + if (dataSize == 0) + return NULL; + + /* otherwise, iteratively fill the buffer and copy */ + count = 0; + sItr = s; + currDelim = 0; + while (dataSize > 0 && count < n) { + alias = str->fPos; + + /* Find how much to copy */ + if (dataSize < (n - count)) { + limit = str->fLimit; + } + else { + limit = alias + (n - count); + } + + if (!currDelim) { + /* Copy UChars until we find the first occurrence of a delimiter character */ + while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { + count++; + *(sItr++) = *(alias++); + } + /* Preserve the newline */ + if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { + if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { + currDelim = *alias; + } + else { + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. */ + } + count++; + *(sItr++) = *(alias++); + } + } + /* If we have a CRLF combination, preserve that too. */ + if (alias < limit) { + if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { + count++; + *(sItr++) = *(alias++); + } + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. 
*/ + } + + /* update the current buffer position */ + str->fPos = alias; + + /* if we found a delimiter */ + if (currDelim == 1) { + /* break out */ + break; + } + + /* refill the buffer */ + ufile_fill_uchar_buffer(f); + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* add the terminator and return s */ + *sItr = 0x0000; + return s; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch(UFILE *f, UChar *ch) +{ + UBool isValidChar = FALSE; + + *ch = U_EOF; + /* if we have an available character in the buffer, return it */ + if(f->str.fPos < f->str.fLimit){ + *ch = *(f->str.fPos)++; + isValidChar = TRUE; + } + else { + /* otherwise, fill the buffer and return the next character */ + if(f->str.fPos >= f->str.fLimit) { + ufile_fill_uchar_buffer(f); + } + if(f->str.fPos < f->str.fLimit) { + *ch = *(f->str.fPos)++; + isValidChar = TRUE; + } + } + return isValidChar; +} + +U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetc(UFILE *f) +{ + UChar ch; + ufile_getch(f, &ch); + return ch; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch32(UFILE *f, UChar32 *c32) +{ + UBool isValidChar = FALSE; + u_localized_string *str; + + *c32 = U_EOF; + + /* Fill the buffer if it is empty */ + str = &f->str; + if (f && str->fPos + 1 >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* Get the next character in the buffer */ + if (str->fPos < str->fLimit) { + *c32 = *(str->fPos)++; + if (U_IS_LEAD(*c32)) { + if (str->fPos < str->fLimit) { + UChar c16 = *(str->fPos)++; + *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); + isValidChar = TRUE; + } + else { + *c32 = U_EOF; + } + } + else { + isValidChar = TRUE; + } + } + + return isValidChar; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcx(UFILE *f) +{ + UChar32 ch; + ufile_getch32(f, &ch); + return ch; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fungetc(UChar32 ch, + UFILE *f) +{ + u_localized_string *str; + + str = &f->str; + + /* if we're at the beginning of the buffer, sorry! */ + if (str->fPos == str->fBuffer + || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) + { + ch = U_EOF; + } + else { + /* otherwise, put the character back */ + /* Remember, read them back on in the reverse order. */ + if (U_IS_LEAD(ch)) { + if (*--(str->fPos) != U16_TRAIL(ch) + || *--(str->fPos) != U16_LEAD(ch)) + { + ch = U_EOF; + } + } + else if (*--(str->fPos) != ch) { + ch = U_EOF; + } + } + return ch; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_read( UChar *chars, + int32_t count, + UFILE *f) +{ + int32_t dataSize; + int32_t read = 0; + u_localized_string *str = &f->str; + + do { + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + if (dataSize <= 0) { + /* fill the buffer */ + ufile_fill_uchar_buffer(f); + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* Make sure that we don't read too much */ + if (dataSize > (count - read)) { + dataSize = count - read; + } + + /* copy the current data in the buffer */ + memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); + + /* update number of items read */ + read += dataSize; + + /* update the current buffer position */ + str->fPos += dataSize; + } + while (dataSize != 0 && read < count); + + return read; +} +#endif diff --git a/deps/icu-small/source/io/ustream.cpp b/deps/icu-small/source/io/ustream.cpp index 8e0087edbe..a537d14383 100644 --- a/deps/icu-small/source/io/ustream.cpp +++ b/deps/icu-small/source/io/ustream.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** diff --git a/deps/icu-small/source/stubdata/stubdata.c b/deps/icu-small/source/stubdata/stubdata.c deleted file mode 100644 index a1a1654560..0000000000 --- a/deps/icu-small/source/stubdata/stubdata.c +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** -* -* Copyright (C) 2001, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: stubdata.c -* -* Define initialized data that will build into a valid, but empty -* ICU data library. Used to bootstrap the ICU build, which has these -* dependencies: -* ICU Common library depends on ICU data -* ICU data requires data building tools. -* ICU data building tools require the ICU common library. -* -* The stub data library (for which this file is the source) is sufficient -* for running the data building tools. -* -*/ -#include "unicode/utypes.h" -#include "unicode/udata.h" -#include "unicode/uversion.h" - - -typedef struct { - uint16_t headerSize; - uint8_t magic1, magic2; - UDataInfo info; - char padding[8]; - uint32_t count, reserved; - /* - const struct { - const char *const name; - const void *const data; - } toc[1]; - */ - int fakeNameAndData[4]; /* TODO: Change this header type from */ - /* pointerTOC to OffsetTOC. 
*/ -} ICU_Data_Header; - -U_EXPORT const ICU_Data_Header U_ICUDATA_ENTRY_POINT = { - 32, /* headerSize */ - 0xda, /* magic1, (see struct MappedData in udata.c) */ - 0x27, /* magic2 */ - { /*UDataInfo */ - sizeof(UDataInfo), /* size */ - 0, /* reserved */ - -#if U_IS_BIG_ENDIAN - 1, -#else - 0, -#endif - - U_CHARSET_FAMILY, - sizeof(UChar), - 0, /* reserved */ - { /* data format identifier */ - 0x54, 0x6f, 0x43, 0x50}, /* "ToCP" */ - {1, 0, 0, 0}, /* format version major, minor, milli, micro */ - {0, 0, 0, 0} /* dataVersion */ - }, - {0,0,0,0,0,0,0,0}, /* Padding[8] */ - 0, /* count */ - 0, /* Reserved */ - { /* TOC structure */ -/* { */ - 0 , 0 , 0, 0 /* name and data entries. Count says there are none, */ - /* but put one in just in case. */ -/* } */ - } -}; diff --git a/deps/icu-small/source/stubdata/stubdata.cpp b/deps/icu-small/source/stubdata/stubdata.cpp new file mode 100644 index 0000000000..de49b9a733 --- /dev/null +++ b/deps/icu-small/source/stubdata/stubdata.cpp @@ -0,0 +1,74 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/****************************************************************************** +* +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: stubdata.c +* +* Define initialized data that will build into a valid, but empty +* ICU data library. Used to bootstrap the ICU build, which has these +* dependencies: +* ICU Common library depends on ICU data +* ICU data requires data building tools. +* ICU data building tools require the ICU common library. +* +* The stub data library (for which this file is the source) is sufficient +* for running the data building tools. 
+* +*/ +#include "unicode/utypes.h" +#include "unicode/udata.h" +#include "unicode/uversion.h" + + +typedef struct { + uint16_t headerSize; + uint8_t magic1, magic2; + UDataInfo info; + char padding[8]; + uint32_t count, reserved; + /* + const struct { + const char *const name; + const void *const data; + } toc[1]; + */ + int fakeNameAndData[4]; /* TODO: Change this header type from */ + /* pointerTOC to OffsetTOC. */ +} ICU_Data_Header; + +extern "C" U_EXPORT const ICU_Data_Header U_ICUDATA_ENTRY_POINT = { + 32, /* headerSize */ + 0xda, /* magic1, (see struct MappedData in udata.c) */ + 0x27, /* magic2 */ + { /*UDataInfo */ + sizeof(UDataInfo), /* size */ + 0, /* reserved */ + +#if U_IS_BIG_ENDIAN + 1, +#else + 0, +#endif + + U_CHARSET_FAMILY, + sizeof(UChar), + 0, /* reserved */ + { /* data format identifier */ + 0x54, 0x6f, 0x43, 0x50}, /* "ToCP" */ + {1, 0, 0, 0}, /* format version major, minor, milli, micro */ + {0, 0, 0, 0} /* dataVersion */ + }, + {0,0,0,0,0,0,0,0}, /* Padding[8] */ + 0, /* count */ + 0, /* Reserved */ + { /* TOC structure */ +/* { */ + 0 , 0 , 0, 0 /* name and data entries. Count says there are none, */ + /* but put one in just in case. */ +/* } */ + } +}; diff --git a/deps/icu-small/source/tools/escapesrc/cptbl.h b/deps/icu-small/source/tools/escapesrc/cptbl.h new file mode 100644 index 0000000000..efaa9642e1 --- /dev/null +++ b/deps/icu-small/source/tools/escapesrc/cptbl.h @@ -0,0 +1,520 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html +// generated by tblgen. You weren't going to edit it by hand, were you? 
+ +static const char cp1047_8859_1[256] = { + (char)0x00, /* 00 */ + (char)0x01, /* 01 */ + (char)0x02, /* 02 */ + (char)0x03, /* 03 */ + (char)0x9C, /* 04 */ + (char)0x09, /* 05 */ + (char)0x86, /* 06 */ + (char)0x7F, /* 07 */ + (char)0x97, /* 08 */ + (char)0x8D, /* 09 */ + (char)0x8E, /* 0A */ + (char)0x0B, /* 0B */ + (char)0x0C, /* 0C */ + (char)0x0D, /* 0D */ + (char)0x0E, /* 0E */ + (char)0x0F, /* 0F */ + (char)0x10, /* 10 */ + (char)0x11, /* 11 */ + (char)0x12, /* 12 */ + (char)0x13, /* 13 */ + (char)0x9D, /* 14 */ + (char)0x85, /* 15 */ + (char)0x08, /* 16 */ + (char)0x87, /* 17 */ + (char)0x18, /* 18 */ + (char)0x19, /* 19 */ + (char)0x92, /* 1A */ + (char)0x8F, /* 1B */ + (char)0x1C, /* 1C */ + (char)0x1D, /* 1D */ + (char)0x1E, /* 1E */ + (char)0x1F, /* 1F */ + (char)0x80, /* 20 */ + (char)0x81, /* 21 */ + (char)0x82, /* 22 */ + (char)0x83, /* 23 */ + (char)0x84, /* 24 */ + (char)0x0A, /* 25 */ + (char)0x17, /* 26 */ + (char)0x1B, /* 27 */ + (char)0x88, /* 28 */ + (char)0x89, /* 29 */ + (char)0x8A, /* 2A */ + (char)0x8B, /* 2B */ + (char)0x8C, /* 2C */ + (char)0x05, /* 2D */ + (char)0x06, /* 2E */ + (char)0x07, /* 2F */ + (char)0x90, /* 30 */ + (char)0x91, /* 31 */ + (char)0x16, /* 32 */ + (char)0x93, /* 33 */ + (char)0x94, /* 34 */ + (char)0x95, /* 35 */ + (char)0x96, /* 36 */ + (char)0x04, /* 37 */ + (char)0x98, /* 38 */ + (char)0x99, /* 39 */ + (char)0x9A, /* 3A */ + (char)0x9B, /* 3B */ + (char)0x14, /* 3C */ + (char)0x15, /* 3D */ + (char)0x9E, /* 3E */ + (char)0x1A, /* 3F */ + (char)0x20, /* 40 */ + (char)0xA0, /* 41 */ + (char)0xE2, /* 42 */ + (char)0xE4, /* 43 */ + (char)0xE0, /* 44 */ + (char)0xE1, /* 45 */ + (char)0xE3, /* 46 */ + (char)0xE5, /* 47 */ + (char)0xE7, /* 48 */ + (char)0xF1, /* 49 */ + (char)0xA2, /* 4A */ + (char)0x2E, /* 4B */ + (char)0x3C, /* 4C */ + (char)0x28, /* 4D */ + (char)0x2B, /* 4E */ + (char)0x7C, /* 4F */ + (char)0x26, /* 50 */ + (char)0xE9, /* 51 */ + (char)0xEA, /* 52 */ + (char)0xEB, /* 53 */ + (char)0xE8, /* 54 */ 
+ (char)0xED, /* 55 */ + (char)0xEE, /* 56 */ + (char)0xEF, /* 57 */ + (char)0xEC, /* 58 */ + (char)0xDF, /* 59 */ + (char)0x21, /* 5A */ + (char)0x24, /* 5B */ + (char)0x2A, /* 5C */ + (char)0x29, /* 5D */ + (char)0x3B, /* 5E */ + (char)0x5E, /* 5F */ + (char)0x2D, /* 60 */ + (char)0x2F, /* 61 */ + (char)0xC2, /* 62 */ + (char)0xC4, /* 63 */ + (char)0xC0, /* 64 */ + (char)0xC1, /* 65 */ + (char)0xC3, /* 66 */ + (char)0xC5, /* 67 */ + (char)0xC7, /* 68 */ + (char)0xD1, /* 69 */ + (char)0xA6, /* 6A */ + (char)0x2C, /* 6B */ + (char)0x25, /* 6C */ + (char)0x5F, /* 6D */ + (char)0x3E, /* 6E */ + (char)0x3F, /* 6F */ + (char)0xF8, /* 70 */ + (char)0xC9, /* 71 */ + (char)0xCA, /* 72 */ + (char)0xCB, /* 73 */ + (char)0xC8, /* 74 */ + (char)0xCD, /* 75 */ + (char)0xCE, /* 76 */ + (char)0xCF, /* 77 */ + (char)0xCC, /* 78 */ + (char)0x60, /* 79 */ + (char)0x3A, /* 7A */ + (char)0x23, /* 7B */ + (char)0x40, /* 7C */ + (char)0x27, /* 7D */ + (char)0x3D, /* 7E */ + (char)0x22, /* 7F */ + (char)0xD8, /* 80 */ + (char)0x61, /* 81 */ + (char)0x62, /* 82 */ + (char)0x63, /* 83 */ + (char)0x64, /* 84 */ + (char)0x65, /* 85 */ + (char)0x66, /* 86 */ + (char)0x67, /* 87 */ + (char)0x68, /* 88 */ + (char)0x69, /* 89 */ + (char)0xAB, /* 8A */ + (char)0xBB, /* 8B */ + (char)0xF0, /* 8C */ + (char)0xFD, /* 8D */ + (char)0xFE, /* 8E */ + (char)0xB1, /* 8F */ + (char)0xB0, /* 90 */ + (char)0x6A, /* 91 */ + (char)0x6B, /* 92 */ + (char)0x6C, /* 93 */ + (char)0x6D, /* 94 */ + (char)0x6E, /* 95 */ + (char)0x6F, /* 96 */ + (char)0x70, /* 97 */ + (char)0x71, /* 98 */ + (char)0x72, /* 99 */ + (char)0xAA, /* 9A */ + (char)0xBA, /* 9B */ + (char)0xE6, /* 9C */ + (char)0xB8, /* 9D */ + (char)0xC6, /* 9E */ + (char)0xA4, /* 9F */ + (char)0xB5, /* A0 */ + (char)0x7E, /* A1 */ + (char)0x73, /* A2 */ + (char)0x74, /* A3 */ + (char)0x75, /* A4 */ + (char)0x76, /* A5 */ + (char)0x77, /* A6 */ + (char)0x78, /* A7 */ + (char)0x79, /* A8 */ + (char)0x7A, /* A9 */ + (char)0xA1, /* AA */ + (char)0xBF, /* AB 
*/ + (char)0xD0, /* AC */ + (char)0x5B, /* AD */ + (char)0xDE, /* AE */ + (char)0xAE, /* AF */ + (char)0xAC, /* B0 */ + (char)0xA3, /* B1 */ + (char)0xA5, /* B2 */ + (char)0xB7, /* B3 */ + (char)0xA9, /* B4 */ + (char)0xA7, /* B5 */ + (char)0xB6, /* B6 */ + (char)0xBC, /* B7 */ + (char)0xBD, /* B8 */ + (char)0xBE, /* B9 */ + (char)0xDD, /* BA */ + (char)0xA8, /* BB */ + (char)0xAF, /* BC */ + (char)0x5D, /* BD */ + (char)0xB4, /* BE */ + (char)0xD7, /* BF */ + (char)0x7B, /* C0 */ + (char)0x41, /* C1 */ + (char)0x42, /* C2 */ + (char)0x43, /* C3 */ + (char)0x44, /* C4 */ + (char)0x45, /* C5 */ + (char)0x46, /* C6 */ + (char)0x47, /* C7 */ + (char)0x48, /* C8 */ + (char)0x49, /* C9 */ + (char)0xAD, /* CA */ + (char)0xF4, /* CB */ + (char)0xF6, /* CC */ + (char)0xF2, /* CD */ + (char)0xF3, /* CE */ + (char)0xF5, /* CF */ + (char)0x7D, /* D0 */ + (char)0x4A, /* D1 */ + (char)0x4B, /* D2 */ + (char)0x4C, /* D3 */ + (char)0x4D, /* D4 */ + (char)0x4E, /* D5 */ + (char)0x4F, /* D6 */ + (char)0x50, /* D7 */ + (char)0x51, /* D8 */ + (char)0x52, /* D9 */ + (char)0xB9, /* DA */ + (char)0xFB, /* DB */ + (char)0xFC, /* DC */ + (char)0xF9, /* DD */ + (char)0xFA, /* DE */ + (char)0xFF, /* DF */ + (char)0x5C, /* E0 */ + (char)0xF7, /* E1 */ + (char)0x53, /* E2 */ + (char)0x54, /* E3 */ + (char)0x55, /* E4 */ + (char)0x56, /* E5 */ + (char)0x57, /* E6 */ + (char)0x58, /* E7 */ + (char)0x59, /* E8 */ + (char)0x5A, /* E9 */ + (char)0xB2, /* EA */ + (char)0xD4, /* EB */ + (char)0xD6, /* EC */ + (char)0xD2, /* ED */ + (char)0xD3, /* EE */ + (char)0xD5, /* EF */ + (char)0x30, /* F0 */ + (char)0x31, /* F1 */ + (char)0x32, /* F2 */ + (char)0x33, /* F3 */ + (char)0x34, /* F4 */ + (char)0x35, /* F5 */ + (char)0x36, /* F6 */ + (char)0x37, /* F7 */ + (char)0x38, /* F8 */ + (char)0x39, /* F9 */ + (char)0xB3, /* FA */ + (char)0xDB, /* FB */ + (char)0xDC, /* FC */ + (char)0xD9, /* FD */ + (char)0xDA, /* FE */ + (char)0x9F, /* FF */ +}; + +static const bool oldIllegal[256] = { + false, /* U+0000 
*/ + false, /* U+0001 */ + false, /* U+0002 */ + false, /* U+0003 */ + false, /* U+0004 */ + false, /* U+0005 */ + false, /* U+0006 */ + false, /* U+0007 */ + false, /* U+0008 */ + false, /* U+0009 */ + false, /* U+000A */ + false, /* U+000B */ + false, /* U+000C */ + false, /* U+000D */ + false, /* U+000E */ + false, /* U+000F */ + false, /* U+0010 */ + false, /* U+0011 */ + false, /* U+0012 */ + false, /* U+0013 */ + false, /* U+0014 */ + false, /* U+0015 */ + false, /* U+0016 */ + false, /* U+0017 */ + false, /* U+0018 */ + false, /* U+0019 */ + false, /* U+001A */ + false, /* U+001B */ + false, /* U+001C */ + false, /* U+001D */ + false, /* U+001E */ + false, /* U+001F */ + true, /* U+0020 */ + true, /* U+0021 */ + true, /* U+0022 */ + true, /* U+0023 */ + false, /* U+0024 */ + true, /* U+0025 */ + true, /* U+0026 */ + true, /* U+0027 */ + true, /* U+0028 */ + true, /* U+0029 */ + true, /* U+002A */ + true, /* U+002B */ + true, /* U+002C */ + true, /* U+002D */ + true, /* U+002E */ + true, /* U+002F */ + true, /* U+0030 */ + true, /* U+0031 */ + true, /* U+0032 */ + true, /* U+0033 */ + true, /* U+0034 */ + true, /* U+0035 */ + true, /* U+0036 */ + true, /* U+0037 */ + true, /* U+0038 */ + true, /* U+0039 */ + true, /* U+003A */ + true, /* U+003B */ + true, /* U+003C */ + true, /* U+003D */ + true, /* U+003E */ + true, /* U+003F */ + false, /* U+0040 */ + true, /* U+0041 */ + true, /* U+0042 */ + true, /* U+0043 */ + true, /* U+0044 */ + true, /* U+0045 */ + true, /* U+0046 */ + true, /* U+0047 */ + true, /* U+0048 */ + true, /* U+0049 */ + true, /* U+004A */ + true, /* U+004B */ + true, /* U+004C */ + true, /* U+004D */ + true, /* U+004E */ + true, /* U+004F */ + true, /* U+0050 */ + true, /* U+0051 */ + true, /* U+0052 */ + true, /* U+0053 */ + true, /* U+0054 */ + true, /* U+0055 */ + true, /* U+0056 */ + true, /* U+0057 */ + true, /* U+0058 */ + true, /* U+0059 */ + true, /* U+005A */ + true, /* U+005B */ + false, /* U+005C */ + true, /* U+005D */ + true, 
/* U+005E */ + true, /* U+005F */ + false, /* U+0060 */ + true, /* U+0061 */ + true, /* U+0062 */ + true, /* U+0063 */ + true, /* U+0064 */ + true, /* U+0065 */ + true, /* U+0066 */ + true, /* U+0067 */ + true, /* U+0068 */ + true, /* U+0069 */ + true, /* U+006A */ + true, /* U+006B */ + true, /* U+006C */ + true, /* U+006D */ + true, /* U+006E */ + true, /* U+006F */ + true, /* U+0070 */ + true, /* U+0071 */ + true, /* U+0072 */ + true, /* U+0073 */ + true, /* U+0074 */ + true, /* U+0075 */ + true, /* U+0076 */ + true, /* U+0077 */ + true, /* U+0078 */ + true, /* U+0079 */ + true, /* U+007A */ + true, /* U+007B */ + true, /* U+007C */ + true, /* U+007D */ + true, /* U+007E */ + false, /* U+007F */ + false, /* U+0080 */ + false, /* U+0081 */ + false, /* U+0082 */ + false, /* U+0083 */ + false, /* U+0084 */ + false, /* U+0085 */ + false, /* U+0086 */ + false, /* U+0087 */ + false, /* U+0088 */ + false, /* U+0089 */ + false, /* U+008A */ + false, /* U+008B */ + false, /* U+008C */ + false, /* U+008D */ + false, /* U+008E */ + false, /* U+008F */ + false, /* U+0090 */ + false, /* U+0091 */ + false, /* U+0092 */ + false, /* U+0093 */ + false, /* U+0094 */ + false, /* U+0095 */ + false, /* U+0096 */ + false, /* U+0097 */ + false, /* U+0098 */ + false, /* U+0099 */ + false, /* U+009A */ + false, /* U+009B */ + false, /* U+009C */ + false, /* U+009D */ + false, /* U+009E */ + false, /* U+009F */ + false, /* U+00A0 */ + false, /* U+00A1 */ + false, /* U+00A2 */ + false, /* U+00A3 */ + false, /* U+00A4 */ + false, /* U+00A5 */ + false, /* U+00A6 */ + false, /* U+00A7 */ + false, /* U+00A8 */ + false, /* U+00A9 */ + false, /* U+00AA */ + false, /* U+00AB */ + false, /* U+00AC */ + false, /* U+00AD */ + false, /* U+00AE */ + false, /* U+00AF */ + false, /* U+00B0 */ + false, /* U+00B1 */ + false, /* U+00B2 */ + false, /* U+00B3 */ + false, /* U+00B4 */ + false, /* U+00B5 */ + false, /* U+00B6 */ + false, /* U+00B7 */ + false, /* U+00B8 */ + false, /* U+00B9 */ + false, /* 
U+00BA */ + false, /* U+00BB */ + false, /* U+00BC */ + false, /* U+00BD */ + false, /* U+00BE */ + false, /* U+00BF */ + false, /* U+00C0 */ + false, /* U+00C1 */ + false, /* U+00C2 */ + false, /* U+00C3 */ + false, /* U+00C4 */ + false, /* U+00C5 */ + false, /* U+00C6 */ + false, /* U+00C7 */ + false, /* U+00C8 */ + false, /* U+00C9 */ + false, /* U+00CA */ + false, /* U+00CB */ + false, /* U+00CC */ + false, /* U+00CD */ + false, /* U+00CE */ + false, /* U+00CF */ + false, /* U+00D0 */ + false, /* U+00D1 */ + false, /* U+00D2 */ + false, /* U+00D3 */ + false, /* U+00D4 */ + false, /* U+00D5 */ + false, /* U+00D6 */ + false, /* U+00D7 */ + false, /* U+00D8 */ + false, /* U+00D9 */ + false, /* U+00DA */ + false, /* U+00DB */ + false, /* U+00DC */ + false, /* U+00DD */ + false, /* U+00DE */ + false, /* U+00DF */ + false, /* U+00E0 */ + false, /* U+00E1 */ + false, /* U+00E2 */ + false, /* U+00E3 */ + false, /* U+00E4 */ + false, /* U+00E5 */ + false, /* U+00E6 */ + false, /* U+00E7 */ + false, /* U+00E8 */ + false, /* U+00E9 */ + false, /* U+00EA */ + false, /* U+00EB */ + false, /* U+00EC */ + false, /* U+00ED */ + false, /* U+00EE */ + false, /* U+00EF */ + false, /* U+00F0 */ + false, /* U+00F1 */ + false, /* U+00F2 */ + false, /* U+00F3 */ + false, /* U+00F4 */ + false, /* U+00F5 */ + false, /* U+00F6 */ + false, /* U+00F7 */ + false, /* U+00F8 */ + false, /* U+00F9 */ + false, /* U+00FA */ + false, /* U+00FB */ + false, /* U+00FC */ + false, /* U+00FD */ + false, /* U+00FE */ + false, /* U+00FF */ +}; diff --git a/deps/icu-small/source/tools/escapesrc/escapesrc.cpp b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp new file mode 100644 index 0000000000..1127cd4ffb --- /dev/null +++ b/deps/icu-small/source/tools/escapesrc/escapesrc.cpp @@ -0,0 +1,409 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include +#include +#include +#include +#include +#include +#include +#include + +// with caution: +#include "unicode/utf8.h" + +static const char + kSPACE = 0x20, + kTAB = 0x09, + kLF = 0x0A, + kCR = 0x0D, + // kHASH = 0x23, + // kSLASH = 0x2f, + kBKSLASH = 0x5C, + // kSTAR = 0x2A, + kL_U = 0x75, + kU_U = 0x55, + kQUOT = 0x27, + kDBLQ = 0x22; + +# include "cptbl.h" + +# define cp1047_to_8859(c) cp1047_8859_1[c] + +std::string prog; + +void usage() { + fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str()); +} + + +int cleanup(const std::string &outfile) { + const char *outstr = outfile.c_str(); + if(outstr && *outstr) { + int rc = unlink(outstr); + if(rc == 0) { + fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr); + return 0; + } else { + if( errno == ENOENT ) { + return 0; // File did not exist - no error. + } else { + perror("unlink"); + return 1; + } + } + } + return 0; +} + +// inline bool hasNonAscii(const char *line, size_t len) { +// const unsigned char *uline = reinterpret_cast(line); +// for(size_t i=0;i 0x7F) { +// return true; +// } +// } +// return false; +// } + +inline const char *skipws(const char *p, const char *e) { + for(;p0; pos2++,trail--) { + linestr[pos2] = cp1047_to_8859(linestr[pos2]); + if(linestr[pos2] == 0x0A) { + linestr[pos2] = 0x85; // NL is ambiguous here + } + } +#endif + + // Proceed to decode utf-8 + const uint8_t *s = (const uint8_t*) (linestr.c_str()); + int32_t length = linestr.size(); + UChar32 c; + if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) { +#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) + linestr[pos] = old_byte; // put it back +#endif + continue; // single code point not previously legal for \u escaping + } + + // otherwise, convert it to \u / \U + { + U8_NEXT(s, i, length, c); + } + if(c<0) { + fprintf(stderr, "Illegal utf-8 sequence at Column: %d\n", old_pos); + fprintf(stderr, "Line: >>%s<<\n", 
linestr.c_str()); + return true; + } + + size_t seqLen = (i-pos); + + //printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);fflush(stdout); + + char newSeq[20]; + if( c <= 0xFFFF) { + sprintf(newSeq, "\\u%04X", c); + } else { + sprintf(newSeq, "\\U%08X", c); + } + linestr.replace(pos, seqLen, newSeq); + pos += strlen(newSeq) - 1; + } + } + + return false; +} + +/** + * false = no err + * true = had err + */ +bool fixLine(int /*no*/, std::string &linestr) { + const char *line = linestr.c_str(); + size_t len = linestr.size(); + + // no u' in the line? + if(!strstr(line, "u'") && !strstr(line, "u\"") && !strstr(line, "u8\"")) { + return false; // Nothing to do. No u' or u" detected + } + + // lines such as u8"\u0308" are all ASCII. + // // Quick Check: all ascii? + // if(!hasNonAscii(line, len)) { + // return false; // ASCII + // } + + // // comment or empty line? + // if(isCommentOrEmpty(line, len)) { + // return false; // Comment or just empty + // } + + // start from the end and find all u" cases + size_t pos = len = linestr.size(); + while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { + //printf("found doublequote at %d\n", pos); + if(fixAt(linestr, pos)) return true; + if(pos == 0) break; + pos--; + } + + // reset and find all u' cases + pos = len = linestr.size(); + while((pos>0) && (pos = linestr.rfind("u'", pos)) != std::string::npos) { + //printf("found singlequote at %d\n", pos); + if(fixAt(linestr, pos)) return true; + if(pos == 0) break; + pos--; + } + + // reset and find all u8" cases + pos = len = linestr.size(); + while((pos>0) && (pos = linestr.rfind("u8\"", pos)) != std::string::npos) { + if(fixAt(linestr, pos)) return true; + if(pos == 0) break; + pos--; + } + + //fprintf(stderr, "%d - fixed\n", no); + return false; +} + +int convert(const std::string &infile, const std::string &outfile) { + fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str()); + + std::ifstream inf; + + inf.open(infile.c_str(), 
std::ios::in); + + if(!inf.is_open()) { + fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str()); + cleanup(outfile); + return 1; + } + + std::ofstream outf; + + outf.open(outfile.c_str(), std::ios::out); + + if(!outf.is_open()) { + fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str()); + return 1; + } + + // TODO: any platform variations of #line? + outf << "#line 1 \"" << infile << "\"" << '\n'; + + int no = 0; + std::string linestr; + while( getline( inf, linestr)) { + no++; + if(fixLine(no, linestr)) { + outf.close(); + fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str()); + cleanup(outfile); + return 1; + } + outf << linestr << '\n'; + } + + return 0; +} + +int main(int argc, const char *argv[]) { + prog = argv[0]; + + if(argc != 3) { + usage(); + return 1; + } + + std::string infile = argv[1]; + std::string outfile = argv[2]; + + return convert(infile, outfile); +} + + +#include "utf_impl.cpp" diff --git a/deps/icu-small/source/tools/escapesrc/expect-simple.cpp b/deps/icu-small/source/tools/escapesrc/expect-simple.cpp new file mode 100644 index 0000000000..a6019a8d40 --- /dev/null +++ b/deps/icu-small/source/tools/escapesrc/expect-simple.cpp @@ -0,0 +1,17 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +u"sa\u0127\u0127a"; +u'\u6587'; +u"\U000219F2"; +u"\u039C\u03C5\u03C3\u03C4\u03AE\u03C1\u03B9\u03BF"; + + u"sa\u0127\u0127a"; + u'\u6587'; u"\U000219F2"; + +"\x20\xCC\x81"; +"\xCC\x88\x20"; +"\x73\x61\xC4\xA7\xC4\xA7\x61"; +"\xE6\x96\x87"; +"\xF0\xA1\xA7\xB2"; +"\x73\x61\xC4\xA7\xC4\xA7\x61"; diff --git a/deps/icu-small/source/tools/escapesrc/tblgen.cpp b/deps/icu-small/source/tools/escapesrc/tblgen.cpp new file mode 100644 index 0000000000..9bf59a9db9 --- /dev/null +++ b/deps/icu-small/source/tools/escapesrc/tblgen.cpp @@ -0,0 +1,80 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" +#include "unicode/ucnv.h" +#include "unicode/uniset.h" +#include + +static const char *kConverter = "ibm-1047"; + +int main(int argc, const char *argv[]) { + printf("// %s\n", U_COPYRIGHT_STRING); + printf("// generated by tblgen. You weren't going to edit it by hand, were you?\n"); + printf("\n"); + + UErrorCode status = U_ZERO_ERROR; + LocalUConverterPointer cnv(ucnv_open(kConverter, &status)); + + if(U_FAILURE(status)) { + fprintf(stderr, "Failed to open %s: %s\n", kConverter, u_errorName(status)); + return 1; + } + + printf("static const char cp1047_8859_1[256] = { \n"); + for(int i=0x00; i<0x100; i++) { + char cp1047[1]; + cp1047[0] = i; + UChar u[1]; + UChar *target = u; + const char *source = cp1047; + ucnv_toUnicode(cnv.getAlias(), &target, u+1, &source, cp1047+1, nullptr, true, &status); + if(U_FAILURE(status)) { + fprintf(stderr, "Conversion failure at #%X: %s\n", i, u_errorName(status)); + return 2; + } + printf(" (char)0x%02X, /* %02X */\n", u[0], i); + } + printf("};\n\n"); + + // + // UnicodeSet oldIllegal("[:print:]", status); // [a-zA-Z0-9_}{#)(><%:;.?*+-/^&|~!=,\\u005b\\u005d\\u005c]", status); + UnicodeSet oldIllegal("[0-9 a-z A-Z " + "_ \\{ \\} \\[ \\] # \\( \\) < > % \\: ; . " + "? * + \\- / \\^ \\& | ~ ! = , \\ \" ' ]", status); + + /* + +http://www.lirmm.fr/~ducour/Doc-objets/ISO+IEC+14882-1998.pdf ( note: 1998 ) page 10, section 2.2 says: + +1 The basic source character set consists of 96 characters: the space character, the control characters repre- 15) +senting horizontal tab, vertical tab, form feed, and new-line, plus the following 91 graphical characters: +a b c d e f g h i j k l m n opqrstuvwxyz +A B C D E F G H I J K L M N OPQRSTUVWXYZ +0 12 3 4 5 6 7 8 9 + _ { } [ ] # ( ) < > % : ; . ?*+-/^&|~!=,\" +2 The universal-character-name construct provides a way to name other characters. 
hex-quad: +hexadecimal-digit hexadecimal-digit hexadecimal-digit hexadecimal-digit +universal-character-name: \u hex-quad +\U hex-quad hex-quad +The character designated by the universal-character-name \UNNNNNNNN is that character whose character short name in ISO/IEC 10646 is NNNNNNNN; the character designated by the universal-character-name \uNNNN is that character whose character short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value for a universal character name is less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the uni- versal character name designates a character in the basic source character set, then the program is ill- formed. + + +So basically: printable ASCII plus 0x00-0x1F, 0x7F-0x9F, was all illegal. + +Some discussion at http://unicode.org/mail-arch/unicode-ml/y2003-m10/0471.html + + */ + + + + printf("static const bool oldIllegal[256] = { \n"); + for(UChar i=0x00; i<0x100;i++) { + printf(" %s, /* U+%04X */\n", + (oldIllegal.contains(i))?" true":"false", + i); + } + printf("};\n\n"); + + return 0; +} diff --git a/deps/icu-small/source/tools/escapesrc/test-nochange.cpp b/deps/icu-small/source/tools/escapesrc/test-nochange.cpp new file mode 100644 index 0000000000..8c0d04b809 --- /dev/null +++ b/deps/icu-small/source/tools/escapesrc/test-nochange.cpp @@ -0,0 +1,5 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// This is a source file with no changes needed in it. +// In fact, the only non-ASCII character is the comment line at top. diff --git a/deps/icu-small/source/tools/escapesrc/test-simple.cpp b/deps/icu-small/source/tools/escapesrc/test-simple.cpp new file mode 100644 index 0000000000..b03f28f706 --- /dev/null +++ b/deps/icu-small/source/tools/escapesrc/test-simple.cpp @@ -0,0 +1,17 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +u"saħħa"; +u'文'; +u"𡧲"; +u"Μυστήριο"; + + u"saħħa"; + u'文'; u"𡧲"; + +u8" \u0301"; +u8"\u0308 "; +u8"saħħa"; +u8"文"; +u8"𡧲"; +u8"saħ\u0127a"; diff --git a/deps/icu-small/source/tools/genccode/genccode.c b/deps/icu-small/source/tools/genccode/genccode.c index 2534820bac..d35b589010 100644 --- a/deps/icu-small/source/tools/genccode/genccode.c +++ b/deps/icu-small/source/tools/genccode/genccode.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: gennames.c - * encoding: US-ASCII + * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/gencmn/gencmn.c b/deps/icu-small/source/tools/gencmn/gencmn.c index d328a30521..77f0c20c61 100644 --- a/deps/icu-small/source/tools/gencmn/gencmn.c +++ b/deps/icu-small/source/tools/gencmn/gencmn.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: gencmn.c -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/genrb/derb.cpp b/deps/icu-small/source/tools/genrb/derb.cpp index 22e275ef35..ac26d95be4 100644 --- a/deps/icu-small/source/tools/genrb/derb.cpp +++ b/deps/icu-small/source/tools/genrb/derb.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: derb.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/genrb/errmsg.c b/deps/icu-small/source/tools/genrb/errmsg.c index 7340f01af2..603f26a174 100644 --- a/deps/icu-small/source/tools/genrb/errmsg.c +++ b/deps/icu-small/source/tools/genrb/errmsg.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/errmsg.h b/deps/icu-small/source/tools/genrb/errmsg.h index 5026ecf7be..e01b9558f0 100644 --- a/deps/icu-small/source/tools/genrb/errmsg.h +++ b/deps/icu-small/source/tools/genrb/errmsg.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/genrb.cpp b/deps/icu-small/source/tools/genrb/genrb.cpp index 685fb5884e..68870bd90a 100644 --- a/deps/icu-small/source/tools/genrb/genrb.cpp +++ b/deps/icu-small/source/tools/genrb/genrb.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/genrb.h b/deps/icu-small/source/tools/genrb/genrb.h index 99c604f780..019020a34a 100644 --- a/deps/icu-small/source/tools/genrb/genrb.h +++ b/deps/icu-small/source/tools/genrb/genrb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/parse.cpp b/deps/icu-small/source/tools/genrb/parse.cpp index f448daab6c..88b08c21d0 100644 --- a/deps/icu-small/source/tools/genrb/parse.cpp +++ b/deps/icu-small/source/tools/genrb/parse.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/parse.h b/deps/icu-small/source/tools/genrb/parse.h index d7341be6dd..dfe3b8dda0 100644 --- a/deps/icu-small/source/tools/genrb/parse.h +++ b/deps/icu-small/source/tools/genrb/parse.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/prscmnts.cpp b/deps/icu-small/source/tools/genrb/prscmnts.cpp index edae16c5c5..5d494cd9ad 100644 --- a/deps/icu-small/source/tools/genrb/prscmnts.cpp +++ b/deps/icu-small/source/tools/genrb/prscmnts.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/prscmnts.h b/deps/icu-small/source/tools/genrb/prscmnts.h index c9958cd071..82cf0deaa1 100644 --- a/deps/icu-small/source/tools/genrb/prscmnts.h +++ b/deps/icu-small/source/tools/genrb/prscmnts.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/rbutil.c b/deps/icu-small/source/tools/genrb/rbutil.c index 174b4d7b44..808d35bb1b 100644 --- a/deps/icu-small/source/tools/genrb/rbutil.c +++ b/deps/icu-small/source/tools/genrb/rbutil.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/rbutil.h b/deps/icu-small/source/tools/genrb/rbutil.h index d2a303516a..9a12c50959 100644 --- a/deps/icu-small/source/tools/genrb/rbutil.h +++ b/deps/icu-small/source/tools/genrb/rbutil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/read.c b/deps/icu-small/source/tools/genrb/read.c index 313fb61677..c20b4510a2 100644 --- a/deps/icu-small/source/tools/genrb/read.c +++ b/deps/icu-small/source/tools/genrb/read.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/read.h b/deps/icu-small/source/tools/genrb/read.h index 74b8c823a2..e5b8d155da 100644 --- a/deps/icu-small/source/tools/genrb/read.h +++ b/deps/icu-small/source/tools/genrb/read.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/reslist.cpp b/deps/icu-small/source/tools/genrb/reslist.cpp index 9420184a9d..2e04bbce21 100644 --- a/deps/icu-small/source/tools/genrb/reslist.cpp +++ b/deps/icu-small/source/tools/genrb/reslist.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -271,7 +271,7 @@ StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t return; } - fString.setTo(value, len); + fString.setTo(ConstChar16Ptr(value), len); fString.getTerminatedBuffer(); // Some code relies on NUL-termination. if (U_SUCCESS(errorCode) && fString.isBogus()) { errorCode = U_MEMORY_ALLOCATION_ERROR; @@ -1031,7 +1031,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, if (f16BitUnits.length() <= 1) { // no pool strings to checksum } else if (U_IS_BIG_ENDIAN) { - checksum = computeCRC((const char *)f16BitUnits.getBuffer(), + checksum = computeCRC(reinterpret_cast(f16BitUnits.getBuffer()), (uint32_t)f16BitUnits.length() * 2, checksum); } else { // Swap to big-endian so we get the same checksum on all platforms @@ -1039,7 +1039,7 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg, UnicodeString s(f16BitUnits); s.append((UChar)1); // Ensure that we own this buffer. assert(!s.isBogus()); - uint16_t *p = (uint16_t *)s.getBuffer(); + uint16_t *p = const_cast(reinterpret_cast(s.getBuffer())); for (int32_t count = f16BitUnits.length(); count > 0; --count) { uint16_t x = *p; *p++ = (uint16_t)((x << 8) | (x >> 8)); diff --git a/deps/icu-small/source/tools/genrb/reslist.h b/deps/icu-small/source/tools/genrb/reslist.h index 614be2d10f..53ade5b82c 100644 --- a/deps/icu-small/source/tools/genrb/reslist.h +++ b/deps/icu-small/source/tools/genrb/reslist.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -304,7 +304,7 @@ public: StringBaseResource(int8_t type, const UChar *value, int32_t len, UErrorCode &errorCode); virtual ~StringBaseResource(); - const UChar *getBuffer() const { return fString.getBuffer(); } + const UChar *getBuffer() const { return icu::toUCharPtr(fString.getBuffer()); } int32_t length() const { return fString.length(); } virtual void handlePreWrite(uint32_t *byteOffset); diff --git a/deps/icu-small/source/tools/genrb/rle.c b/deps/icu-small/source/tools/genrb/rle.c index 4a69cd5298..08495c2b4f 100644 --- a/deps/icu-small/source/tools/genrb/rle.c +++ b/deps/icu-small/source/tools/genrb/rle.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/rle.h b/deps/icu-small/source/tools/genrb/rle.h index 93d51a3750..9f580733d5 100644 --- a/deps/icu-small/source/tools/genrb/rle.h +++ b/deps/icu-small/source/tools/genrb/rle.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/ustr.c b/deps/icu-small/source/tools/genrb/ustr.c index 1631a205ff..f1436ae8ae 100644 --- a/deps/icu-small/source/tools/genrb/ustr.c +++ b/deps/icu-small/source/tools/genrb/ustr.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/ustr.h b/deps/icu-small/source/tools/genrb/ustr.h index c27a78104f..91483d1f0f 100644 --- a/deps/icu-small/source/tools/genrb/ustr.h +++ b/deps/icu-small/source/tools/genrb/ustr.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/wrtjava.cpp b/deps/icu-small/source/tools/genrb/wrtjava.cpp index 329753717b..a0d72f72d8 100644 --- a/deps/icu-small/source/tools/genrb/wrtjava.cpp +++ b/deps/icu-small/source/tools/genrb/wrtjava.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/genrb/wrtxml.cpp b/deps/icu-small/source/tools/genrb/wrtxml.cpp index 62fdd2427c..2bfcfebf9e 100644 --- a/deps/icu-small/source/tools/genrb/wrtxml.cpp +++ b/deps/icu-small/source/tools/genrb/wrtxml.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -73,7 +73,7 @@ static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString) u_strToUTF8(NULL, 0, &len, - outString.getBuffer(), + toUCharPtr(outString.getBuffer()), outString.length(), &status); @@ -85,7 +85,7 @@ static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString) u_strToUTF8(dest, len, &len, - outString.getBuffer(), + toUCharPtr(outString.getBuffer()), outString.length(), &status); diff --git a/deps/icu-small/source/tools/icupkg/icupkg.cpp b/deps/icu-small/source/tools/icupkg/icupkg.cpp index 2023930441..ea7be4a909 100644 --- a/deps/icu-small/source/tools/icupkg/icupkg.cpp +++ b/deps/icu-small/source/tools/icupkg/icupkg.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: icupkg.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/pkgdata/pkgdata.cpp b/deps/icu-small/source/tools/pkgdata/pkgdata.cpp index bf93318602..d4dc271732 100644 --- a/deps/icu-small/source/tools/pkgdata/pkgdata.cpp +++ b/deps/icu-small/source/tools/pkgdata/pkgdata.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /****************************************************************************** * Copyright (C) 2000-2016, International Business Machines @@ -26,7 +26,7 @@ #include "putilimp.h" #if U_HAVE_POPEN -#if (U_PF_MINGW <= U_PLATFORM || U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) +#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) /* popen/pclose aren't defined in strict ANSI on Cygwin and MinGW */ #undef __STRICT_ANSI__ #endif @@ -121,7 +121,9 @@ enum { LIBNAME, QUIET, WITHOUT_ASSEMBLY, - PDS_BUILD + PDS_BUILD, + UWP_BUILD, + UWP_ARM_BUILD }; /* This sets the modes that are available */ @@ -163,7 +165,9 @@ static UOption options[]={ /*18*/ UOPTION_DEF( "libname", 'L', UOPT_REQUIRES_ARG), /*19*/ UOPTION_DEF( "quiet", 'q', UOPT_NO_ARG), /*20*/ UOPTION_DEF( "without-assembly", 'w', UOPT_NO_ARG), - /*21*/ UOPTION_DEF( "zos-pds-build", 'z', UOPT_NO_ARG) + /*21*/ UOPTION_DEF("zos-pds-build", 'z', UOPT_NO_ARG), + /*22*/ UOPTION_DEF("windows-uwp-build", 'u', UOPT_NO_ARG), + /*23*/ UOPTION_DEF("windows-uwp-arm-build", 'a', UOPT_NO_ARG) }; /* This enum and the following char array should be kept in sync. */ @@ -250,9 +254,11 @@ const char options_help[][320]={ "Specify a version when packaging in dll or static mode", "Add package to all file names if not present", "Library name to build (if different than package name)", - "Quite mode. (e.g. Do not output a readme file for static libraries)", + "Quiet mode. (e.g. 
Do not output a readme file for static libraries)", "Build the data without assembly code", - "Build PDS dataset (zOS build only)" + "Build PDS dataset (zOS build only)", + "Build for Universal Windows Platform (Windows build only)", + "Set DLL machine type for UWP to target windows ARM (Windows UWP build only)" }; const char *progname = "PKGDATA"; @@ -1751,7 +1757,14 @@ static int32_t pkg_createWithoutAssemblyCode(UPKGOptions *o, const char *targetD #ifdef WINDOWS_WITH_MSVC #define LINK_CMD "link.exe /nologo /release /out:" -#define LINK_FLAGS "/DLL /NOENTRY /MANIFEST:NO /base:0x4ad00000 /implib:" +#define LINK_FLAGS "/DLL /NOENTRY /MANIFEST:NO /implib:" +#ifdef _WIN64 +#define LINK_EXTRA_UWP_FLAGS "/NXCOMPAT /DYNAMICBASE /APPCONTAINER " +#else +#define LINK_EXTRA_UWP_FLAGS "/NXCOMPAT /SAFESEH /DYNAMICBASE /APPCONTAINER /MACHINE:X86" +#endif +#define LINK_EXTRA_UWP_FLAGS_ARM "/NXCOMPAT /DYNAMICBASE /APPCONTAINER /MACHINE:ARM" +#define LINK_EXTRA_NO_UWP_FLAGS "/base:0x4ad00000 " #define LIB_CMD "LIB.exe /nologo /out:" #define LIB_FILE "icudt.lib" #define LIB_EXT UDATA_LIB_SUFFIX @@ -1831,14 +1844,33 @@ static int32_t pkg_createWindowsDLL(const char mode, const char *gencFilePath, U return 0; } - sprintf(cmd, "%s\"%s\" %s\"%s\" \"%s\" %s", - LINK_CMD, - dllFilePath, - LINK_FLAGS, - libFilePath, - gencFilePath, - resFilePath - ); + char *extraFlags = ""; +#ifdef WINDOWS_WITH_MSVC + if (options[UWP_BUILD].doesOccur) + { + if (options[UWP_ARM_BUILD].doesOccur) + { + extraFlags = LINK_EXTRA_UWP_FLAGS_ARM; + } + else + { + extraFlags = LINK_EXTRA_UWP_FLAGS; + } + } + else + { + extraFlags = LINK_EXTRA_NO_UWP_FLAGS; + } +#endif + sprintf(cmd, "%s\"%s\" %s %s\"%s\" \"%s\" %s", + LINK_CMD, + dllFilePath, + extraFlags, + LINK_FLAGS, + libFilePath, + gencFilePath, + resFilePath + ); } result = runCommand(cmd, TRUE); diff --git a/deps/icu-small/source/tools/pkgdata/pkgtypes.c b/deps/icu-small/source/tools/pkgdata/pkgtypes.c index eadf634db3..43ee3dfb5e 100644 --- 
a/deps/icu-small/source/tools/pkgdata/pkgtypes.c +++ b/deps/icu-small/source/tools/pkgdata/pkgtypes.c @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /************************************************************************** * diff --git a/deps/icu-small/source/tools/pkgdata/pkgtypes.h b/deps/icu-small/source/tools/pkgdata/pkgtypes.h index c7eeba42cb..3297d0a2da 100644 --- a/deps/icu-small/source/tools/pkgdata/pkgtypes.h +++ b/deps/icu-small/source/tools/pkgdata/pkgtypes.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /************************************************************************** * diff --git a/deps/icu-small/source/tools/toolutil/collationinfo.cpp b/deps/icu-small/source/tools/toolutil/collationinfo.cpp index bbb1839ef1..6bad90e133 100644 --- a/deps/icu-small/source/tools/toolutil/collationinfo.cpp +++ b/deps/icu-small/source/tools/toolutil/collationinfo.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/toolutil/collationinfo.h b/deps/icu-small/source/tools/toolutil/collationinfo.h index 48cd556184..815b89d40d 100644 --- a/deps/icu-small/source/tools/toolutil/collationinfo.h +++ b/deps/icu-small/source/tools/toolutil/collationinfo.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/toolutil/dbgutil.cpp b/deps/icu-small/source/tools/toolutil/dbgutil.cpp index 345715163b..29bab92753 100644 --- a/deps/icu-small/source/tools/toolutil/dbgutil.cpp +++ b/deps/icu-small/source/tools/toolutil/dbgutil.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: @@ -117,7 +117,7 @@ U_CAPI int32_t udbg_stoi(const UnicodeString &s) { char ch[256]; - const UChar *u = s.getBuffer(); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ @@ -129,7 +129,7 @@ U_CAPI double udbg_stod(const UnicodeString &s) { char ch[256]; - const UChar *u = s.getBuffer(); + const UChar *u = toUCharPtr(s.getBuffer()); int32_t len = s.length(); u_UCharsToChars(u, ch, len); ch[len] = 0; /* include terminating \0 */ diff --git a/deps/icu-small/source/tools/toolutil/dbgutil.h b/deps/icu-small/source/tools/toolutil/dbgutil.h index 704090cc21..314a9ae885 100644 --- a/deps/icu-small/source/tools/toolutil/dbgutil.h +++ b/deps/icu-small/source/tools/toolutil/dbgutil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* diff --git a/deps/icu-small/source/tools/toolutil/denseranges.cpp b/deps/icu-small/source/tools/toolutil/denseranges.cpp index 3b83715f28..f5e52b1bbb 100644 --- a/deps/icu-small/source/tools/toolutil/denseranges.cpp +++ b/deps/icu-small/source/tools/toolutil/denseranges.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: denseranges.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/denseranges.h b/deps/icu-small/source/tools/toolutil/denseranges.h index 7b072f4654..c489ca47d8 100644 --- a/deps/icu-small/source/tools/toolutil/denseranges.h +++ b/deps/icu-small/source/tools/toolutil/denseranges.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: denseranges.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/filestrm.c b/deps/icu-small/source/tools/toolutil/filestrm.c deleted file mode 100644 index 446125de6b..0000000000 --- a/deps/icu-small/source/tools/toolutil/filestrm.c +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* -* File FILESTRM.C -* -* @author Glenn Marcy -* -* Modification History: -* -* Date Name Description -* 5/8/98 gm Created -* 03/02/99 stephen Reordered params in ungetc to match stdio -* Added wopen -* 3/29/99 helena Merged Stephen and Bertrand's changes. -* -****************************************************************************** -*/ - -#include "filestrm.h" - -#include "cmemory.h" - -#include - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_open(const char* filename, const char* mode) -{ - if(filename != NULL && *filename != 0 && mode != NULL && *mode != 0) { - FILE *file = fopen(filename, mode); - return (FileStream*)file; - } else { - return NULL; - } -} - -/* -U_CAPI FileStream* U_EXPORT2 -T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode) -{ - // TBD: _wfopen is believed to be MS-specific? -#if U_PLATFORM_USES_ONLY_WIN32_API - FILE* result = _wfopen(filename, mode); - return (FileStream*)result; -#else - size_t fnMbsSize, mdMbsSize; - char *fn, *md; - FILE *result; - - // convert from wchar_t to char - fnMbsSize = wcstombs(NULL, filename, ((size_t)-1) >> 1); - fn = (char*)uprv_malloc(fnMbsSize+2); - wcstombs(fn, filename, fnMbsSize); - fn[fnMbsSize] = 0; - - mdMbsSize = wcstombs(NULL, mode, ((size_t)-1) >> 1); - md = (char*)uprv_malloc(mdMbsSize+2); - wcstombs(md, mode, mdMbsSize); - md[mdMbsSize] = 0; - - result = fopen(fn, md); - uprv_free(fn); - uprv_free(md); - return (FileStream*)result; -#endif -} -*/ -U_CAPI void U_EXPORT2 -T_FileStream_close(FileStream* fileStream) -{ - if (fileStream != 0) - fclose((FILE*)fileStream); -} - -U_CAPI UBool U_EXPORT2 -T_FileStream_file_exists(const char* filename) -{ - FILE* temp = fopen(filename, "r"); - if (temp) { - fclose(temp); - return TRUE; - } else - return FALSE; -} - -/*static const int32_t kEOF; -const int32_t FileStream::kEOF = EOF;*/ - -/* -U_CAPI FileStream* -T_FileStream_tmpfile() -{ - FILE* file = 
tmpfile(); - return (FileStream*)file; -} -*/ - -U_CAPI int32_t U_EXPORT2 -T_FileStream_read(FileStream* fileStream, void* addr, int32_t len) -{ - return fread(addr, 1, len, (FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len) -{ - - return fwrite(addr, 1, len, (FILE*)fileStream); -} - -U_CAPI void U_EXPORT2 -T_FileStream_rewind(FileStream* fileStream) -{ - rewind((FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_putc(FileStream* fileStream, int32_t ch) -{ - int32_t c = fputc(ch, (FILE*)fileStream); - return c; -} - -U_CAPI int U_EXPORT2 -T_FileStream_getc(FileStream* fileStream) -{ - int c = fgetc((FILE*)fileStream); - return c; -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_ungetc(int32_t ch, FileStream* fileStream) -{ - - int32_t c = ungetc(ch, (FILE*)fileStream); - return c; -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_peek(FileStream* fileStream) -{ - int32_t c = fgetc((FILE*)fileStream); - return ungetc(c, (FILE*)fileStream); -} - -U_CAPI char* U_EXPORT2 -T_FileStream_readLine(FileStream* fileStream, char* buffer, int32_t length) -{ - return fgets(buffer, length, (FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_writeLine(FileStream* fileStream, const char* buffer) -{ - return fputs(buffer, (FILE*)fileStream); -} - -U_CAPI int32_t U_EXPORT2 -T_FileStream_size(FileStream* fileStream) -{ - int32_t savedPos = ftell((FILE*)fileStream); - int32_t size = 0; - - /*Changes by Bertrand A. D. doesn't affect the current position - goes to the end of the file before ftell*/ - fseek((FILE*)fileStream, 0, SEEK_END); - size = (int32_t)ftell((FILE*)fileStream); - fseek((FILE*)fileStream, savedPos, SEEK_SET); - return size; -} - -U_CAPI int U_EXPORT2 -T_FileStream_eof(FileStream* fileStream) -{ - return feof((FILE*)fileStream); -} - -/* - Warning - This function may not work consistently on all platforms - (e.g. 
HP-UX, FreeBSD and MacOSX don't return an error when - putc is used on a file opened as readonly) -*/ -U_CAPI int U_EXPORT2 -T_FileStream_error(FileStream* fileStream) -{ - return (fileStream == 0 || ferror((FILE*)fileStream)); -} - -/* This function doesn't work. */ -/* force the stream to set its error flag*/ -/*U_CAPI void U_EXPORT2 -T_FileStream_setError(FileStream* fileStream) -{ - fseek((FILE*)fileStream, 99999, SEEK_SET); -} -*/ - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stdin(void) -{ - return (FileStream*)stdin; -} - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stdout(void) -{ - return (FileStream*)stdout; -} - - -U_CAPI FileStream* U_EXPORT2 -T_FileStream_stderr(void) -{ - return (FileStream*)stderr; -} - -U_CAPI UBool U_EXPORT2 -T_FileStream_remove(const char* fileName){ - return (remove(fileName) == 0); -} diff --git a/deps/icu-small/source/tools/toolutil/filestrm.cpp b/deps/icu-small/source/tools/toolutil/filestrm.cpp new file mode 100644 index 0000000000..cfffa1b75d --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/filestrm.cpp @@ -0,0 +1,227 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File FILESTRM.C +* +* @author Glenn Marcy +* +* Modification History: +* +* Date Name Description +* 5/8/98 gm Created +* 03/02/99 stephen Reordered params in ungetc to match stdio +* Added wopen +* 3/29/99 helena Merged Stephen and Bertrand's changes. 
+* +****************************************************************************** +*/ + +#include "filestrm.h" + +#include "cmemory.h" + +#include + +U_CAPI FileStream* U_EXPORT2 +T_FileStream_open(const char* filename, const char* mode) +{ + if(filename != NULL && *filename != 0 && mode != NULL && *mode != 0) { + FILE *file = fopen(filename, mode); + return (FileStream*)file; + } else { + return NULL; + } +} + +/* +U_CAPI FileStream* U_EXPORT2 +T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode) +{ + // TBD: _wfopen is believed to be MS-specific? +#if U_PLATFORM_USES_ONLY_WIN32_API + FILE* result = _wfopen(filename, mode); + return (FileStream*)result; +#else + size_t fnMbsSize, mdMbsSize; + char *fn, *md; + FILE *result; + + // convert from wchar_t to char + fnMbsSize = wcstombs(NULL, filename, ((size_t)-1) >> 1); + fn = (char*)uprv_malloc(fnMbsSize+2); + wcstombs(fn, filename, fnMbsSize); + fn[fnMbsSize] = 0; + + mdMbsSize = wcstombs(NULL, mode, ((size_t)-1) >> 1); + md = (char*)uprv_malloc(mdMbsSize+2); + wcstombs(md, mode, mdMbsSize); + md[mdMbsSize] = 0; + + result = fopen(fn, md); + uprv_free(fn); + uprv_free(md); + return (FileStream*)result; +#endif +} +*/ +U_CAPI void U_EXPORT2 +T_FileStream_close(FileStream* fileStream) +{ + if (fileStream != 0) + fclose((FILE*)fileStream); +} + +U_CAPI UBool U_EXPORT2 +T_FileStream_file_exists(const char* filename) +{ + FILE* temp = fopen(filename, "r"); + if (temp) { + fclose(temp); + return TRUE; + } else + return FALSE; +} + +/*static const int32_t kEOF; +const int32_t FileStream::kEOF = EOF;*/ + +/* +U_CAPI FileStream* +T_FileStream_tmpfile() +{ + FILE* file = tmpfile(); + return (FileStream*)file; +} +*/ + +U_CAPI int32_t U_EXPORT2 +T_FileStream_read(FileStream* fileStream, void* addr, int32_t len) +{ + return fread(addr, 1, len, (FILE*)fileStream); +} + +U_CAPI int32_t U_EXPORT2 +T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len) +{ + + return fwrite(addr, 1, len, 
(FILE*)fileStream); +} + +U_CAPI void U_EXPORT2 +T_FileStream_rewind(FileStream* fileStream) +{ + rewind((FILE*)fileStream); +} + +U_CAPI int32_t U_EXPORT2 +T_FileStream_putc(FileStream* fileStream, int32_t ch) +{ + int32_t c = fputc(ch, (FILE*)fileStream); + return c; +} + +U_CAPI int U_EXPORT2 +T_FileStream_getc(FileStream* fileStream) +{ + int c = fgetc((FILE*)fileStream); + return c; +} + +U_CAPI int32_t U_EXPORT2 +T_FileStream_ungetc(int32_t ch, FileStream* fileStream) +{ + + int32_t c = ungetc(ch, (FILE*)fileStream); + return c; +} + +U_CAPI int32_t U_EXPORT2 +T_FileStream_peek(FileStream* fileStream) +{ + int32_t c = fgetc((FILE*)fileStream); + return ungetc(c, (FILE*)fileStream); +} + +U_CAPI char* U_EXPORT2 +T_FileStream_readLine(FileStream* fileStream, char* buffer, int32_t length) +{ + return fgets(buffer, length, (FILE*)fileStream); +} + +U_CAPI int32_t U_EXPORT2 +T_FileStream_writeLine(FileStream* fileStream, const char* buffer) +{ + return fputs(buffer, (FILE*)fileStream); +} + +U_CAPI int32_t U_EXPORT2 +T_FileStream_size(FileStream* fileStream) +{ + int32_t savedPos = ftell((FILE*)fileStream); + int32_t size = 0; + + /*Changes by Bertrand A. D. doesn't affect the current position + goes to the end of the file before ftell*/ + fseek((FILE*)fileStream, 0, SEEK_END); + size = (int32_t)ftell((FILE*)fileStream); + fseek((FILE*)fileStream, savedPos, SEEK_SET); + return size; +} + +U_CAPI int U_EXPORT2 +T_FileStream_eof(FileStream* fileStream) +{ + return feof((FILE*)fileStream); +} + +/* + Warning + This function may not work consistently on all platforms + (e.g. HP-UX, FreeBSD and MacOSX don't return an error when + putc is used on a file opened as readonly) +*/ +U_CAPI int U_EXPORT2 +T_FileStream_error(FileStream* fileStream) +{ + return (fileStream == 0 || ferror((FILE*)fileStream)); +} + +/* This function doesn't work. 
*/ +/* force the stream to set its error flag*/ +/*U_CAPI void U_EXPORT2 +T_FileStream_setError(FileStream* fileStream) +{ + fseek((FILE*)fileStream, 99999, SEEK_SET); +} +*/ + +U_CAPI FileStream* U_EXPORT2 +T_FileStream_stdin(void) +{ + return (FileStream*)stdin; +} + +U_CAPI FileStream* U_EXPORT2 +T_FileStream_stdout(void) +{ + return (FileStream*)stdout; +} + + +U_CAPI FileStream* U_EXPORT2 +T_FileStream_stderr(void) +{ + return (FileStream*)stderr; +} + +U_CAPI UBool U_EXPORT2 +T_FileStream_remove(const char* fileName){ + return (remove(fileName) == 0); +} diff --git a/deps/icu-small/source/tools/toolutil/filestrm.h b/deps/icu-small/source/tools/toolutil/filestrm.h index b423451406..86fac3063f 100644 --- a/deps/icu-small/source/tools/toolutil/filestrm.h +++ b/deps/icu-small/source/tools/toolutil/filestrm.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** diff --git a/deps/icu-small/source/tools/toolutil/filetools.cpp b/deps/icu-small/source/tools/toolutil/filetools.cpp index b0d4ed81a5..176a791b0d 100644 --- a/deps/icu-small/source/tools/toolutil/filetools.cpp +++ b/deps/icu-small/source/tools/toolutil/filetools.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /****************************************************************************** * Copyright (C) 2009-2013, International Business Machines diff --git a/deps/icu-small/source/tools/toolutil/filetools.h b/deps/icu-small/source/tools/toolutil/filetools.h index 5ede02761a..6a25c3601c 100644 --- a/deps/icu-small/source/tools/toolutil/filetools.h +++ b/deps/icu-small/source/tools/toolutil/filetools.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: filetools.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/flagparser.c b/deps/icu-small/source/tools/toolutil/flagparser.c deleted file mode 100644 index c87beb147c..0000000000 --- a/deps/icu-small/source/tools/toolutil/flagparser.c +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2009-2015, International Business Machines - * Corporation and others. All Rights Reserved. 
- ******************************************************************************* - */ - -#include "flagparser.h" -#include "filestrm.h" -#include "cstring.h" -#include "cmemory.h" - -#define DEFAULT_BUFFER_SIZE 512 - -static int32_t currentBufferSize = DEFAULT_BUFFER_SIZE; - -static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status); -static int32_t getFlagOffset(const char *buffer, int32_t bufferSize); - -/* - * Opens the given fileName and reads in the information storing the data in flagBuffer. - */ -U_CAPI int32_t U_EXPORT2 -parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status) { - char* buffer = NULL; - char* tmpFlagBuffer = NULL; - UBool allocateMoreSpace = FALSE; - int32_t idx, i; - int32_t result = 0; - - FileStream *f = T_FileStream_open(fileName, "r"); - if (f == NULL) { - *status = U_FILE_ACCESS_ERROR; - goto parseFlagsFile_cleanup; - } - - buffer = uprv_malloc(sizeof(char) * currentBufferSize); - tmpFlagBuffer = uprv_malloc(sizeof(char) * flagBufferSize); - - if (buffer == NULL || tmpFlagBuffer == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto parseFlagsFile_cleanup; - } - - do { - if (allocateMoreSpace) { - allocateMoreSpace = FALSE; - currentBufferSize *= 2; - uprv_free(buffer); - buffer = uprv_malloc(sizeof(char) * currentBufferSize); - if (buffer == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto parseFlagsFile_cleanup; - } - } - for (i = 0; i < numOfFlags;) { - if (T_FileStream_readLine(f, buffer, currentBufferSize) == NULL) { - /* End of file reached. 
*/ - break; - } - if (buffer[0] == '#') { - continue; - } - - if ((int32_t)uprv_strlen(buffer) == (currentBufferSize - 1) && buffer[currentBufferSize-2] != '\n') { - /* Allocate more space for buffer if it didnot read the entrire line */ - allocateMoreSpace = TRUE; - T_FileStream_rewind(f); - break; - } else { - idx = extractFlag(buffer, currentBufferSize, tmpFlagBuffer, flagBufferSize, flagNames, numOfFlags, status); - if (U_FAILURE(*status)) { - if (*status == U_BUFFER_OVERFLOW_ERROR) { - result = currentBufferSize; - } else { - result = -1; - } - break; - } else { - if (flagNames != NULL) { - if (idx >= 0) { - uprv_strcpy(flagBuffer[idx], tmpFlagBuffer); - } else { - /* No match found. Skip it. */ - continue; - } - } else { - uprv_strcpy(flagBuffer[i++], tmpFlagBuffer); - } - } - } - } - } while (allocateMoreSpace && U_SUCCESS(*status)); - -parseFlagsFile_cleanup: - uprv_free(tmpFlagBuffer); - uprv_free(buffer); - - T_FileStream_close(f); - - if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) { - return -1; - } - - if (U_SUCCESS(*status) && result == 0) { - currentBufferSize = DEFAULT_BUFFER_SIZE; - } - - return result; -} - - -/* - * Extract the setting after the '=' and store it in flag excluding the newline character. - */ -static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char **flagNames, int32_t numOfFlags, UErrorCode *status) { - int32_t i, idx = -1; - char *pBuffer; - int32_t offset=0; - UBool bufferWritten = FALSE; - - if (buffer[0] != 0) { - /* Get the offset (i.e. position after the '=') */ - offset = getFlagOffset(buffer, bufferSize); - pBuffer = buffer+offset; - for(i = 0;;i++) { - if (i >= flagSize) { - *status = U_BUFFER_OVERFLOW_ERROR; - return -1; - } - if (pBuffer[i+1] == 0) { - /* Indicates a new line character. End here. 
*/ - flag[i] = 0; - break; - } - - flag[i] = pBuffer[i]; - if (i == 0) { - bufferWritten = TRUE; - } - } - } - - if (!bufferWritten) { - flag[0] = 0; - } - - if (flagNames != NULL && offset>0) { - offset--; /* Move offset back 1 because of '='*/ - for (i = 0; i < numOfFlags; i++) { - if (uprv_strncmp(buffer, flagNames[i], offset) == 0) { - idx = i; - break; - } - } - } - - return idx; -} - -/* - * Get the position after the '=' character. - */ -static int32_t getFlagOffset(const char *buffer, int32_t bufferSize) { - int32_t offset = 0; - - for (offset = 0; offset < bufferSize;offset++) { - if (buffer[offset] == '=') { - offset++; - break; - } - } - - if (offset == bufferSize || (offset - 1) == bufferSize) { - offset = 0; - } - - return offset; -} diff --git a/deps/icu-small/source/tools/toolutil/flagparser.cpp b/deps/icu-small/source/tools/toolutil/flagparser.cpp new file mode 100644 index 0000000000..c8d791c636 --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/flagparser.cpp @@ -0,0 +1,180 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/****************************************************************************** + * Copyright (C) 2009-2015, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************************* + */ + +#include "flagparser.h" +#include "filestrm.h" +#include "cstring.h" +#include "cmemory.h" + +#define DEFAULT_BUFFER_SIZE 512 + +static int32_t currentBufferSize = DEFAULT_BUFFER_SIZE; + +static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status); +static int32_t getFlagOffset(const char *buffer, int32_t bufferSize); + +/* + * Opens the given fileName and reads in the information storing the data in flagBuffer. 
+ */ +U_CAPI int32_t U_EXPORT2 +parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status) { + char* buffer = NULL; + char* tmpFlagBuffer = NULL; + UBool allocateMoreSpace = FALSE; + int32_t idx, i; + int32_t result = 0; + + FileStream *f = T_FileStream_open(fileName, "r"); + if (f == NULL) { + *status = U_FILE_ACCESS_ERROR; + goto parseFlagsFile_cleanup; + } + + buffer = (char *)uprv_malloc(sizeof(char) * currentBufferSize); + tmpFlagBuffer = (char *)uprv_malloc(sizeof(char) * flagBufferSize); + + if (buffer == NULL || tmpFlagBuffer == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto parseFlagsFile_cleanup; + } + + do { + if (allocateMoreSpace) { + allocateMoreSpace = FALSE; + currentBufferSize *= 2; + uprv_free(buffer); + buffer = (char *)uprv_malloc(sizeof(char) * currentBufferSize); + if (buffer == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto parseFlagsFile_cleanup; + } + } + for (i = 0; i < numOfFlags;) { + if (T_FileStream_readLine(f, buffer, currentBufferSize) == NULL) { + /* End of file reached. */ + break; + } + if (buffer[0] == '#') { + continue; + } + + if ((int32_t)uprv_strlen(buffer) == (currentBufferSize - 1) && buffer[currentBufferSize-2] != '\n') { + /* Allocate more space for buffer if it did not read the entire line */ + allocateMoreSpace = TRUE; + T_FileStream_rewind(f); + break; + } else { + idx = extractFlag(buffer, currentBufferSize, tmpFlagBuffer, flagBufferSize, flagNames, numOfFlags, status); + if (U_FAILURE(*status)) { + if (*status == U_BUFFER_OVERFLOW_ERROR) { + result = currentBufferSize; + } else { + result = -1; + } + break; + } else { + if (flagNames != NULL) { + if (idx >= 0) { + uprv_strcpy(flagBuffer[idx], tmpFlagBuffer); + } else { + /* No match found. Skip it.
*/ + continue; + } + } else { + uprv_strcpy(flagBuffer[i++], tmpFlagBuffer); + } + } + } + } + } while (allocateMoreSpace && U_SUCCESS(*status)); + +parseFlagsFile_cleanup: + uprv_free(tmpFlagBuffer); + uprv_free(buffer); + + T_FileStream_close(f); + + if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) { + return -1; + } + + if (U_SUCCESS(*status) && result == 0) { + currentBufferSize = DEFAULT_BUFFER_SIZE; + } + + return result; +} + + +/* + * Extract the setting after the '=' and store it in flag excluding the newline character. + */ +static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char **flagNames, int32_t numOfFlags, UErrorCode *status) { + int32_t i, idx = -1; + char *pBuffer; + int32_t offset=0; + UBool bufferWritten = FALSE; + + if (buffer[0] != 0) { + /* Get the offset (i.e. position after the '=') */ + offset = getFlagOffset(buffer, bufferSize); + pBuffer = buffer+offset; + for(i = 0;;i++) { + if (i >= flagSize) { + *status = U_BUFFER_OVERFLOW_ERROR; + return -1; + } + if (pBuffer[i+1] == 0) { + /* Indicates a new line character. End here. */ + flag[i] = 0; + break; + } + + flag[i] = pBuffer[i]; + if (i == 0) { + bufferWritten = TRUE; + } + } + } + + if (!bufferWritten) { + flag[0] = 0; + } + + if (flagNames != NULL && offset>0) { + offset--; /* Move offset back 1 because of '='*/ + for (i = 0; i < numOfFlags; i++) { + if (uprv_strncmp(buffer, flagNames[i], offset) == 0) { + idx = i; + break; + } + } + } + + return idx; +} + +/* + * Get the position after the '=' character. 
+ */ +static int32_t getFlagOffset(const char *buffer, int32_t bufferSize) { + int32_t offset = 0; + + for (offset = 0; offset < bufferSize;offset++) { + if (buffer[offset] == '=') { + offset++; + break; + } + } + + if (offset == bufferSize || (offset - 1) == bufferSize) { + offset = 0; + } + + return offset; +} diff --git a/deps/icu-small/source/tools/toolutil/flagparser.h b/deps/icu-small/source/tools/toolutil/flagparser.h index 4aa03c8a5f..aa42547164 100644 --- a/deps/icu-small/source/tools/toolutil/flagparser.h +++ b/deps/icu-small/source/tools/toolutil/flagparser.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: flagparser.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/package.cpp b/deps/icu-small/source/tools/toolutil/package.cpp index d069147708..e3354b3524 100644 --- a/deps/icu-small/source/tools/toolutil/package.cpp +++ b/deps/icu-small/source/tools/toolutil/package.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: package.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -1290,7 +1290,7 @@ void Package::setItemCapacity(int32_t max) Item *oldItems = items; if(newItems == NULL) { fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", - (unsigned long)max*sizeof(items[0]), max); + (unsigned long)(max*sizeof(items[0])), max); exit(U_MEMORY_ALLOCATION_ERROR); } if(items && itemCount>0) { diff --git a/deps/icu-small/source/tools/toolutil/package.h b/deps/icu-small/source/tools/toolutil/package.h index 4d60202999..3263c84feb 100644 --- a/deps/icu-small/source/tools/toolutil/package.h +++ b/deps/icu-small/source/tools/toolutil/package.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: package.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/pkg_genc.c b/deps/icu-small/source/tools/toolutil/pkg_genc.c deleted file mode 100644 index c85a12322b..0000000000 --- a/deps/icu-small/source/tools/toolutil/pkg_genc.c +++ /dev/null @@ -1,1199 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/****************************************************************************** - * Copyright (C) 2009-2016, International Business Machines - * Corporation and others. 
All Rights Reserved. - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if U_PLATFORM_HAS_WIN32_API -# define VC_EXTRALEAN -# define WIN32_LEAN_AND_MEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -#include -#include -# ifdef __GNUC__ -# define WINDOWS_WITH_GNUC -# endif -#endif - -#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H -# define U_ELF -#endif - -#ifdef U_ELF -# include -# if defined(ELFCLASS64) -# define U_ELF64 -# endif - /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ -# ifndef EM_X86_64 -# define EM_X86_64 62 -# endif -# define ICU_ENTRY_OFFSET 0 -#endif - -#include -#include -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "filestrm.h" -#include "toolutil.h" -#include "unicode/uclean.h" -#include "uoptions.h" -#include "pkg_genc.h" - -#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) - -#define HEX_0X 0 /* 0x1234 */ -#define HEX_0H 1 /* 01234h */ - -/* prototypes --------------------------------------------------------------- */ -static void -getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); - -static uint32_t -write8(FileStream *out, uint8_t byte, uint32_t column); - -static uint32_t -write32(FileStream *out, uint32_t byte, uint32_t column); - -#if U_PLATFORM == U_PF_OS400 -static uint32_t -write8str(FileStream *out, uint8_t byte, uint32_t column); -#endif -/* -------------------------------------------------------------------------- */ - -/* -Creating Template Files for New Platforms - -Let the cc compiler help you get started. -Compile this program - const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; -with the -S option to produce assembly output. 
- -For example, this will generate array.s: -gcc -S array.c - -This will produce a .s file that may look like this: - - .file "array.c" - .version "01.01" -gcc2_compiled.: - .globl x - .section .rodata - .align 4 - .type x,@object - .size x,20 -x: - .long 1 - .long 2 - .long -559038737 - .long -1 - .long 16 - .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" - -which gives a starting point that will compile, and can be transformed -to become the template, generally with some consulting of as docs and -some experimentation. - -If you want ICU to automatically use this assembly, you should -specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, -where the name is the compiler or platform that you used in this -assemblyHeader data structure. -*/ -static const struct AssemblyType { - const char *name; - const char *header; - const char *beginLine; - const char *footer; - int8_t hexType; /* HEX_0X or HEX_0h */ -} assemblyHeader[] = { - /* For gcc assemblers, the meaning of .align changes depending on the */ - /* hardware, so we use .balign 16 which always means 16 bytes. 
*/ - /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */ - {"gcc", - ".globl %s\n" - "\t.section .note.GNU-stack,\"\",%%progbits\n" - "\t.section .rodata\n" - "\t.balign 16\n" - "#ifdef U_HIDE_DATA_SYMBOL\n" - "\t.hidden %s\n" - "#endif\n" - "\t.type %s,%%object\n" - "%s:\n\n", - - ".long ",".size %s, .-%s\n",HEX_0X - }, - {"gcc-darwin", - /*"\t.section __TEXT,__text,regular,pure_instructions\n" - "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ - ".globl _%s\n" - "#ifdef U_HIDE_DATA_SYMBOL\n" - "\t.private_extern _%s\n" - "#endif\n" - "\t.data\n" - "\t.const\n" - "\t.balign 16\n" - "_%s:\n\n", - - ".long ","",HEX_0X - }, - {"gcc-cygwin", - ".globl _%s\n" - "\t.section .rodata\n" - "\t.balign 16\n" - "_%s:\n\n", - - ".long ","",HEX_0X - }, - {"gcc-mingw64", - ".globl %s\n" - "\t.section .rodata\n" - "\t.balign 16\n" - "%s:\n\n", - - ".long ","",HEX_0X - }, -/* 16 bytes alignment. */ -/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */ - {"sun", - "\t.section \".rodata\"\n" - "\t.align 16\n" - ".globl %s\n" - "%s:\n", - - ".word ","",HEX_0X - }, -/* 16 bytes alignment for sun-x86. */ -/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */ - {"sun-x86", - "Drodata.rodata:\n" - "\t.type Drodata.rodata,@object\n" - "\t.size Drodata.rodata,0\n" - "\t.globl %s\n" - "\t.align 16\n" - "%s:\n", - - ".4byte ","",HEX_0X - }, -/* 1<<4 bit alignment for aix. 
*/ -/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */ - {"xlc", - ".globl %s{RO}\n" - "\t.toc\n" - "%s:\n" - "\t.csect %s{RO}, 4\n", - - ".long ","",HEX_0X - }, - {"aCC-ia64", - "\t.file \"%s.s\"\n" - "\t.type %s,@object\n" - "\t.global %s\n" - "\t.secalias .abe$0.rodata, \".rodata\"\n" - "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" - "\t.align 16\n" - "%s::\t", - - "data4 ","",HEX_0X - }, - {"aCC-parisc", - "\t.SPACE $TEXT$\n" - "\t.SUBSPA $LIT$\n" - "%s\n" - "\t.EXPORT %s\n" - "\t.ALIGN 16\n", - - ".WORD ","",HEX_0X - }, -/* align 16 bytes */ -/* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */ - { "masm", - "\tTITLE %s\n" - "; generated by genccode\n" - ".386\n" - ".model flat\n" - "\tPUBLIC _%s\n" - "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" - "\tALIGN 16\n" - "_%s\tLABEL DWORD\n", - "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H - } -}; - -static int32_t assemblyHeaderIndex = -1; -static int32_t hexType = HEX_0X; - -U_CAPI UBool U_EXPORT2 -checkAssemblyHeaderName(const char* optAssembly) { - int32_t idx; - assemblyHeaderIndex = -1; - for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) { - if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { - assemblyHeaderIndex = idx; - hexType = assemblyHeader[idx].hexType; /* set the hex type */ - return TRUE; - } - } - - return FALSE; -} - - -U_CAPI void U_EXPORT2 -printAssemblyHeadersToStdErr(void) { - int32_t idx; - fprintf(stderr, "%s", assemblyHeader[0].name); - for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) { - fprintf(stderr, ", %s", assemblyHeader[idx].name); - } - fprintf(stderr, - ")\n"); -} - -U_CAPI void U_EXPORT2 -writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { - uint32_t column = MAX_COLUMN; - char entry[64]; - uint32_t buffer[1024]; - char *bufferStr = (char *)buffer; - FileStream 
*in, *out; - size_t i, length; - - in=T_FileStream_open(filename, "rb"); - if(in==NULL) { - fprintf(stderr, "genccode: unable to open input file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename); - out=T_FileStream_open(bufferStr, "w"); - if(out==NULL) { - fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); - exit(U_FILE_ACCESS_ERROR); - } - - if (outFilePath != NULL) { - uprv_strcpy(outFilePath, bufferStr); - } - -#ifdef WINDOWS_WITH_GNUC - /* Need to fix the file seperator character when using MinGW. */ - swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); -#endif - - if(optEntryPoint != NULL) { - uprv_strcpy(entry, optEntryPoint); - uprv_strcat(entry, "_dat"); - } - - /* turn dashes or dots in the entry name into underscores */ - length=uprv_strlen(entry); - for(i=0; i= 0 ; i--) -#endif - { - uint8_t value = ptrIdx[i]; - if (value || seenNonZero) { - *(s++)=hexToStr[value>>4]; - *(s++)=hexToStr[value&0xF]; - seenNonZero = 1; - } - } - if(hexType==HEX_0H) { - *(s++)='h'; - } - } - - *(s++)=0; - T_FileStream_writeLine(out, bitFieldStr); - return column; -} - -static uint32_t -write8(FileStream *out, uint8_t byte, uint32_t column) { - char s[4]; - int i=0; - - /* convert the byte value to a string */ - if(byte>=100) { - s[i++]=(char)('0'+byte/100); - byte%=100; - } - if(i>0 || byte>=10) { - s[i++]=(char)('0'+byte/10); - byte%=10; - } - s[i++]=(char)('0'+byte); - s[i]=0; - - /* write the value, possibly with comma and newline */ - if(column==MAX_COLUMN) { - /* first byte */ - column=1; - } else if(column<16) { - T_FileStream_writeLine(out, ","); - ++column; - } else { - T_FileStream_writeLine(out, ",\n"); - column=1; - } - T_FileStream_writeLine(out, s); - return column; -} - -#if U_PLATFORM == U_PF_OS400 -static uint32_t -write8str(FileStream *out, uint8_t byte, uint32_t column) { - char s[8]; - - if (byte > 7) - sprintf(s, "\\x%X", byte); - else - sprintf(s, "\\%X", 
byte); - - /* write the value, possibly with comma and newline */ - if(column==MAX_COLUMN) { - /* first byte */ - column=1; - T_FileStream_writeLine(out, "\""); - } else if(column<24) { - ++column; - } else { - T_FileStream_writeLine(out, "\"\n\""); - column=1; - } - T_FileStream_writeLine(out, s); - return column; -} -#endif - -static void -getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { - const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); - - /* copy path */ - if(destdir!=NULL && *destdir!=0) { - do { - *outFilename++=*destdir++; - } while(*destdir!=0); - if(*(outFilename-1)!=U_FILE_SEP_CHAR) { - *outFilename++=U_FILE_SEP_CHAR; - } - inFilename=basename; - } else { - while(inFilenameELFCLASS64 - ) { - fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); - exit(U_UNSUPPORTED_ERROR); - } - - *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ -#ifdef U_ELF64 - if(*pBits!=32 && *pBits!=64) { - fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); - exit(U_UNSUPPORTED_ERROR); - } -#else - if(*pBits!=32) { - fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); - exit(U_UNSUPPORTED_ERROR); - } -#endif - - *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); - if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { - fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); - exit(U_UNSUPPORTED_ERROR); - } - /* TODO: Support byte swapping */ - - *pCPU=buffer.header32.e_machine; -#elif U_PLATFORM_HAS_WIN32_API - if(lengthMachine; - /* - * The number of bits is implicit with the Machine value. - * *pBits is ignored in the calling code, so this need not be precise. - */ - *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 
32 : 64; - /* Windows always runs on little-endian CPUs. */ - *pIsBigEndian=FALSE; -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - - T_FileStream_close(in); -} - -U_CAPI void U_EXPORT2 -writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { - /* common variables */ - char buffer[4096], entry[96]={ 0 }; - FileStream *in, *out; - const char *newSuffix; - int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; - - uint16_t cpu, bits; - UBool makeBigEndian; - - /* platform-specific variables and initialization code */ -#ifdef U_ELF - /* 32-bit Elf file header */ - static Elf32_Ehdr header32={ - { - /* e_ident[] */ - ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, - ELFCLASS32, - U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, - EV_CURRENT /* EI_VERSION */ - }, - ET_REL, - EM_386, - EV_CURRENT, /* e_version */ - 0, /* e_entry */ - 0, /* e_phoff */ - (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ - 0, /* e_flags */ - (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ - 0, /* e_phentsize */ - 0, /* e_phnum */ - (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ - 5, /* e_shnum */ - 2 /* e_shstrndx */ - }; - - /* 32-bit Elf section header table */ - static Elf32_Shdr sectionHeaders32[5]={ - { /* SHN_UNDEF */ - 0 - }, - { /* .symtab */ - 1, /* sh_name */ - SHT_SYMTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ - (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ - 3, /* sh_link=sect hdr index of .strtab */ - 1, /* sh_info=One greater than the symbol table index of the last - * local symbol (with STB_LOCAL). 
*/ - 4, /* sh_addralign */ - (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ - }, - { /* .shstrtab */ - 9, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ - 40, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .strtab */ - 19, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ - (Elf32_Word)sizeof(entry), /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .rodata */ - 27, /* sh_name */ - SHT_PROGBITS, - SHF_ALLOC, /* sh_flags */ - 0, /* sh_addr */ - (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ - 0, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 16, /* sh_addralign */ - 0 /* sh_entsize */ - } - }; - - /* symbol table */ - static Elf32_Sym symbols32[2]={ - { /* STN_UNDEF */ - 0 - }, - { /* data entry point */ - 1, /* st_name */ - 0, /* st_value */ - 0, /* st_size */ - ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), - 0, /* st_other */ - 4 /* st_shndx=index of related section table entry */ - } - }; - - /* section header string table, with decimal string offsets */ - static const char sectionStrings[40]= - /* 0 */ "\0" - /* 1 */ ".symtab\0" - /* 9 */ ".shstrtab\0" - /* 19 */ ".strtab\0" - /* 27 */ ".rodata\0" - /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ - /* 40: padded to multiple of 8 bytes */ - - /* - * Use entry[] for the string table which will contain only the - * entry point name. - * entry[0] must be 0 (NUL) - * The entry point name can be up to 38 characters long (sizeof(entry)-2). 
- */ - - /* 16-align .rodata in the .o file, just in case */ - static const char padding[16]={ 0 }; - int32_t paddingSize; - -#ifdef U_ELF64 - /* 64-bit Elf file header */ - static Elf64_Ehdr header64={ - { - /* e_ident[] */ - ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, - ELFCLASS64, - U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, - EV_CURRENT /* EI_VERSION */ - }, - ET_REL, - EM_X86_64, - EV_CURRENT, /* e_version */ - 0, /* e_entry */ - 0, /* e_phoff */ - (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ - 0, /* e_flags */ - (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ - 0, /* e_phentsize */ - 0, /* e_phnum */ - (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ - 5, /* e_shnum */ - 2 /* e_shstrndx */ - }; - - /* 64-bit Elf section header table */ - static Elf64_Shdr sectionHeaders64[5]={ - { /* SHN_UNDEF */ - 0 - }, - { /* .symtab */ - 1, /* sh_name */ - SHT_SYMTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ - (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ - 3, /* sh_link=sect hdr index of .strtab */ - 1, /* sh_info=One greater than the symbol table index of the last - * local symbol (with STB_LOCAL). 
*/ - 4, /* sh_addralign */ - (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ - }, - { /* .shstrtab */ - 9, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ - 40, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .strtab */ - 19, /* sh_name */ - SHT_STRTAB, - 0, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ - (Elf64_Xword)sizeof(entry), /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 1, /* sh_addralign */ - 0 /* sh_entsize */ - }, - { /* .rodata */ - 27, /* sh_name */ - SHT_PROGBITS, - SHF_ALLOC, /* sh_flags */ - 0, /* sh_addr */ - (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ - 0, /* sh_size */ - 0, /* sh_link */ - 0, /* sh_info */ - 16, /* sh_addralign */ - 0 /* sh_entsize */ - } - }; - - /* - * 64-bit symbol table - * careful: different order of items compared with Elf32_sym! 
- */ - static Elf64_Sym symbols64[2]={ - { /* STN_UNDEF */ - 0 - }, - { /* data entry point */ - 1, /* st_name */ - ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), - 0, /* st_other */ - 4, /* st_shndx=index of related section table entry */ - 0, /* st_value */ - 0 /* st_size */ - } - }; - -#endif /* U_ELF64 */ - - /* entry[] have a leading NUL */ - entryOffset=1; - - /* in the common code, count entryLength from after the NUL */ - entryLengthOffset=1; - - newSuffix=".o"; - -#elif U_PLATFORM_HAS_WIN32_API - struct { - IMAGE_FILE_HEADER fileHeader; - IMAGE_SECTION_HEADER sections[2]; - char linkerOptions[100]; - } objHeader; - IMAGE_SYMBOL symbols[1]; - struct { - DWORD sizeofLongNames; - char longNames[100]; - } symbolNames; - - /* - * entry sometimes have a leading '_' - * overwritten if entryOffset==0 depending on the target platform - * see check for cpu below - */ - entry[0]='_'; - - newSuffix=".obj"; -#else -# error "Unknown platform for CAN_GENERATE_OBJECTS." -#endif - - /* deal with options, files and the entry point name */ - getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); - printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); -#if U_PLATFORM_HAS_WIN32_API - if(cpu==IMAGE_FILE_MACHINE_I386) { - entryOffset=1; - } -#endif - - in=T_FileStream_open(filename, "rb"); - if(in==NULL) { - fprintf(stderr, "genccode: unable to open input file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - size=T_FileStream_size(in); - - getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); - if (outFilePath != NULL) { - uprv_strcpy(outFilePath, buffer); - } - - if(optEntryPoint != NULL) { - uprv_strcpy(entry+entryOffset, optEntryPoint); - uprv_strcat(entry+entryOffset, "_dat"); - } - /* turn dashes in the entry name into underscores */ - entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); - for(i=0; i +#include +# ifdef __GNUC__ +# define WINDOWS_WITH_GNUC +# endif +#endif + +#if 
U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H +# define U_ELF +#endif + +#ifdef U_ELF +# include +# if defined(ELFCLASS64) +# define U_ELF64 +# endif + /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ +# ifndef EM_X86_64 +# define EM_X86_64 62 +# endif +# define ICU_ENTRY_OFFSET 0 +#endif + +#include +#include +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "filestrm.h" +#include "toolutil.h" +#include "unicode/uclean.h" +#include "uoptions.h" +#include "pkg_genc.h" + +#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) + +#define HEX_0X 0 /* 0x1234 */ +#define HEX_0H 1 /* 01234h */ + +/* prototypes --------------------------------------------------------------- */ +static void +getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); + +static uint32_t +write8(FileStream *out, uint8_t byte, uint32_t column); + +static uint32_t +write32(FileStream *out, uint32_t byte, uint32_t column); + +#if U_PLATFORM == U_PF_OS400 +static uint32_t +write8str(FileStream *out, uint8_t byte, uint32_t column); +#endif +/* -------------------------------------------------------------------------- */ + +/* +Creating Template Files for New Platforms + +Let the cc compiler help you get started. +Compile this program + const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; +with the -S option to produce assembly output. 
+ +For example, this will generate array.s: +gcc -S array.c + +This will produce a .s file that may look like this: + + .file "array.c" + .version "01.01" +gcc2_compiled.: + .globl x + .section .rodata + .align 4 + .type x,@object + .size x,20 +x: + .long 1 + .long 2 + .long -559038737 + .long -1 + .long 16 + .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" + +which gives a starting point that will compile, and can be transformed +to become the template, generally with some consulting of as docs and +some experimentation. + +If you want ICU to automatically use this assembly, you should +specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, +where the name is the compiler or platform that you used in this +assemblyHeader data structure. +*/ +static const struct AssemblyType { + const char *name; + const char *header; + const char *beginLine; + const char *footer; + int8_t hexType; /* HEX_0X or HEX_0h */ +} assemblyHeader[] = { + /* For gcc assemblers, the meaning of .align changes depending on the */ + /* hardware, so we use .balign 16 which always means 16 bytes. 
*/ + /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */ + {"gcc", + ".globl %s\n" + "\t.section .note.GNU-stack,\"\",%%progbits\n" + "\t.section .rodata\n" + "\t.balign 16\n" + "#ifdef U_HIDE_DATA_SYMBOL\n" + "\t.hidden %s\n" + "#endif\n" + "\t.type %s,%%object\n" + "%s:\n\n", + + ".long ",".size %s, .-%s\n",HEX_0X + }, + {"gcc-darwin", + /*"\t.section __TEXT,__text,regular,pure_instructions\n" + "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ + ".globl _%s\n" + "#ifdef U_HIDE_DATA_SYMBOL\n" + "\t.private_extern _%s\n" + "#endif\n" + "\t.data\n" + "\t.const\n" + "\t.balign 16\n" + "_%s:\n\n", + + ".long ","",HEX_0X + }, + {"gcc-cygwin", + ".globl _%s\n" + "\t.section .rodata\n" + "\t.balign 16\n" + "_%s:\n\n", + + ".long ","",HEX_0X + }, + {"gcc-mingw64", + ".globl %s\n" + "\t.section .rodata\n" + "\t.balign 16\n" + "%s:\n\n", + + ".long ","",HEX_0X + }, +/* 16 bytes alignment. */ +/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */ + {"sun", + "\t.section \".rodata\"\n" + "\t.align 16\n" + ".globl %s\n" + "%s:\n", + + ".word ","",HEX_0X + }, +/* 16 bytes alignment for sun-x86. */ +/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */ + {"sun-x86", + "Drodata.rodata:\n" + "\t.type Drodata.rodata,@object\n" + "\t.size Drodata.rodata,0\n" + "\t.globl %s\n" + "\t.align 16\n" + "%s:\n", + + ".4byte ","",HEX_0X + }, +/* 1<<4 bit alignment for aix. 
*/ +/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */ + {"xlc", + ".globl %s{RO}\n" + "\t.toc\n" + "%s:\n" + "\t.csect %s{RO}, 4\n", + + ".long ","",HEX_0X + }, + {"aCC-ia64", + "\t.file \"%s.s\"\n" + "\t.type %s,@object\n" + "\t.global %s\n" + "\t.secalias .abe$0.rodata, \".rodata\"\n" + "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" + "\t.align 16\n" + "%s::\t", + + "data4 ","",HEX_0X + }, + {"aCC-parisc", + "\t.SPACE $TEXT$\n" + "\t.SUBSPA $LIT$\n" + "%s\n" + "\t.EXPORT %s\n" + "\t.ALIGN 16\n", + + ".WORD ","",HEX_0X + }, +/* align 16 bytes */ +/* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */ + { "masm", + "\tTITLE %s\n" + "; generated by genccode\n" + ".386\n" + ".model flat\n" + "\tPUBLIC _%s\n" + "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" + "\tALIGN 16\n" + "_%s\tLABEL DWORD\n", + "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H + } +}; + +static int32_t assemblyHeaderIndex = -1; +static int32_t hexType = HEX_0X; + +U_CAPI UBool U_EXPORT2 +checkAssemblyHeaderName(const char* optAssembly) { + int32_t idx; + assemblyHeaderIndex = -1; + for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) { + if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { + assemblyHeaderIndex = idx; + hexType = assemblyHeader[idx].hexType; /* set the hex type */ + return TRUE; + } + } + + return FALSE; +} + + +U_CAPI void U_EXPORT2 +printAssemblyHeadersToStdErr(void) { + int32_t idx; + fprintf(stderr, "%s", assemblyHeader[0].name); + for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) { + fprintf(stderr, ", %s", assemblyHeader[idx].name); + } + fprintf(stderr, + ")\n"); +} + +U_CAPI void U_EXPORT2 +writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { + uint32_t column = MAX_COLUMN; + char entry[64]; + uint32_t buffer[1024]; + char *bufferStr = (char *)buffer; + FileStream 
*in, *out; + size_t i, length; + + in=T_FileStream_open(filename, "rb"); + if(in==NULL) { + fprintf(stderr, "genccode: unable to open input file %s\n", filename); + exit(U_FILE_ACCESS_ERROR); + } + + getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename); + out=T_FileStream_open(bufferStr, "w"); + if(out==NULL) { + fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); + exit(U_FILE_ACCESS_ERROR); + } + + if (outFilePath != NULL) { + uprv_strcpy(outFilePath, bufferStr); + } + +#if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN + /* Need to fix the file seperator character when using MinGW. */ + swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); +#endif + + if(optEntryPoint != NULL) { + uprv_strcpy(entry, optEntryPoint); + uprv_strcat(entry, "_dat"); + } + + /* turn dashes or dots in the entry name into underscores */ + length=uprv_strlen(entry); + for(i=0; i= 0 ; i--) +#endif + { + uint8_t value = ptrIdx[i]; + if (value || seenNonZero) { + *(s++)=hexToStr[value>>4]; + *(s++)=hexToStr[value&0xF]; + seenNonZero = 1; + } + } + if(hexType==HEX_0H) { + *(s++)='h'; + } + } + + *(s++)=0; + T_FileStream_writeLine(out, bitFieldStr); + return column; +} + +static uint32_t +write8(FileStream *out, uint8_t byte, uint32_t column) { + char s[4]; + int i=0; + + /* convert the byte value to a string */ + if(byte>=100) { + s[i++]=(char)('0'+byte/100); + byte%=100; + } + if(i>0 || byte>=10) { + s[i++]=(char)('0'+byte/10); + byte%=10; + } + s[i++]=(char)('0'+byte); + s[i]=0; + + /* write the value, possibly with comma and newline */ + if(column==MAX_COLUMN) { + /* first byte */ + column=1; + } else if(column<16) { + T_FileStream_writeLine(out, ","); + ++column; + } else { + T_FileStream_writeLine(out, ",\n"); + column=1; + } + T_FileStream_writeLine(out, s); + return column; +} + +#if U_PLATFORM == U_PF_OS400 +static uint32_t +write8str(FileStream *out, uint8_t byte, uint32_t column) { + char s[8]; + + if (byte > 7) + sprintf(s, "\\x%X", 
byte); + else + sprintf(s, "\\%X", byte); + + /* write the value, possibly with comma and newline */ + if(column==MAX_COLUMN) { + /* first byte */ + column=1; + T_FileStream_writeLine(out, "\""); + } else if(column<24) { + ++column; + } else { + T_FileStream_writeLine(out, "\"\n\""); + column=1; + } + T_FileStream_writeLine(out, s); + return column; +} +#endif + +static void +getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { + const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); + + /* copy path */ + if(destdir!=NULL && *destdir!=0) { + do { + *outFilename++=*destdir++; + } while(*destdir!=0); + if(*(outFilename-1)!=U_FILE_SEP_CHAR) { + *outFilename++=U_FILE_SEP_CHAR; + } + inFilename=basename; + } else { + while(inFilenameELFCLASS64 + ) { + fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); + exit(U_UNSUPPORTED_ERROR); + } + + *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ +#ifdef U_ELF64 + if(*pBits!=32 && *pBits!=64) { + fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); + exit(U_UNSUPPORTED_ERROR); + } +#else + if(*pBits!=32) { + fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); + exit(U_UNSUPPORTED_ERROR); + } +#endif + + *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); + if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { + fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); + exit(U_UNSUPPORTED_ERROR); + } + /* TODO: Support byte swapping */ + + *pCPU=buffer.header32.e_machine; +#elif U_PLATFORM_HAS_WIN32_API + if(lengthMachine; + /* + * The number of bits is implicit with the Machine value. + * *pBits is ignored in the calling code, so this need not be precise. + */ + *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 
32 : 64; + /* Windows always runs on little-endian CPUs. */ + *pIsBigEndian=FALSE; +#else +# error "Unknown platform for CAN_GENERATE_OBJECTS." +#endif + + T_FileStream_close(in); +} + +U_CAPI void U_EXPORT2 +writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { + /* common variables */ + char buffer[4096], entry[96]={ 0 }; + FileStream *in, *out; + const char *newSuffix; + int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; + + uint16_t cpu, bits; + UBool makeBigEndian; + + /* platform-specific variables and initialization code */ +#ifdef U_ELF + /* 32-bit Elf file header */ + static Elf32_Ehdr header32={ + { + /* e_ident[] */ + ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, + ELFCLASS32, + U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, + EV_CURRENT /* EI_VERSION */ + }, + ET_REL, + EM_386, + EV_CURRENT, /* e_version */ + 0, /* e_entry */ + 0, /* e_phoff */ + (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ + 0, /* e_flags */ + (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ + 0, /* e_phentsize */ + 0, /* e_phnum */ + (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ + 5, /* e_shnum */ + 2 /* e_shstrndx */ + }; + + /* 32-bit Elf section header table */ + static Elf32_Shdr sectionHeaders32[5]={ + { /* SHN_UNDEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + { /* .symtab */ + 1, /* sh_name */ + SHT_SYMTAB, + 0, /* sh_flags */ + 0, /* sh_addr */ + (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ + (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ + 3, /* sh_link=sect hdr index of .strtab */ + 1, /* sh_info=One greater than the symbol table index of the last + * local symbol (with STB_LOCAL). 
*/ + 4, /* sh_addralign */ + (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ + }, + { /* .shstrtab */ + 9, /* sh_name */ + SHT_STRTAB, + 0, /* sh_flags */ + 0, /* sh_addr */ + (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ + 40, /* sh_size */ + 0, /* sh_link */ + 0, /* sh_info */ + 1, /* sh_addralign */ + 0 /* sh_entsize */ + }, + { /* .strtab */ + 19, /* sh_name */ + SHT_STRTAB, + 0, /* sh_flags */ + 0, /* sh_addr */ + (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ + (Elf32_Word)sizeof(entry), /* sh_size */ + 0, /* sh_link */ + 0, /* sh_info */ + 1, /* sh_addralign */ + 0 /* sh_entsize */ + }, + { /* .rodata */ + 27, /* sh_name */ + SHT_PROGBITS, + SHF_ALLOC, /* sh_flags */ + 0, /* sh_addr */ + (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ + 0, /* sh_size */ + 0, /* sh_link */ + 0, /* sh_info */ + 16, /* sh_addralign */ + 0 /* sh_entsize */ + } + }; + + /* symbol table */ + static Elf32_Sym symbols32[2]={ + { /* STN_UNDEF */ + 0, 0, 0, 0, 0, 0 + }, + { /* data entry point */ + 1, /* st_name */ + 0, /* st_value */ + 0, /* st_size */ + ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), + 0, /* st_other */ + 4 /* st_shndx=index of related section table entry */ + } + }; + + /* section header string table, with decimal string offsets */ + static const char sectionStrings[40]= + /* 0 */ "\0" + /* 1 */ ".symtab\0" + /* 9 */ ".shstrtab\0" + /* 19 */ ".strtab\0" + /* 27 */ ".rodata\0" + /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ + /* 40: padded to multiple of 8 bytes */ + + /* + * Use entry[] for the string table which will contain only the + * entry point name. + * entry[0] must be 0 (NUL) + * The entry point name can be up to 38 characters long (sizeof(entry)-2). 
+ */ + + /* 16-align .rodata in the .o file, just in case */ + static const char padding[16]={ 0 }; + int32_t paddingSize; + +#ifdef U_ELF64 + /* 64-bit Elf file header */ + static Elf64_Ehdr header64={ + { + /* e_ident[] */ + ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, + ELFCLASS64, + U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, + EV_CURRENT /* EI_VERSION */ + }, + ET_REL, + EM_X86_64, + EV_CURRENT, /* e_version */ + 0, /* e_entry */ + 0, /* e_phoff */ + (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ + 0, /* e_flags */ + (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ + 0, /* e_phentsize */ + 0, /* e_phnum */ + (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ + 5, /* e_shnum */ + 2 /* e_shstrndx */ + }; + + /* 64-bit Elf section header table */ + static Elf64_Shdr sectionHeaders64[5]={ + { /* SHN_UNDEF */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + { /* .symtab */ + 1, /* sh_name */ + SHT_SYMTAB, + 0, /* sh_flags */ + 0, /* sh_addr */ + (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ + (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ + 3, /* sh_link=sect hdr index of .strtab */ + 1, /* sh_info=One greater than the symbol table index of the last + * local symbol (with STB_LOCAL). 
*/ + 4, /* sh_addralign */ + (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ + }, + { /* .shstrtab */ + 9, /* sh_name */ + SHT_STRTAB, + 0, /* sh_flags */ + 0, /* sh_addr */ + (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ + 40, /* sh_size */ + 0, /* sh_link */ + 0, /* sh_info */ + 1, /* sh_addralign */ + 0 /* sh_entsize */ + }, + { /* .strtab */ + 19, /* sh_name */ + SHT_STRTAB, + 0, /* sh_flags */ + 0, /* sh_addr */ + (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ + (Elf64_Xword)sizeof(entry), /* sh_size */ + 0, /* sh_link */ + 0, /* sh_info */ + 1, /* sh_addralign */ + 0 /* sh_entsize */ + }, + { /* .rodata */ + 27, /* sh_name */ + SHT_PROGBITS, + SHF_ALLOC, /* sh_flags */ + 0, /* sh_addr */ + (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ + 0, /* sh_size */ + 0, /* sh_link */ + 0, /* sh_info */ + 16, /* sh_addralign */ + 0 /* sh_entsize */ + } + }; + + /* + * 64-bit symbol table + * careful: different order of items compared with Elf32_sym! 
+ */ + static Elf64_Sym symbols64[2]={ + { /* STN_UNDEF */ + 0, 0, 0, 0, 0, 0 + }, + { /* data entry point */ + 1, /* st_name */ + ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), + 0, /* st_other */ + 4, /* st_shndx=index of related section table entry */ + 0, /* st_value */ + 0 /* st_size */ + } + }; + +#endif /* U_ELF64 */ + + /* entry[] have a leading NUL */ + entryOffset=1; + + /* in the common code, count entryLength from after the NUL */ + entryLengthOffset=1; + + newSuffix=".o"; + +#elif U_PLATFORM_HAS_WIN32_API + struct { + IMAGE_FILE_HEADER fileHeader; + IMAGE_SECTION_HEADER sections[2]; + char linkerOptions[100]; + } objHeader; + IMAGE_SYMBOL symbols[1]; + struct { + DWORD sizeofLongNames; + char longNames[100]; + } symbolNames; + + /* + * entry sometimes have a leading '_' + * overwritten if entryOffset==0 depending on the target platform + * see check for cpu below + */ + entry[0]='_'; + + newSuffix=".obj"; +#else +# error "Unknown platform for CAN_GENERATE_OBJECTS." +#endif + + /* deal with options, files and the entry point name */ + getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); + if (optMatchArch) + { + printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); + } + else + { + printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); + } +#if U_PLATFORM_HAS_WIN32_API + if(cpu==IMAGE_FILE_MACHINE_I386) { + entryOffset=1; + } +#endif + + in=T_FileStream_open(filename, "rb"); + if(in==NULL) { + fprintf(stderr, "genccode: unable to open input file %s\n", filename); + exit(U_FILE_ACCESS_ERROR); + } + size=T_FileStream_size(in); + + getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); + if (outFilePath != NULL) { + uprv_strcpy(outFilePath, buffer); + } + + if(optEntryPoint != NULL) { + uprv_strcpy(entry+entryOffset, optEntryPoint); + uprv_strcat(entry+entryOffset, "_dat"); + } + /* turn dashes in the entry name into underscores */ + 
entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); + for(i=0; i -#include -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "filestrm.h" -#include "toolutil.h" -#include "unicode/uclean.h" -#include "unewdata.h" -#include "putilimp.h" -#include "pkg_gencmn.h" - -#define STRING_STORE_SIZE 200000 - -#define COMMON_DATA_NAME U_ICUDATA_NAME -#define DATA_TYPE "dat" - -/* ICU package data file format (.dat files) ------------------------------- *** - -Description of the data format after the usual ICU data file header -(UDataInfo etc.). - -Format version 1 - -A .dat package file contains a simple Table of Contents of item names, -followed by the items themselves: - -1. ToC table - -uint32_t count; - number of items -UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: - uint32_t nameOffset; - offset of the item name - uint32_t dataOffset; - offset of the item data -both are byte offsets from the beginning of the data - -2. item name strings - -All item names are stored as char * strings in one block between the ToC table -and the data items. - -3. data items - -The data items are stored following the item names block. -Each data item is 16-aligned. -The data items are stored in the sorted order of their names. - -Therefore, the top of the name strings block is the offset of the first item, -the length of the last item is the difference between its offset and -the .dat file length, and the length of all previous items is the difference -between its offset and the next one. - ------------------------------------------------------------------------------ */ - -/* UDataInfo cf. 
udata.h */ -static const UDataInfo dataInfo={ - sizeof(UDataInfo), - 0, - - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - sizeof(UChar), - 0, - - {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ - {1, 0, 0, 0}, /* formatVersion */ - {3, 0, 0, 0} /* dataVersion */ -}; - -static uint32_t maxSize; - -static char stringStore[STRING_STORE_SIZE]; -static uint32_t stringTop=0, basenameTotal=0; - -typedef struct { - char *pathname, *basename; - uint32_t basenameLength, basenameOffset, fileSize, fileOffset; -} File; - -#define CHUNK_FILE_COUNT 256 -static File *files = NULL; -static uint32_t fileCount=0; -static uint32_t fileMax = 0; - - -static char *symPrefix = NULL; - -#define LINE_BUFFER_SIZE 512 -/* prototypes --------------------------------------------------------------- */ - -static void -addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); - -static char * -allocString(uint32_t length); - -static int -compareFiles(const void *file1, const void *file2); - -static char * -pathToFullPath(const char *path, const char *source); - -/* map non-tree separator (such as '\') to tree separator ('/') inplace. 
*/ -static void -fixDirToTreePath(char *s); -/* -------------------------------------------------------------------------- */ - -U_CAPI void U_EXPORT2 -createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, - const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { - static char buffer[4096]; - char *line; - char *linePtr; - char *s = NULL; - UErrorCode errorCode=U_ZERO_ERROR; - uint32_t i, fileOffset, basenameOffset, length, nread; - FileStream *in, *file; - - line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); - if (line == NULL) { - fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - linePtr = line; - - maxSize = max_size; - - if (destDir == NULL) { - destDir = u_getDataDirectory(); - } - if (name == NULL) { - name = COMMON_DATA_NAME; - } - if (type == NULL) { - type = DATA_TYPE; - } - if (source == NULL) { - source = "."; - } - - if (dataFile == NULL) { - in = T_FileStream_stdin(); - } else { - in = T_FileStream_open(dataFile, "r"); - if(in == NULL) { - fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); - exit(U_FILE_ACCESS_ERROR); - } - } - - if (verbose) { - if(sourceTOC) { - printf("generating %s_%s.c (table of contents source file)\n", name, type); - } else { - printf("generating %s.%s (common data file with table of contents)\n", name, type); - } - } - - /* read the list of files and get their lengths */ - while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), - LINE_BUFFER_SIZE))!=NULL) { - /* remove trailing newline characters and parse space separated items */ - if (s != NULL && *s != 0) { - line=s; - } else { - s=line; - } - while(*s!=0) { - if(*s==' ') { - *s=0; - ++s; - break; - } else if(*s=='\r' || *s=='\n') { - *s=0; - break; - } - ++s; - } - - /* check for comment */ - - if 
(*line == '#') { - continue; - } - - /* add the file */ -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - { - char *t; - while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { - *t = U_FILE_SEP_CHAR; - } - } -#endif - addFile(getLongPathname(line), name, source, sourceTOC, verbose); - } - - uprv_free(linePtr); - - if(in!=T_FileStream_stdin()) { - T_FileStream_close(in); - } - - if(fileCount==0) { - fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "" : dataFile); - return; - } - - /* sort the files by basename */ - qsort(files, fileCount, sizeof(File), compareFiles); - - if(!sourceTOC) { - UNewDataMemory *out; - - /* determine the offsets of all basenames and files in this common one */ - basenameOffset=4+8*fileCount; - fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; - for(i=0; ifilename && *(s-1)!=U_FILE_SEP_CHAR) { - *s++=U_FILE_SEP_CHAR; - } - uprv_strcpy(s, name); - if(*(type)!=0) { - s+=uprv_strlen(s); - *s++='_'; - uprv_strcpy(s, type); - } - s+=uprv_strlen(s); - uprv_strcpy(s, ".c"); - - /* open the output file */ - out=T_FileStream_open(filename, "w"); - if (gencmnFileName != NULL) { - uprv_strcpy(gencmnFileName, filename); - } - if(out==NULL) { - fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); - exit(U_FILE_ACCESS_ERROR); - } - - /* write the source file */ - sprintf(buffer, - "/*\n" - " * ICU common data table of contents for %s.%s\n" - " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" - " */\n\n" - "#include \"unicode/utypes.h\"\n" - "#include \"unicode/udata.h\"\n" - "\n" - "/* external symbol declarations for data (%d files) */\n", - name, type, fileCount); - T_FileStream_writeLine(out, buffer); - - sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); - T_FileStream_writeLine(out, buffer); - for(i=1; imaxSize) { - if (verbose) { - printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); - } - return; - } - 
files[fileCount].fileSize=length; - } else { - char *t; - /* get and store the basename */ - /* need to include the package name */ - length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); - s=allocString(length); - uprv_strcpy(s, name); - uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); - uprv_strcat(s, filename); - fixDirToTreePath(s); - files[fileCount].basename=s; - /* turn the basename into an entry point name and store in the pathname field */ - t=files[fileCount].pathname=allocString(length); - while(--length>0) { - if(*s=='.' || *s=='-' || *s=='/') { - *t='_'; - } else { - *t=*s; - } - ++s; - ++t; - } - *t=0; - } - ++fileCount; -} - -static char * -allocString(uint32_t length) { - uint32_t top=stringTop+length; - char *p; - - if(top>STRING_STORE_SIZE) { - fprintf(stderr, "gencmn: out of memory\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - p=stringStore+stringTop; - stringTop=top; - return p; -} - -static char * -pathToFullPath(const char *path, const char *source) { - int32_t length; - int32_t newLength; - char *fullPath; - int32_t n; - - length = (uint32_t)(uprv_strlen(path) + 1); - newLength = (length + 1 + (int32_t)uprv_strlen(source)); - fullPath = uprv_malloc(newLength); - if(source != NULL) { - uprv_strcpy(fullPath, source); - uprv_strcat(fullPath, U_FILE_SEP_STRING); - } else { - fullPath[0] = 0; - } - n = (int32_t)uprv_strlen(fullPath); - fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ - /* when conditional code below is not compiled. 
*/ - uprv_strcat(fullPath, path); - -#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) -#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) - /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ - for(;fullPath[n];n++) { - if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { - fullPath[n] = U_FILE_SEP_CHAR; - } - } -#endif -#endif -#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) - /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ - for(;fullPath[n];n++) { - if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { - fullPath[n] = U_FILE_SEP_CHAR; - } - } -#endif - return fullPath; -} - -static int -compareFiles(const void *file1, const void *file2) { - /* sort by basename */ - return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); -} - -static void -fixDirToTreePath(char *s) -{ -#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)) - char *t; -#endif -#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) - for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) { - *t = U_TREE_ENTRY_SEP_CHAR; - } -#endif -#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) - for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) { - *t = U_TREE_ENTRY_SEP_CHAR; - } -#endif -} diff --git a/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp b/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp new file mode 100644 index 0000000000..423e4b7363 --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/pkg_gencmn.cpp @@ -0,0 +1,578 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/****************************************************************************** + * Copyright (C) 2008-2012, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ******************************************************************************* + */ +#include "unicode/utypes.h" + +#include +#include +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "filestrm.h" +#include "toolutil.h" +#include "unicode/uclean.h" +#include "unewdata.h" +#include "putilimp.h" +#include "pkg_gencmn.h" + +#define STRING_STORE_SIZE 200000 + +#define COMMON_DATA_NAME U_ICUDATA_NAME +#define DATA_TYPE "dat" + +/* ICU package data file format (.dat files) ------------------------------- *** + +Description of the data format after the usual ICU data file header +(UDataInfo etc.). + +Format version 1 + +A .dat package file contains a simple Table of Contents of item names, +followed by the items themselves: + +1. ToC table + +uint32_t count; - number of items +UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: + uint32_t nameOffset; - offset of the item name + uint32_t dataOffset; - offset of the item data +both are byte offsets from the beginning of the data + +2. item name strings + +All item names are stored as char * strings in one block between the ToC table +and the data items. + +3. data items + +The data items are stored following the item names block. +Each data item is 16-aligned. +The data items are stored in the sorted order of their names. + +Therefore, the top of the name strings block is the offset of the first item, +the length of the last item is the difference between its offset and +the .dat file length, and the length of all previous items is the difference +between its offset and the next one. + +----------------------------------------------------------------------------- */ + +/* UDataInfo cf. 
udata.h */ +static const UDataInfo dataInfo={ + sizeof(UDataInfo), + 0, + + U_IS_BIG_ENDIAN, + U_CHARSET_FAMILY, + sizeof(UChar), + 0, + + {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ + {1, 0, 0, 0}, /* formatVersion */ + {3, 0, 0, 0} /* dataVersion */ +}; + +static uint32_t maxSize; + +static char stringStore[STRING_STORE_SIZE]; +static uint32_t stringTop=0, basenameTotal=0; + +typedef struct { + char *pathname, *basename; + uint32_t basenameLength, basenameOffset, fileSize, fileOffset; +} File; + +#define CHUNK_FILE_COUNT 256 +static File *files = NULL; +static uint32_t fileCount=0; +static uint32_t fileMax = 0; + + +static char *symPrefix = NULL; + +#define LINE_BUFFER_SIZE 512 +/* prototypes --------------------------------------------------------------- */ + +static void +addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); + +static char * +allocString(uint32_t length); + +U_CDECL_BEGIN +static int +compareFiles(const void *file1, const void *file2); +U_CDECL_END + +static char * +pathToFullPath(const char *path, const char *source); + +/* map non-tree separator (such as '\') to tree separator ('/') inplace. 
*/ +static void +fixDirToTreePath(char *s); +/* -------------------------------------------------------------------------- */ + +U_CAPI void U_EXPORT2 +createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, + const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { + static char buffer[4096]; + char *line; + char *linePtr; + char *s = NULL; + UErrorCode errorCode=U_ZERO_ERROR; + uint32_t i, fileOffset, basenameOffset, length, nread; + FileStream *in, *file; + + line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); + if (line == NULL) { + fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + linePtr = line; + + maxSize = max_size; + + if (destDir == NULL) { + destDir = u_getDataDirectory(); + } + if (name == NULL) { + name = COMMON_DATA_NAME; + } + if (type == NULL) { + type = DATA_TYPE; + } + if (source == NULL) { + source = "."; + } + + if (dataFile == NULL) { + in = T_FileStream_stdin(); + } else { + in = T_FileStream_open(dataFile, "r"); + if(in == NULL) { + fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); + exit(U_FILE_ACCESS_ERROR); + } + } + + if (verbose) { + if(sourceTOC) { + printf("generating %s_%s.c (table of contents source file)\n", name, type); + } else { + printf("generating %s.%s (common data file with table of contents)\n", name, type); + } + } + + /* read the list of files and get their lengths */ + while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), + LINE_BUFFER_SIZE))!=NULL) { + /* remove trailing newline characters and parse space separated items */ + if (s != NULL && *s != 0) { + line=s; + } else { + s=line; + } + while(*s!=0) { + if(*s==' ') { + *s=0; + ++s; + break; + } else if(*s=='\r' || *s=='\n') { + *s=0; + break; + } + ++s; + } + + /* check for comment */ + + if 
(*line == '#') { + continue; + } + + /* add the file */ +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + { + char *t; + while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { + *t = U_FILE_SEP_CHAR; + } + } +#endif + addFile(getLongPathname(line), name, source, sourceTOC, verbose); + } + + uprv_free(linePtr); + + if(in!=T_FileStream_stdin()) { + T_FileStream_close(in); + } + + if(fileCount==0) { + fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "" : dataFile); + return; + } + + /* sort the files by basename */ + qsort(files, fileCount, sizeof(File), compareFiles); + + if(!sourceTOC) { + UNewDataMemory *out; + + /* determine the offsets of all basenames and files in this common one */ + basenameOffset=4+8*fileCount; + fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; + for(i=0; ifilename && *(s-1)!=U_FILE_SEP_CHAR) { + *s++=U_FILE_SEP_CHAR; + } + uprv_strcpy(s, name); + if(*(type)!=0) { + s+=uprv_strlen(s); + *s++='_'; + uprv_strcpy(s, type); + } + s+=uprv_strlen(s); + uprv_strcpy(s, ".c"); + + /* open the output file */ + out=T_FileStream_open(filename, "w"); + if (gencmnFileName != NULL) { + uprv_strcpy(gencmnFileName, filename); + } + if(out==NULL) { + fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); + exit(U_FILE_ACCESS_ERROR); + } + + /* write the source file */ + sprintf(buffer, + "/*\n" + " * ICU common data table of contents for %s.%s\n" + " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" + " */\n\n" + "#include \"unicode/utypes.h\"\n" + "#include \"unicode/udata.h\"\n" + "\n" + "/* external symbol declarations for data (%d files) */\n", + name, type, fileCount); + T_FileStream_writeLine(out, buffer); + + sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); + T_FileStream_writeLine(out, buffer); + for(i=1; imaxSize) { + if (verbose) { + printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); + } + return; + } + 
files[fileCount].fileSize=length; + } else { + char *t; + /* get and store the basename */ + /* need to include the package name */ + length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); + s=allocString(length); + uprv_strcpy(s, name); + uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); + uprv_strcat(s, filename); + fixDirToTreePath(s); + files[fileCount].basename=s; + /* turn the basename into an entry point name and store in the pathname field */ + t=files[fileCount].pathname=allocString(length); + while(--length>0) { + if(*s=='.' || *s=='-' || *s=='/') { + *t='_'; + } else { + *t=*s; + } + ++s; + ++t; + } + *t=0; + } + ++fileCount; +} + +static char * +allocString(uint32_t length) { + uint32_t top=stringTop+length; + char *p; + + if(top>STRING_STORE_SIZE) { + fprintf(stderr, "gencmn: out of memory\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + p=stringStore+stringTop; + stringTop=top; + return p; +} + +static char * +pathToFullPath(const char *path, const char *source) { + int32_t length; + int32_t newLength; + char *fullPath; + int32_t n; + + length = (uint32_t)(uprv_strlen(path) + 1); + newLength = (length + 1 + (int32_t)uprv_strlen(source)); + fullPath = (char *)uprv_malloc(newLength); + if(source != NULL) { + uprv_strcpy(fullPath, source); + uprv_strcat(fullPath, U_FILE_SEP_STRING); + } else { + fullPath[0] = 0; + } + n = (int32_t)uprv_strlen(fullPath); + fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ + /* when conditional code below is not compiled. 
*/ + uprv_strcat(fullPath, path); + +#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) +#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) + /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ + for(;fullPath[n];n++) { + if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { + fullPath[n] = U_FILE_SEP_CHAR; + } + } +#endif +#endif +#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) + /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ + for(;fullPath[n];n++) { + if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { + fullPath[n] = U_FILE_SEP_CHAR; + } + } +#endif + return fullPath; +} + +U_CDECL_BEGIN +static int +compareFiles(const void *file1, const void *file2) { + /* sort by basename */ + return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); +} +U_CDECL_END + +static void +fixDirToTreePath(char *s) +{ + (void)s; +#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)) + char *t; +#endif +#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) + for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) { + *t = U_TREE_ENTRY_SEP_CHAR; + } +#endif +#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) + for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) { + *t = U_TREE_ENTRY_SEP_CHAR; + } +#endif +} diff --git a/deps/icu-small/source/tools/toolutil/pkg_gencmn.h b/deps/icu-small/source/tools/toolutil/pkg_gencmn.h index 62f8327cdf..238239960a 100644 --- a/deps/icu-small/source/tools/toolutil/pkg_gencmn.h +++ b/deps/icu-small/source/tools/toolutil/pkg_gencmn.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /****************************************************************************** * Copyright (C) 2008, International Business Machines diff --git a/deps/icu-small/source/tools/toolutil/pkg_icu.cpp b/deps/icu-small/source/tools/toolutil/pkg_icu.cpp index e679c23be8..ce0bfc215b 100644 --- a/deps/icu-small/source/tools/toolutil/pkg_icu.cpp +++ b/deps/icu-small/source/tools/toolutil/pkg_icu.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /****************************************************************************** * Copyright (C) 2008-2015, International Business Machines diff --git a/deps/icu-small/source/tools/toolutil/pkg_icu.h b/deps/icu-small/source/tools/toolutil/pkg_icu.h index 3d620f78df..638056e60b 100644 --- a/deps/icu-small/source/tools/toolutil/pkg_icu.h +++ b/deps/icu-small/source/tools/toolutil/pkg_icu.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /****************************************************************************** * Copyright (C) 2008-2016, International Business Machines diff --git a/deps/icu-small/source/tools/toolutil/pkg_imp.h b/deps/icu-small/source/tools/toolutil/pkg_imp.h index c9fe81bd73..29abd8d83c 100644 --- a/deps/icu-small/source/tools/toolutil/pkg_imp.h +++ b/deps/icu-small/source/tools/toolutil/pkg_imp.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: pkg_imp.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/pkgitems.cpp b/deps/icu-small/source/tools/toolutil/pkgitems.cpp index 91c8520110..dd414c2f87 100644 --- a/deps/icu-small/source/tools/toolutil/pkgitems.cpp +++ b/deps/icu-small/source/tools/toolutil/pkgitems.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: pkgitems.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/ppucd.cpp b/deps/icu-small/source/tools/toolutil/ppucd.cpp index 18d317e3e3..cccde81c7a 100644 --- a/deps/icu-small/source/tools/toolutil/ppucd.cpp +++ b/deps/icu-small/source/tools/toolutil/ppucd.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: ppucd.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -515,12 +515,12 @@ PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, U void PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) { - UChar *buffer=uni.getBuffer(-1); + UChar *buffer=toUCharPtr(uni.getBuffer(-1)); int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR) { errorCode=U_ZERO_ERROR; uni.releaseBuffer(0); - buffer=uni.getBuffer(length); + buffer=toUCharPtr(uni.getBuffer(length)); length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode); } uni.releaseBuffer(length); diff --git a/deps/icu-small/source/tools/toolutil/ppucd.h b/deps/icu-small/source/tools/toolutil/ppucd.h index 593bd24799..3cd6feee00 100644 --- a/deps/icu-small/source/tools/toolutil/ppucd.h +++ b/deps/icu-small/source/tools/toolutil/ppucd.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -6,7 +6,7 @@ * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ppucd.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/swapimpl.cpp b/deps/icu-small/source/tools/toolutil/swapimpl.cpp index 6cc2162301..620a387e24 100644 --- a/deps/icu-small/source/tools/toolutil/swapimpl.cpp +++ b/deps/icu-small/source/tools/toolutil/swapimpl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: swapimpl.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/swapimpl.h b/deps/icu-small/source/tools/toolutil/swapimpl.h index 0e4d417344..8c6474f662 100644 --- a/deps/icu-small/source/tools/toolutil/swapimpl.h +++ b/deps/icu-small/source/tools/toolutil/swapimpl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: swapimpl.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/toolutil.cpp b/deps/icu-small/source/tools/toolutil/toolutil.cpp index bb393a2e7d..0f7d0984a8 100644 --- a/deps/icu-small/source/tools/toolutil/toolutil.cpp +++ b/deps/icu-small/source/tools/toolutil/toolutil.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: toolutil.c -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/toolutil.h b/deps/icu-small/source/tools/toolutil/toolutil.h index 026e75aeb2..be07787a9f 100644 --- a/deps/icu-small/source/tools/toolutil/toolutil.h +++ b/deps/icu-small/source/tools/toolutil/toolutil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: toolutil.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/ucbuf.cpp b/deps/icu-small/source/tools/toolutil/ucbuf.cpp index b6b0150afc..5269c8177c 100644 --- a/deps/icu-small/source/tools/toolutil/ucbuf.cpp +++ b/deps/icu-small/source/tools/toolutil/ucbuf.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/toolutil/ucbuf.h b/deps/icu-small/source/tools/toolutil/ucbuf.h index cb9509b427..48d41ef4cd 100644 --- a/deps/icu-small/source/tools/toolutil/ucbuf.h +++ b/deps/icu-small/source/tools/toolutil/ucbuf.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* diff --git a/deps/icu-small/source/tools/toolutil/ucln_tu.cpp b/deps/icu-small/source/tools/toolutil/ucln_tu.cpp index 2f67641768..5354fe1753 100644 --- a/deps/icu-small/source/tools/toolutil/ucln_tu.cpp +++ b/deps/icu-small/source/tools/toolutil/ucln_tu.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: diff --git a/deps/icu-small/source/tools/toolutil/ucm.c b/deps/icu-small/source/tools/toolutil/ucm.c deleted file mode 100644 index 8d4cdfc40f..0000000000 --- a/deps/icu-small/source/tools/toolutil/ucm.c +++ /dev/null @@ -1,1191 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucm.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun20 -* created by: Markus W. Scherer -* -* This file reads a .ucm file, stores its mappings and sorts them. -* It implements handling of Unicode conversion mappings from .ucm files -* for makeconv, canonucm, rptp2ucm, etc. -* -* Unicode code point sequences with a length of more than 1, -* as well as byte sequences with more than 4 bytes or more than one complete -* character sequence are handled to support m:n mappings. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cmemory.h" -#include "filestrm.h" -#include "uarrsort.h" -#include "ucnvmbcs.h" -#include "ucnv_bld.h" -#include "ucnv_ext.h" -#include "uparse.h" -#include "ucm.h" -#include - -#if !UCONFIG_NO_CONVERSION - -/* -------------------------------------------------------------------------- */ - -static void -printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { - int32_t j; - - for(j=0; juLen; ++j) { - fprintf(f, "", (long)codePoints[j]); - } - - fputc(' ', f); - - for(j=0; jbLen; ++j) { - fprintf(f, "\\x%02X", bytes[j]); - } - - if(m->f>=0) { - fprintf(f, " |%u\n", m->f); - } else { - fputs("\n", f); - } -} - -U_CAPI void U_EXPORT2 -ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { - printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); -} - -U_CAPI void U_EXPORT2 -ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { - UCMapping *m; - int32_t i, length; - - m=table->mappings; - length=table->mappingsLength; - if(byUnicode) { - for(i=0; ireverseMap; - for(i=0; iuLen==1 && r->uLen==1) { - /* compare two single code points */ - return l->u-r->u; - } - - /* get pointers to the code point sequences */ - lu=UCM_GET_CODE_POINTS(lTable, l); - ru=UCM_GET_CODE_POINTS(rTable, r); - - /* get the minimum length */ - if(l->uLen<=r->uLen) { - length=l->uLen; - } else { - length=r->uLen; - } - - /* compare the code points */ - for(i=0; iuLen-r->uLen; -} - -static int32_t -compareBytes(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r, - UBool lexical) { - const uint8_t *lb, *rb; - int32_t result, i, length; - - /* - * A lexical comparison is used for sorting in the builder, to allow - * an efficient search for a byte sequence that could be a prefix - * of a previously entered byte sequence. - * - * Comparing by lengths first is for compatibility with old .ucm tools - * like canonucm and rptp2ucm. 
- */ - if(lexical) { - /* get the minimum length and continue */ - if(l->bLen<=r->bLen) { - length=l->bLen; - } else { - length=r->bLen; - } - } else { - /* compare lengths first */ - result=l->bLen-r->bLen; - if(result!=0) { - return result; - } else { - length=l->bLen; - } - } - - /* get pointers to the byte sequences */ - lb=UCM_GET_BYTES(lTable, l); - rb=UCM_GET_BYTES(rTable, r); - - /* compare the bytes */ - for(i=0; ibLen-r->bLen; -} - -/* compare UCMappings for sorting */ -static int32_t -compareMappings(UCMTable *lTable, const UCMapping *l, - UCMTable *rTable, const UCMapping *r, - UBool uFirst) { - int32_t result; - - /* choose which side to compare first */ - if(uFirst) { - /* Unicode then bytes */ - result=compareUnicode(lTable, l, rTable, r); - if(result==0) { - result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ - } - } else { - /* bytes then Unicode */ - result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ - if(result==0) { - result=compareUnicode(lTable, l, rTable, r); - } - } - - if(result!=0) { - return result; - } - - /* compare the flags */ - return l->f-r->f; -} - -/* sorting by Unicode first sorts mappings directly */ -static int32_t -compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { - return compareMappings( - (UCMTable *)context, (const UCMapping *)left, - (UCMTable *)context, (const UCMapping *)right, TRUE); -} - -/* sorting by bytes first sorts the reverseMap; use indirection to mappings */ -static int32_t -compareMappingsBytesFirst(const void *context, const void *left, const void *right) { - UCMTable *table=(UCMTable *)context; - int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; - return compareMappings( - table, table->mappings+l, - table, table->mappings+r, FALSE); -} - -U_CAPI void U_EXPORT2 -ucm_sortTable(UCMTable *t) { - UErrorCode errorCode; - int32_t i; - - if(t->isSorted) { - return; - } - - errorCode=U_ZERO_ERROR; - - /* 1. 
sort by Unicode first */ - uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), - compareMappingsUnicodeFirst, t, - FALSE, &errorCode); - - /* build the reverseMap */ - if(t->reverseMap==NULL) { - /* - * allocate mappingsCapacity instead of mappingsLength so that - * if mappings are added, the reverseMap need not be - * reallocated each time - * (see ucm_moveMappings() and ucm_addMapping()) - */ - t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); - if(t->reverseMap==NULL) { - fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - for(i=0; imappingsLength; ++i) { - t->reverseMap[i]=i; - } - - /* 2. sort reverseMap by mappings bytes first */ - uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), - compareMappingsBytesFirst, t, - FALSE, &errorCode); - - if(U_FAILURE(errorCode)) { - fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", - u_errorName(errorCode)); - exit(errorCode); - } - - t->isSorted=TRUE; -} - -/* - * remove mappings with their move flag set from the base table - * and move some of them (with UCM_MOVE_TO_EXT) to the extension table - */ -U_CAPI void U_EXPORT2 -ucm_moveMappings(UCMTable *base, UCMTable *ext) { - UCMapping *mb, *mbLimit; - int8_t flag; - - mb=base->mappings; - mbLimit=mb+base->mappingsLength; - - while(mbmoveFlag; - if(flag!=0) { - /* reset the move flag */ - mb->moveFlag=0; - - if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { - /* add the mapping to the extension table */ - ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); - } - - /* remove this mapping: move the last base mapping down and overwrite the current one */ - if(mb<(mbLimit-1)) { - uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); - } - --mbLimit; - --base->mappingsLength; - base->isSorted=FALSE; - } else { - ++mb; - } - } -} - -enum { - NEEDS_MOVE=1, - HAS_ERRORS=2 -}; - -static uint8_t -checkBaseExtUnicode(UCMStates *baseStates, 
UCMTable *base, UCMTable *ext, - UBool moveToExt, UBool intersectBase) { - UCMapping *mb, *me, *mbLimit, *meLimit; - int32_t cmp; - uint8_t result; - - mb=base->mappings; - mbLimit=mb+base->mappingsLength; - - me=ext->mappings; - meLimit=me+ext->mappingsLength; - - result=0; - - for(;;) { - /* skip irrelevant mappings on both sides */ - for(;;) { - if(mb==mbLimit) { - return result; - } - - if((0<=mb->f && mb->f<=2) || mb->f==4) { - break; - } - - ++mb; - } - - for(;;) { - if(me==meLimit) { - return result; - } - - if((0<=me->f && me->f<=2) || me->f==4) { - break; - } - - ++me; - } - - /* compare the base and extension mappings */ - cmp=compareUnicode(base, mb, ext, me); - if(cmp<0) { - if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { - /* - * mapping in base but not in ext, move it - * - * if ext is DBCS, move DBCS mappings here - * and check SBCS ones for Unicode prefix below - */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - - /* does mb map from an input sequence that is a prefix of me's? 
*/ - } else if( mb->uLenuLen && - 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) - ) { - if(moveToExt) { - /* mark this mapping to be moved to the extension table */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is a prefix of the input sequence of an extension mapping\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - } - - ++mb; - } else if(cmp==0) { - /* - * same output: remove the extension mapping, - * otherwise treat as an error - */ - if( mb->f==me->f && mb->bLen==me->bLen && - 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) - ) { - me->moveFlag|=UCM_REMOVE_MAPPING; - result|=NEEDS_MOVE; - } else if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is the same as the input sequence of an extension mapping\n" - " but it maps differently\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - - ++mb; - } else /* cmp>0 */ { - ++me; - } - } -} - -static uint8_t -checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, - UBool moveToExt, UBool intersectBase) { - UCMapping *mb, *me; - int32_t *baseMap, *extMap; - int32_t b, e, bLimit, eLimit, cmp; - uint8_t result; - UBool isSISO; - - baseMap=base->reverseMap; - extMap=ext->reverseMap; - - b=e=0; - bLimit=base->mappingsLength; - eLimit=ext->mappingsLength; - - result=0; - - isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); - - for(;;) { - /* skip irrelevant mappings on both sides */ - for(;; ++b) { - if(b==bLimit) { - return result; - } - mb=base->mappings+baseMap[b]; - - if(intersectBase==2 && mb->bLen==1) { - /* - * comparing a 
base against a DBCS extension: - * leave SBCS base mappings alone - */ - continue; - } - - if(mb->f==0 || mb->f==3) { - break; - } - } - - for(;;) { - if(e==eLimit) { - return result; - } - me=ext->mappings+extMap[e]; - - if(me->f==0 || me->f==3) { - break; - } - - ++e; - } - - /* compare the base and extension mappings */ - cmp=compareBytes(base, mb, ext, me, TRUE); - if(cmp<0) { - if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - - /* - * does mb map from an input sequence that is a prefix of me's? - * for SI/SO tables, a single byte is never a prefix because it - * occurs in a separate single-byte state - */ - } else if( mb->bLenbLen && - (!isSISO || mb->bLen>1) && - 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) - ) { - if(moveToExt) { - /* mark this mapping to be moved to the extension table */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is a prefix of the input sequence of an extension mapping\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - } - - ++b; - } else if(cmp==0) { - /* - * same output: remove the extension mapping, - * otherwise treat as an error - */ - if( mb->f==me->f && mb->uLen==me->uLen && - 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) - ) { - me->moveFlag|=UCM_REMOVE_MAPPING; - result|=NEEDS_MOVE; - } else if(intersectBase) { - /* mapping in base but not in ext, move it */ - mb->moveFlag|=UCM_MOVE_TO_EXT; - result|=NEEDS_MOVE; - } else { - fprintf(stderr, - "ucm error: the base table contains a mapping whose input sequence\n" - " is the same as the input sequence of an extension mapping\n" - " but it maps differently\n"); - ucm_printMapping(base, mb, stderr); - ucm_printMapping(ext, me, stderr); - result|=HAS_ERRORS; - } - - ++b; 
- } else /* cmp>0 */ { - ++e; - } - } -} - -U_CAPI UBool U_EXPORT2 -ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { - UCMapping *m, *mLimit; - int32_t count; - UBool isOK; - - m=table->mappings; - mLimit=m+table->mappingsLength; - isOK=TRUE; - - while(mbLen); - if(count<1) { - ucm_printMapping(table, m, stderr); - isOK=FALSE; - } - ++m; - } - - return isOK; -} - -U_CAPI UBool U_EXPORT2 -ucm_checkBaseExt(UCMStates *baseStates, - UCMTable *base, UCMTable *ext, UCMTable *moveTarget, - UBool intersectBase) { - uint8_t result; - - /* if we have an extension table, we must always use precision flags */ - if(base->flagsType&UCM_FLAGS_IMPLICIT) { - fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); - return FALSE; - } - if(ext->flagsType&UCM_FLAGS_IMPLICIT) { - fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); - return FALSE; - } - - /* checking requires both tables to be sorted */ - ucm_sortTable(base); - ucm_sortTable(ext); - - /* check */ - result= - checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| - checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); - - if(result&HAS_ERRORS) { - return FALSE; - } - - if(result&NEEDS_MOVE) { - ucm_moveMappings(ext, NULL); - ucm_moveMappings(base, moveTarget); - ucm_sortTable(base); - ucm_sortTable(ext); - if(moveTarget!=NULL) { - ucm_sortTable(moveTarget); - } - } - - return TRUE; -} - -/* merge tables for rptp2ucm ------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, - const uint8_t *subchar, int32_t subcharLength, - uint8_t subchar1) { - UCMapping *fromUMapping, *toUMapping; - int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; - - ucm_sortTable(fromUTable); - ucm_sortTable(toUTable); - - fromUMapping=fromUTable->mappings; - toUMapping=toUTable->mappings; - - 
fromUTop=fromUTable->mappingsLength; - toUTop=toUTable->mappingsLength; - - fromUIndex=toUIndex=0; - - while(fromUIndexcodepage - */ - if( (fromUMapping->bLen==subcharLength && - 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || - (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) - ) { - fromUMapping->f=2; /* SUB mapping */ - } else { - fromUMapping->f=1; /* normal fallback */ - } - - ++fromUMapping; - ++fromUIndex; - } else { - /* - * the toU mapping does not have a fromU counterpart: - * (reverse) fallback codepage->Unicode, copy it to the fromU table - */ - - /* ignore reverse fallbacks to Unicode SUB */ - if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { - toUMapping->f=3; /* reverse fallback */ - ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); - - /* the table may have been reallocated */ - fromUMapping=fromUTable->mappings+fromUIndex; - } - - ++toUMapping; - ++toUIndex; - } - } - - /* either one or both tables are exhausted */ - while(fromUIndexbLen==subcharLength && - 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || - (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) - ) { - fromUMapping->f=2; /* SUB mapping */ - } else { - fromUMapping->f=1; /* normal fallback */ - } - - ++fromUMapping; - ++fromUIndex; - } - - while(toUIndexuLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { - toUMapping->f=3; /* reverse fallback */ - ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); - } - - ++toUMapping; - ++toUIndex; - } - - fromUTable->isSorted=FALSE; -} - -/* separate extension mappings out of base table for rptp2ucm --------------- */ - -U_CAPI UBool U_EXPORT2 -ucm_separateMappings(UCMFile *ucm, UBool isSISO) { - UCMTable *table; - UCMapping *m, *mLimit; - int32_t type; 
- UBool needsMove, isOK; - - table=ucm->base; - m=table->mappings; - mLimit=m+table->mappingsLength; - - needsMove=FALSE; - isOK=TRUE; - - for(; mbLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { - fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); - ucm_printMapping(table, m, stderr); - m->moveFlag|=UCM_REMOVE_MAPPING; - needsMove=TRUE; - continue; - } - - type=ucm_mappingType( - &ucm->states, m, - UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); - if(type<0) { - /* illegal byte sequence */ - printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); - isOK=FALSE; - } else if(type>0) { - m->moveFlag|=UCM_MOVE_TO_EXT; - needsMove=TRUE; - } - } - - if(!isOK) { - return FALSE; - } - if(needsMove) { - ucm_moveMappings(ucm->base, ucm->ext); - return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); - } else { - ucm_sortTable(ucm->base); - return TRUE; - } -} - -/* ucm parser --------------------------------------------------------------- */ - -U_CAPI int8_t U_EXPORT2 -ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { - const char *s=*ps; - char *end; - uint8_t byte; - int8_t bLen; - - bLen=0; - for(;;) { - /* skip an optional plus sign */ - if(bLen>0 && *s=='+') { - ++s; - } - if(*s!='\\') { - break; - } - - if( s[1]!='x' || - (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 - ) { - fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); - return -1; - } - - if(bLen==UCNV_EXT_MAX_BYTES) { - fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); - return -1; - } - bytes[bLen++]=byte; - s=end; - } - - *ps=s; - return bLen; -} - -/* parse a mapping line; must not be empty */ -U_CAPI UBool U_EXPORT2 -ucm_parseMappingLine(UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES], - const char *line) { - const char *s; - char *end; - UChar32 cp; - int32_t 
u16Length; - int8_t uLen, bLen, f; - - s=line; - uLen=bLen=0; - - /* parse code points */ - for(;;) { - /* skip an optional plus sign */ - if(uLen>0 && *s=='+') { - ++s; - } - if(*s!='<') { - break; - } - - if( s[1]!='U' || - (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || - *end!='>' - ) { - fprintf(stderr, "ucm error: Unicode code point must be formatted as (1..6 hex digits) - \"%s\"\n", line); - return FALSE; - } - if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { - fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); - return FALSE; - } - - if(uLen==UCNV_EXT_MAX_UCHARS) { - fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); - return FALSE; - } - codePoints[uLen++]=cp; - s=end+1; - } - - if(uLen==0) { - fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); - return FALSE; - } else if(uLen==1) { - m->u=codePoints[0]; - } else { - UErrorCode errorCode=U_ZERO_ERROR; - u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); - if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || - u16Length>UCNV_EXT_MAX_UCHARS - ) { - fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); - return FALSE; - } - } - - s=u_skipWhitespace(s); - - /* parse bytes */ - bLen=ucm_parseBytes(bytes, line, &s); - - if(bLen<0) { - return FALSE; - } else if(bLen==0) { - fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); - return FALSE; - } else if(bLen<=4) { - uprv_memcpy(m->b.bytes, bytes, bLen); - } - - /* skip everything until the fallback indicator, even the start of a comment */ - for(;;) { - if(*s==0) { - f=-1; /* no fallback indicator */ - break; - } else if(*s=='|') { - f=(int8_t)(s[1]-'0'); - if((uint8_t)f>4) { - fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); - return FALSE; - } - break; - } - ++s; - } - - m->uLen=uLen; - m->bLen=bLen; - m->f=f; - return TRUE; -} - -/* general APIs 
------------------------------------------------------------- */ - -U_CAPI UCMTable * U_EXPORT2 -ucm_openTable() { - UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); - if(table==NULL) { - fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - memset(table, 0, sizeof(UCMTable)); - return table; -} - -U_CAPI void U_EXPORT2 -ucm_closeTable(UCMTable *table) { - if(table!=NULL) { - uprv_free(table->mappings); - uprv_free(table->codePoints); - uprv_free(table->bytes); - uprv_free(table->reverseMap); - uprv_free(table); - } -} - -U_CAPI void U_EXPORT2 -ucm_resetTable(UCMTable *table) { - if(table!=NULL) { - table->mappingsLength=0; - table->flagsType=0; - table->unicodeMask=0; - table->bytesLength=table->codePointsLength=0; - table->isSorted=FALSE; - } -} - -U_CAPI void U_EXPORT2 -ucm_addMapping(UCMTable *table, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - UCMapping *tm; - UChar32 c; - int32_t idx; - - if(table->mappingsLength>=table->mappingsCapacity) { - /* make the mappings array larger */ - if(table->mappingsCapacity==0) { - table->mappingsCapacity=1000; - } else { - table->mappingsCapacity*=10; - } - table->mappings=(UCMapping *)uprv_realloc(table->mappings, - table->mappingsCapacity*sizeof(UCMapping)); - if(table->mappings==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", - (int)table->mappingsCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - if(table->reverseMap!=NULL) { - /* the reverseMap must be reallocated in a new sort */ - uprv_free(table->reverseMap); - table->reverseMap=NULL; - } - } - - if(m->uLen>1 && table->codePointsCapacity==0) { - table->codePointsCapacity=10000; - table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); - if(table->codePoints==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", - (int)table->codePointsCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - 
- if(m->bLen>4 && table->bytesCapacity==0) { - table->bytesCapacity=10000; - table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); - if(table->bytes==NULL) { - fprintf(stderr, "ucm error: unable to allocate %d bytes\n", - (int)table->bytesCapacity); - exit(U_MEMORY_ALLOCATION_ERROR); - } - } - - if(m->uLen>1) { - idx=table->codePointsLength; - table->codePointsLength+=m->uLen; - if(table->codePointsLength>table->codePointsCapacity) { - fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4); - m->u=idx; - } - - if(m->bLen>4) { - idx=table->bytesLength; - table->bytesLength+=m->bLen; - if(table->bytesLength>table->bytesCapacity) { - fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - uprv_memcpy(table->bytes+idx, bytes, m->bLen); - m->b.idx=idx; - } - - /* set unicodeMask */ - for(idx=0; idxuLen; ++idx) { - c=codePoints[idx]; - if(c>=0x10000) { - table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ - } else if(U_IS_SURROGATE(c)) { - table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ - } - } - - /* set flagsType */ - if(m->f<0) { - table->flagsType|=UCM_FLAGS_IMPLICIT; - } else { - table->flagsType|=UCM_FLAGS_EXPLICIT; - } - - tm=table->mappings+table->mappingsLength++; - uprv_memcpy(tm, m, sizeof(UCMapping)); - - table->isSorted=FALSE; -} - -U_CAPI UCMFile * U_EXPORT2 -ucm_open() { - UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); - if(ucm==NULL) { - fprintf(stderr, "ucm error: unable to allocate a UCMFile\n"); - exit(U_MEMORY_ALLOCATION_ERROR); - } - - memset(ucm, 0, sizeof(UCMFile)); - - ucm->base=ucm_openTable(); - ucm->ext=ucm_openTable(); - - ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; - ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; - ucm->states.outputType=-1; - 
ucm->states.minCharLength=ucm->states.maxCharLength=1; - - return ucm; -} - -U_CAPI void U_EXPORT2 -ucm_close(UCMFile *ucm) { - if(ucm!=NULL) { - ucm_closeTable(ucm->base); - ucm_closeTable(ucm->ext); - uprv_free(ucm); - } -} - -U_CAPI int32_t U_EXPORT2 -ucm_mappingType(UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - /* check validity of the bytes and count the characters in them */ - int32_t count=ucm_countChars(baseStates, bytes, m->bLen); - if(count<1) { - /* illegal byte sequence */ - return -1; - } - - /* - * Suitable for an ICU conversion base table means: - * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) - * - precision flag 0..3 - * - SBCS: any 1:1 mapping - * (the table stores additional bits to distinguish mapping types) - * - MBCS: not a |2 SUB mapping for - * - MBCS: not a |1 fallback to 0x00 - * - MBCS: not a multi-byte mapping with leading 0x00 bytes - * - * Further restrictions for fromUnicode tables - * are enforced in makeconv (MBCSOkForBaseFromUnicode()). - * - * All of the MBCS fromUnicode specific tests could be removed from here, - * but the ones above are for unusual mappings, and removing the tests - * from here would change canonucm output which seems gratuitous. - * (Markus Scherer 2006-nov-28) - * - * Exception: All implicit mappings (f<0) that need to be moved - * because of fromUnicode restrictions _must_ be moved here because - * makeconv uses a hack for moving mappings only for the fromUnicode table - * that only works with non-negative values of f. 
- */ - if( m->uLen==1 && count==1 && m->f<=3 && - (baseStates->maxCharLength==1 || - !((m->f==2 && m->bLen==1) || - (m->f==1 && bytes[0]==0) || - (m->f<=1 && m->bLen>1 && bytes[0]==0))) - ) { - return 0; /* suitable for a base table */ - } else { - return 1; /* needs to go into an extension table */ - } -} - -U_CAPI UBool U_EXPORT2 -ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, - UCMapping *m, - UChar32 codePoints[UCNV_EXT_MAX_UCHARS], - uint8_t bytes[UCNV_EXT_MAX_BYTES]) { - int32_t type; - - if(m->f==2 && m->uLen>1) { - fprintf(stderr, "ucm error: illegal |2 mapping from multiple code points\n"); - printMapping(m, codePoints, bytes, stderr); - return FALSE; - } - - if(baseStates!=NULL) { - /* check validity of the bytes and count the characters in them */ - type=ucm_mappingType(baseStates, m, codePoints, bytes); - if(type<0) { - /* illegal byte sequence */ - printMapping(m, codePoints, bytes, stderr); - return FALSE; - } - } else { - /* not used - adding a mapping for an extension-only table before its base table is read */ - type=1; - } - - /* - * Add the mapping to the base table if this is requested and suitable. - * Otherwise, add it to the extension table. 
- */ - if(forBase && type==0) { - ucm_addMapping(ucm->base, m, codePoints, bytes); - } else { - ucm_addMapping(ucm->ext, m, codePoints, bytes); - } - - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { - UCMapping m={ 0, {0}, 0, 0, 0, 0 }; - UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; - uint8_t bytes[UCNV_EXT_MAX_BYTES]; - - const char *s; - - /* ignore empty and comment lines */ - if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { - return TRUE; - } - - return - ucm_parseMappingLine(&m, codePoints, bytes, line) && - ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); -} - -U_CAPI void U_EXPORT2 -ucm_readTable(UCMFile *ucm, FileStream* convFile, - UBool forBase, UCMStates *baseStates, - UErrorCode *pErrorCode) { - char line[500]; - char *end; - UBool isOK; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - isOK=TRUE; - - for(;;) { - /* read the next line */ - if(!T_FileStream_readLine(convFile, line, sizeof(line))) { - fprintf(stderr, "incomplete charmap section\n"); - isOK=FALSE; - break; - } - - /* remove CR LF */ - end=uprv_strchr(line, 0); - while(line + +#if !UCONFIG_NO_CONVERSION + +/* -------------------------------------------------------------------------- */ + +static void +printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { + int32_t j; + + for(j=0; juLen; ++j) { + fprintf(f, "", (long)codePoints[j]); + } + + fputc(' ', f); + + for(j=0; jbLen; ++j) { + fprintf(f, "\\x%02X", bytes[j]); + } + + if(m->f>=0) { + fprintf(f, " |%u\n", m->f); + } else { + fputs("\n", f); + } +} + +U_CAPI void U_EXPORT2 +ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { + printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f); +} + +U_CAPI void U_EXPORT2 +ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { + UCMapping *m; + int32_t i, length; + + m=table->mappings; + 
length=table->mappingsLength; + if(byUnicode) { + for(i=0; ireverseMap; + for(i=0; iuLen==1 && r->uLen==1) { + /* compare two single code points */ + return l->u-r->u; + } + + /* get pointers to the code point sequences */ + lu=UCM_GET_CODE_POINTS(lTable, l); + ru=UCM_GET_CODE_POINTS(rTable, r); + + /* get the minimum length */ + if(l->uLen<=r->uLen) { + length=l->uLen; + } else { + length=r->uLen; + } + + /* compare the code points */ + for(i=0; iuLen-r->uLen; +} + +static int32_t +compareBytes(UCMTable *lTable, const UCMapping *l, + UCMTable *rTable, const UCMapping *r, + UBool lexical) { + const uint8_t *lb, *rb; + int32_t result, i, length; + + /* + * A lexical comparison is used for sorting in the builder, to allow + * an efficient search for a byte sequence that could be a prefix + * of a previously entered byte sequence. + * + * Comparing by lengths first is for compatibility with old .ucm tools + * like canonucm and rptp2ucm. + */ + if(lexical) { + /* get the minimum length and continue */ + if(l->bLen<=r->bLen) { + length=l->bLen; + } else { + length=r->bLen; + } + } else { + /* compare lengths first */ + result=l->bLen-r->bLen; + if(result!=0) { + return result; + } else { + length=l->bLen; + } + } + + /* get pointers to the byte sequences */ + lb=UCM_GET_BYTES(lTable, l); + rb=UCM_GET_BYTES(rTable, r); + + /* compare the bytes */ + for(i=0; ibLen-r->bLen; +} + +/* compare UCMappings for sorting */ +static int32_t +compareMappings(UCMTable *lTable, const UCMapping *l, + UCMTable *rTable, const UCMapping *r, + UBool uFirst) { + int32_t result; + + /* choose which side to compare first */ + if(uFirst) { + /* Unicode then bytes */ + result=compareUnicode(lTable, l, rTable, r); + if(result==0) { + result=compareBytes(lTable, l, rTable, r, FALSE); /* not lexically, like canonucm */ + } + } else { + /* bytes then Unicode */ + result=compareBytes(lTable, l, rTable, r, TRUE); /* lexically, for builder */ + if(result==0) { + result=compareUnicode(lTable, l, 
rTable, r); + } + } + + if(result!=0) { + return result; + } + + /* compare the flags */ + return l->f-r->f; +} +U_CDECL_BEGIN +/* sorting by Unicode first sorts mappings directly */ +static int32_t U_CALLCONV +compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { + return compareMappings( + (UCMTable *)context, (const UCMapping *)left, + (UCMTable *)context, (const UCMapping *)right, TRUE); +} + +/* sorting by bytes first sorts the reverseMap; use indirection to mappings */ +static int32_t U_CALLCONV +compareMappingsBytesFirst(const void *context, const void *left, const void *right) { + UCMTable *table=(UCMTable *)context; + int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; + return compareMappings( + table, table->mappings+l, + table, table->mappings+r, FALSE); +} +U_CDECL_END + +U_CAPI void U_EXPORT2 +ucm_sortTable(UCMTable *t) { + UErrorCode errorCode; + int32_t i; + + if(t->isSorted) { + return; + } + + errorCode=U_ZERO_ERROR; + + /* 1. sort by Unicode first */ + uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping), + compareMappingsUnicodeFirst, t, + FALSE, &errorCode); + + /* build the reverseMap */ + if(t->reverseMap==NULL) { + /* + * allocate mappingsCapacity instead of mappingsLength so that + * if mappings are added, the reverseMap need not be + * reallocated each time + * (see ucm_moveMappings() and ucm_addMapping()) + */ + t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t)); + if(t->reverseMap==NULL) { + fprintf(stderr, "ucm error: unable to allocate reverseMap\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + } + for(i=0; imappingsLength; ++i) { + t->reverseMap[i]=i; + } + + /* 2. 
sort reverseMap by mappings bytes first */ + uprv_sortArray(t->reverseMap, t->mappingsLength, sizeof(int32_t), + compareMappingsBytesFirst, t, + FALSE, &errorCode); + + if(U_FAILURE(errorCode)) { + fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", + u_errorName(errorCode)); + exit(errorCode); + } + + t->isSorted=TRUE; +} + +/* + * remove mappings with their move flag set from the base table + * and move some of them (with UCM_MOVE_TO_EXT) to the extension table + */ +U_CAPI void U_EXPORT2 +ucm_moveMappings(UCMTable *base, UCMTable *ext) { + UCMapping *mb, *mbLimit; + int8_t flag; + + mb=base->mappings; + mbLimit=mb+base->mappingsLength; + + while(mbmoveFlag; + if(flag!=0) { + /* reset the move flag */ + mb->moveFlag=0; + + if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) { + /* add the mapping to the extension table */ + ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb)); + } + + /* remove this mapping: move the last base mapping down and overwrite the current one */ + if(mb<(mbLimit-1)) { + uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping)); + } + --mbLimit; + --base->mappingsLength; + base->isSorted=FALSE; + } else { + ++mb; + } + } +} + +enum { + NEEDS_MOVE=1, + HAS_ERRORS=2 +}; + +static uint8_t +checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, + UBool moveToExt, UBool intersectBase) { + (void)baseStates; + + UCMapping *mb, *me, *mbLimit, *meLimit; + int32_t cmp; + uint8_t result; + + mb=base->mappings; + mbLimit=mb+base->mappingsLength; + + me=ext->mappings; + meLimit=me+ext->mappingsLength; + + result=0; + + for(;;) { + /* skip irrelevant mappings on both sides */ + for(;;) { + if(mb==mbLimit) { + return result; + } + + if((0<=mb->f && mb->f<=2) || mb->f==4) { + break; + } + + ++mb; + } + + for(;;) { + if(me==meLimit) { + return result; + } + + if((0<=me->f && me->f<=2) || me->f==4) { + break; + } + + ++me; + } + + /* compare the base and extension mappings */ + cmp=compareUnicode(base, mb, ext, 
me); + if(cmp<0) { + if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { + /* + * mapping in base but not in ext, move it + * + * if ext is DBCS, move DBCS mappings here + * and check SBCS ones for Unicode prefix below + */ + mb->moveFlag|=UCM_MOVE_TO_EXT; + result|=NEEDS_MOVE; + + /* does mb map from an input sequence that is a prefix of me's? */ + } else if( mb->uLenuLen && + 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) + ) { + if(moveToExt) { + /* mark this mapping to be moved to the extension table */ + mb->moveFlag|=UCM_MOVE_TO_EXT; + result|=NEEDS_MOVE; + } else { + fprintf(stderr, + "ucm error: the base table contains a mapping whose input sequence\n" + " is a prefix of the input sequence of an extension mapping\n"); + ucm_printMapping(base, mb, stderr); + ucm_printMapping(ext, me, stderr); + result|=HAS_ERRORS; + } + } + + ++mb; + } else if(cmp==0) { + /* + * same output: remove the extension mapping, + * otherwise treat as an error + */ + if( mb->f==me->f && mb->bLen==me->bLen && + 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) + ) { + me->moveFlag|=UCM_REMOVE_MAPPING; + result|=NEEDS_MOVE; + } else if(intersectBase) { + /* mapping in base but not in ext, move it */ + mb->moveFlag|=UCM_MOVE_TO_EXT; + result|=NEEDS_MOVE; + } else { + fprintf(stderr, + "ucm error: the base table contains a mapping whose input sequence\n" + " is the same as the input sequence of an extension mapping\n" + " but it maps differently\n"); + ucm_printMapping(base, mb, stderr); + ucm_printMapping(ext, me, stderr); + result|=HAS_ERRORS; + } + + ++mb; + } else /* cmp>0 */ { + ++me; + } + } +} + +static uint8_t +checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, + UBool moveToExt, UBool intersectBase) { + UCMapping *mb, *me; + int32_t *baseMap, *extMap; + int32_t b, e, bLimit, eLimit, cmp; + uint8_t result; + UBool isSISO; + + baseMap=base->reverseMap; + extMap=ext->reverseMap; + + b=e=0; + 
bLimit=base->mappingsLength; + eLimit=ext->mappingsLength; + + result=0; + + isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); + + for(;;) { + /* skip irrelevant mappings on both sides */ + for(;; ++b) { + if(b==bLimit) { + return result; + } + mb=base->mappings+baseMap[b]; + + if(intersectBase==2 && mb->bLen==1) { + /* + * comparing a base against a DBCS extension: + * leave SBCS base mappings alone + */ + continue; + } + + if(mb->f==0 || mb->f==3) { + break; + } + } + + for(;;) { + if(e==eLimit) { + return result; + } + me=ext->mappings+extMap[e]; + + if(me->f==0 || me->f==3) { + break; + } + + ++e; + } + + /* compare the base and extension mappings */ + cmp=compareBytes(base, mb, ext, me, TRUE); + if(cmp<0) { + if(intersectBase) { + /* mapping in base but not in ext, move it */ + mb->moveFlag|=UCM_MOVE_TO_EXT; + result|=NEEDS_MOVE; + + /* + * does mb map from an input sequence that is a prefix of me's? + * for SI/SO tables, a single byte is never a prefix because it + * occurs in a separate single-byte state + */ + } else if( mb->bLenbLen && + (!isSISO || mb->bLen>1) && + 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen) + ) { + if(moveToExt) { + /* mark this mapping to be moved to the extension table */ + mb->moveFlag|=UCM_MOVE_TO_EXT; + result|=NEEDS_MOVE; + } else { + fprintf(stderr, + "ucm error: the base table contains a mapping whose input sequence\n" + " is a prefix of the input sequence of an extension mapping\n"); + ucm_printMapping(base, mb, stderr); + ucm_printMapping(ext, me, stderr); + result|=HAS_ERRORS; + } + } + + ++b; + } else if(cmp==0) { + /* + * same output: remove the extension mapping, + * otherwise treat as an error + */ + if( mb->f==me->f && mb->uLen==me->uLen && + 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen) + ) { + me->moveFlag|=UCM_REMOVE_MAPPING; + result|=NEEDS_MOVE; + } else if(intersectBase) { + /* mapping in base but not in ext, move it */ + 
mb->moveFlag|=UCM_MOVE_TO_EXT; + result|=NEEDS_MOVE; + } else { + fprintf(stderr, + "ucm error: the base table contains a mapping whose input sequence\n" + " is the same as the input sequence of an extension mapping\n" + " but it maps differently\n"); + ucm_printMapping(base, mb, stderr); + ucm_printMapping(ext, me, stderr); + result|=HAS_ERRORS; + } + + ++b; + } else /* cmp>0 */ { + ++e; + } + } +} + +U_CAPI UBool U_EXPORT2 +ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { + UCMapping *m, *mLimit; + int32_t count; + UBool isOK; + + m=table->mappings; + mLimit=m+table->mappingsLength; + isOK=TRUE; + + while(mbLen); + if(count<1) { + ucm_printMapping(table, m, stderr); + isOK=FALSE; + } + ++m; + } + + return isOK; +} + +U_CAPI UBool U_EXPORT2 +ucm_checkBaseExt(UCMStates *baseStates, + UCMTable *base, UCMTable *ext, UCMTable *moveTarget, + UBool intersectBase) { + uint8_t result; + + /* if we have an extension table, we must always use precision flags */ + if(base->flagsType&UCM_FLAGS_IMPLICIT) { + fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n"); + return FALSE; + } + if(ext->flagsType&UCM_FLAGS_IMPLICIT) { + fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n"); + return FALSE; + } + + /* checking requires both tables to be sorted */ + ucm_sortTable(base); + ucm_sortTable(ext); + + /* check */ + result= + checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase)| + checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL), intersectBase); + + if(result&HAS_ERRORS) { + return FALSE; + } + + if(result&NEEDS_MOVE) { + ucm_moveMappings(ext, NULL); + ucm_moveMappings(base, moveTarget); + ucm_sortTable(base); + ucm_sortTable(ext); + if(moveTarget!=NULL) { + ucm_sortTable(moveTarget); + } + } + + return TRUE; +} + +/* merge tables for rptp2ucm ------------------------------------------------ */ + +U_CAPI void U_EXPORT2 +ucm_mergeTables(UCMTable 
*fromUTable, UCMTable *toUTable, + const uint8_t *subchar, int32_t subcharLength, + uint8_t subchar1) { + UCMapping *fromUMapping, *toUMapping; + int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; + + ucm_sortTable(fromUTable); + ucm_sortTable(toUTable); + + fromUMapping=fromUTable->mappings; + toUMapping=toUTable->mappings; + + fromUTop=fromUTable->mappingsLength; + toUTop=toUTable->mappingsLength; + + fromUIndex=toUIndex=0; + + while(fromUIndexcodepage + */ + if( (fromUMapping->bLen==subcharLength && + 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || + (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) + ) { + fromUMapping->f=2; /* SUB mapping */ + } else { + fromUMapping->f=1; /* normal fallback */ + } + + ++fromUMapping; + ++fromUIndex; + } else { + /* + * the toU mapping does not have a fromU counterpart: + * (reverse) fallback codepage->Unicode, copy it to the fromU table + */ + + /* ignore reverse fallbacks to Unicode SUB */ + if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { + toUMapping->f=3; /* reverse fallback */ + ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping)); + + /* the table may have been reallocated */ + fromUMapping=fromUTable->mappings+fromUIndex; + } + + ++toUMapping; + ++toUIndex; + } + } + + /* either one or both tables are exhausted */ + while(fromUIndexbLen==subcharLength && + 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) || + (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) + ) { + fromUMapping->f=2; /* SUB mapping */ + } else { + fromUMapping->f=1; /* normal fallback */ + } + + ++fromUMapping; + ++fromUIndex; + } + + while(toUIndexuLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { + toUMapping->f=3; /* reverse fallback */ + ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), 
UCM_GET_BYTES(toUTable, toUMapping)); + } + + ++toUMapping; + ++toUIndex; + } + + fromUTable->isSorted=FALSE; +} + +/* separate extension mappings out of base table for rptp2ucm --------------- */ + +U_CAPI UBool U_EXPORT2 +ucm_separateMappings(UCMFile *ucm, UBool isSISO) { + UCMTable *table; + UCMapping *m, *mLimit; + int32_t type; + UBool needsMove, isOK; + + table=ucm->base; + m=table->mappings; + mLimit=m+table->mappingsLength; + + needsMove=FALSE; + isOK=TRUE; + + for(; mbLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { + fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); + ucm_printMapping(table, m, stderr); + m->moveFlag|=UCM_REMOVE_MAPPING; + needsMove=TRUE; + continue; + } + + type=ucm_mappingType( + &ucm->states, m, + UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m)); + if(type<0) { + /* illegal byte sequence */ + printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr); + isOK=FALSE; + } else if(type>0) { + m->moveFlag|=UCM_MOVE_TO_EXT; + needsMove=TRUE; + } + } + + if(!isOK) { + return FALSE; + } + if(needsMove) { + ucm_moveMappings(ucm->base, ucm->ext); + return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE); + } else { + ucm_sortTable(ucm->base); + return TRUE; + } +} + +/* ucm parser --------------------------------------------------------------- */ + +U_CAPI int8_t U_EXPORT2 +ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) { + const char *s=*ps; + char *end; + uint8_t byte; + int8_t bLen; + + bLen=0; + for(;;) { + /* skip an optional plus sign */ + if(bLen>0 && *s=='+') { + ++s; + } + if(*s!='\\') { + break; + } + + if( s[1]!='x' || + (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 + ) { + fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); + return -1; + } + + if(bLen==UCNV_EXT_MAX_BYTES) { + fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line); + return -1; + } + 
bytes[bLen++]=byte; + s=end; + } + + *ps=s; + return bLen; +} + +/* parse a mapping line; must not be empty */ +U_CAPI UBool U_EXPORT2 +ucm_parseMappingLine(UCMapping *m, + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], + uint8_t bytes[UCNV_EXT_MAX_BYTES], + const char *line) { + const char *s; + char *end; + UChar32 cp; + int32_t u16Length; + int8_t uLen, bLen, f; + + s=line; + uLen=bLen=0; + + /* parse code points */ + for(;;) { + /* skip an optional plus sign */ + if(uLen>0 && *s=='+') { + ++s; + } + if(*s!='<') { + break; + } + + if( s[1]!='U' || + (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || + *end!='>' + ) { + fprintf(stderr, "ucm error: Unicode code point must be formatted as (1..6 hex digits) - \"%s\"\n", line); + return FALSE; + } + if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) { + fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); + return FALSE; + } + + if(uLen==UCNV_EXT_MAX_UCHARS) { + fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line); + return FALSE; + } + codePoints[uLen++]=cp; + s=end+1; + } + + if(uLen==0) { + fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line); + return FALSE; + } else if(uLen==1) { + m->u=codePoints[0]; + } else { + UErrorCode errorCode=U_ZERO_ERROR; + u_strFromUTF32(NULL, 0, &u16Length, codePoints, uLen, &errorCode); + if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || + u16Length>UCNV_EXT_MAX_UCHARS + ) { + fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line); + return FALSE; + } + } + + s=u_skipWhitespace(s); + + /* parse bytes */ + bLen=ucm_parseBytes(bytes, line, &s); + + if(bLen<0) { + return FALSE; + } else if(bLen==0) { + fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line); + return FALSE; + } else if(bLen<=4) { + uprv_memcpy(m->b.bytes, bytes, bLen); + } + + /* skip everything until the fallback indicator, even the start of a comment */ + for(;;) { + if(*s==0) { + f=-1; /* no fallback indicator */ + 
break; + } else if(*s=='|') { + f=(int8_t)(s[1]-'0'); + if((uint8_t)f>4) { + fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); + return FALSE; + } + break; + } + ++s; + } + + m->uLen=uLen; + m->bLen=bLen; + m->f=f; + return TRUE; +} + +/* general APIs ------------------------------------------------------------- */ + +U_CAPI UCMTable * U_EXPORT2 +ucm_openTable() { + UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable)); + if(table==NULL) { + fprintf(stderr, "ucm error: unable to allocate a UCMTable\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + memset(table, 0, sizeof(UCMTable)); + return table; +} + +U_CAPI void U_EXPORT2 +ucm_closeTable(UCMTable *table) { + if(table!=NULL) { + uprv_free(table->mappings); + uprv_free(table->codePoints); + uprv_free(table->bytes); + uprv_free(table->reverseMap); + uprv_free(table); + } +} + +U_CAPI void U_EXPORT2 +ucm_resetTable(UCMTable *table) { + if(table!=NULL) { + table->mappingsLength=0; + table->flagsType=0; + table->unicodeMask=0; + table->bytesLength=table->codePointsLength=0; + table->isSorted=FALSE; + } +} + +U_CAPI void U_EXPORT2 +ucm_addMapping(UCMTable *table, + UCMapping *m, + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], + uint8_t bytes[UCNV_EXT_MAX_BYTES]) { + UCMapping *tm; + UChar32 c; + int32_t idx; + + if(table->mappingsLength>=table->mappingsCapacity) { + /* make the mappings array larger */ + if(table->mappingsCapacity==0) { + table->mappingsCapacity=1000; + } else { + table->mappingsCapacity*=10; + } + table->mappings=(UCMapping *)uprv_realloc(table->mappings, + table->mappingsCapacity*sizeof(UCMapping)); + if(table->mappings==NULL) { + fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n", + (int)table->mappingsCapacity); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + if(table->reverseMap!=NULL) { + /* the reverseMap must be reallocated in a new sort */ + uprv_free(table->reverseMap); + table->reverseMap=NULL; + } + } + + if(m->uLen>1 && table->codePointsCapacity==0) { 
+ table->codePointsCapacity=10000; + table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4); + if(table->codePoints==NULL) { + fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n", + (int)table->codePointsCapacity); + exit(U_MEMORY_ALLOCATION_ERROR); + } + } + + if(m->bLen>4 && table->bytesCapacity==0) { + table->bytesCapacity=10000; + table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity); + if(table->bytes==NULL) { + fprintf(stderr, "ucm error: unable to allocate %d bytes\n", + (int)table->bytesCapacity); + exit(U_MEMORY_ALLOCATION_ERROR); + } + } + + if(m->uLen>1) { + idx=table->codePointsLength; + table->codePointsLength+=m->uLen; + if(table->codePointsLength>table->codePointsCapacity) { + fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4); + m->u=idx; + } + + if(m->bLen>4) { + idx=table->bytesLength; + table->bytesLength+=m->bLen; + if(table->bytesLength>table->bytesCapacity) { + fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + uprv_memcpy(table->bytes+idx, bytes, m->bLen); + m->b.idx=idx; + } + + /* set unicodeMask */ + for(idx=0; idxuLen; ++idx) { + c=codePoints[idx]; + if(c>=0x10000) { + table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */ + } else if(U_IS_SURROGATE(c)) { + table->unicodeMask|=UCNV_HAS_SURROGATES; /* there are surrogate code points */ + } + } + + /* set flagsType */ + if(m->f<0) { + table->flagsType|=UCM_FLAGS_IMPLICIT; + } else { + table->flagsType|=UCM_FLAGS_EXPLICIT; + } + + tm=table->mappings+table->mappingsLength++; + uprv_memcpy(tm, m, sizeof(UCMapping)); + + table->isSorted=FALSE; +} + +U_CAPI UCMFile * U_EXPORT2 +ucm_open() { + UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile)); + if(ucm==NULL) { + fprintf(stderr, "ucm error: unable to allocate a 
UCMFile\n"); + exit(U_MEMORY_ALLOCATION_ERROR); + } + + memset(ucm, 0, sizeof(UCMFile)); + + ucm->base=ucm_openTable(); + ucm->ext=ucm_openTable(); + + ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; + ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; + ucm->states.outputType=-1; + ucm->states.minCharLength=ucm->states.maxCharLength=1; + + return ucm; +} + +U_CAPI void U_EXPORT2 +ucm_close(UCMFile *ucm) { + if(ucm!=NULL) { + ucm_closeTable(ucm->base); + ucm_closeTable(ucm->ext); + uprv_free(ucm); + } +} + +U_CAPI int32_t U_EXPORT2 +ucm_mappingType(UCMStates *baseStates, + UCMapping *m, + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], + uint8_t bytes[UCNV_EXT_MAX_BYTES]) { + (void)codePoints; + /* check validity of the bytes and count the characters in them */ + int32_t count=ucm_countChars(baseStates, bytes, m->bLen); + if(count<1) { + /* illegal byte sequence */ + return -1; + } + + /* + * Suitable for an ICU conversion base table means: + * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) + * - precision flag 0..3 + * - SBCS: any 1:1 mapping + * (the table stores additional bits to distinguish mapping types) + * - MBCS: not a |2 SUB mapping for + * - MBCS: not a |1 fallback to 0x00 + * - MBCS: not a multi-byte mapping with leading 0x00 bytes + * + * Further restrictions for fromUnicode tables + * are enforced in makeconv (MBCSOkForBaseFromUnicode()). + * + * All of the MBCS fromUnicode specific tests could be removed from here, + * but the ones above are for unusual mappings, and removing the tests + * from here would change canonucm output which seems gratuitous. + * (Markus Scherer 2006-nov-28) + * + * Exception: All implicit mappings (f<0) that need to be moved + * because of fromUnicode restrictions _must_ be moved here because + * makeconv uses a hack for moving mappings only for the fromUnicode table + * that only works with non-negative values of f. 
+ */ + if( m->uLen==1 && count==1 && m->f<=3 && + (baseStates->maxCharLength==1 || + !((m->f==2 && m->bLen==1) || + (m->f==1 && bytes[0]==0) || + (m->f<=1 && m->bLen>1 && bytes[0]==0))) + ) { + return 0; /* suitable for a base table */ + } else { + return 1; /* needs to go into an extension table */ + } +} + +U_CAPI UBool U_EXPORT2 +ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, + UCMapping *m, + UChar32 codePoints[UCNV_EXT_MAX_UCHARS], + uint8_t bytes[UCNV_EXT_MAX_BYTES]) { + int32_t type; + + if(m->f==2 && m->uLen>1) { + fprintf(stderr, "ucm error: illegal |2 mapping from multiple code points\n"); + printMapping(m, codePoints, bytes, stderr); + return FALSE; + } + + if(baseStates!=NULL) { + /* check validity of the bytes and count the characters in them */ + type=ucm_mappingType(baseStates, m, codePoints, bytes); + if(type<0) { + /* illegal byte sequence */ + printMapping(m, codePoints, bytes, stderr); + return FALSE; + } + } else { + /* not used - adding a mapping for an extension-only table before its base table is read */ + type=1; + } + + /* + * Add the mapping to the base table if this is requested and suitable. + * Otherwise, add it to the extension table. 
+ */ + if(forBase && type==0) { + ucm_addMapping(ucm->base, m, codePoints, bytes); + } else { + ucm_addMapping(ucm->ext, m, codePoints, bytes); + } + + return TRUE; +} + +U_CAPI UBool U_EXPORT2 +ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { + UCMapping m={ 0, {0}, 0, 0, 0, 0 }; + UChar32 codePoints[UCNV_EXT_MAX_UCHARS]; + uint8_t bytes[UCNV_EXT_MAX_BYTES]; + + const char *s; + + /* ignore empty and comment lines */ + if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { + return TRUE; + } + + return + ucm_parseMappingLine(&m, codePoints, bytes, line) && + ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); +} + +U_CAPI void U_EXPORT2 +ucm_readTable(UCMFile *ucm, FileStream* convFile, + UBool forBase, UCMStates *baseStates, + UErrorCode *pErrorCode) { + char line[500]; + char *end; + UBool isOK; + + if(U_FAILURE(*pErrorCode)) { + return; + } + + isOK=TRUE; + + for(;;) { + /* read the next line */ + if(!T_FileStream_readLine(convFile, line, sizeof(line))) { + fprintf(stderr, "incomplete charmap section\n"); + isOK=FALSE; + break; + } + + /* remove CR LF */ + end=uprv_strchr(line, 0); + while(line - -#if !UCONFIG_NO_CONVERSION - -/* MBCS state handling ------------------------------------------------------ */ - -/* - * state table row grammar (ebnf-style): - * (whitespace is allowed between all tokens) - * - * row=[[firstentry ','] entry (',' entry)*] - * firstentry="initial" | "surrogates" - * (initial state (default for state 0), output is all surrogate pairs) - * entry=range [':' nextstate] ['.' 
action] - * range=number ['-' number] - * nextstate=number - * (0..7f) - * action='u' | 's' | 'p' | 'i' - * (unassigned, state change only, surrogate pair, illegal) - * number=(1- or 2-digit hexadecimal number) - */ -static const char * -parseState(const char *s, int32_t state[256], uint32_t *pFlags) { - const char *t; - uint32_t start, end, i; - int32_t entry; - - /* initialize the state: all illegal with U+ffff */ - for(i=0; i<256; ++i) { - state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0xffff); - } - - /* skip leading white space */ - s=u_skipWhitespace(s); - - /* is there an "initial" or "surrogates" directive? */ - if(uprv_strncmp("initial", s, 7)==0) { - *pFlags=MBCS_STATE_FLAG_DIRECT; - s=u_skipWhitespace(s+7); - if(*s++!=',') { - return s-1; - } - } else if(*pFlags==0 && uprv_strncmp("surrogates", s, 10)==0) { - *pFlags=MBCS_STATE_FLAG_SURROGATES; - s=u_skipWhitespace(s+10); - if(*s++!=',') { - return s-1; - } - } else if(*s==0) { - /* empty state row: all-illegal */ - return NULL; - } - - for(;;) { - /* read an entry, the start of the range first */ - s=u_skipWhitespace(s); - start=uprv_strtoul(s, (char **)&t, 16); - if(s==t || 0xffcountStates==MBCS_MAX_STATE_COUNT) { - fprintf(stderr, "ucm error: too many states (maximum %u)\n", MBCS_MAX_STATE_COUNT); - exit(U_INVALID_TABLE_FORMAT); - } - - error=parseState(s, states->stateTable[states->countStates], - &states->stateFlags[states->countStates]); - if(error!=NULL) { - fprintf(stderr, "ucm error: parse error in state definition at '%s'\n", error); - exit(U_INVALID_TABLE_FORMAT); - } - - ++states->countStates; -} - -U_CAPI UBool U_EXPORT2 -ucm_parseHeaderLine(UCMFile *ucm, - char *line, char **pKey, char **pValue) { - UCMStates *states; - char *s, *end; - char c; - - states=&ucm->states; - - /* remove comments and trailing CR and LF and remove whitespace from the end */ - for(end=line; (c=*end)!=0; ++end) { - if(c=='#' || c=='\r' || c=='\n') { - break; - } - } - while(end>line && (*(end-1)==' ' || 
*(end-1)=='\t')) { - --end; - } - *end=0; - - /* skip leading white space and ignore empty lines */ - s=(char *)u_skipWhitespace(line); - if(*s==0) { - return TRUE; - } - - /* stop at the beginning of the mapping section */ - if(uprv_memcmp(s, "CHARMAP", 7)==0) { - return FALSE; - } - - /* get the key name, bracketed in <> */ - if(*s!='<') { - fprintf(stderr, "ucm error: no header field in line \"%s\"\n", line); - exit(U_INVALID_TABLE_FORMAT); - } - *pKey=++s; - while(*s!='>') { - if(*s==0) { - fprintf(stderr, "ucm error: incomplete header field in line \"%s\"\n", line); - exit(U_INVALID_TABLE_FORMAT); - } - ++s; - } - *s=0; - - /* get the value string, possibly quoted */ - s=(char *)u_skipWhitespace(s+1); - if(*s!='"') { - *pValue=s; - } else { - /* remove the quotes */ - *pValue=s+1; - if(end>*pValue && *(end-1)=='"') { - *--end=0; - } - } - - /* collect the information from the header field, ignore unknown keys */ - if(uprv_strcmp(*pKey, "uconv_class")==0) { - if(uprv_strcmp(*pValue, "DBCS")==0) { - states->conversionType=UCNV_DBCS; - } else if(uprv_strcmp(*pValue, "SBCS")==0) { - states->conversionType = UCNV_SBCS; - } else if(uprv_strcmp(*pValue, "MBCS")==0) { - states->conversionType = UCNV_MBCS; - } else if(uprv_strcmp(*pValue, "EBCDIC_STATEFUL")==0) { - states->conversionType = UCNV_EBCDIC_STATEFUL; - } else { - fprintf(stderr, "ucm error: unknown %s\n", *pValue); - exit(U_INVALID_TABLE_FORMAT); - } - return TRUE; - } else if(uprv_strcmp(*pKey, "mb_cur_max")==0) { - c=**pValue; - if('1'<=c && c<='4' && (*pValue)[1]==0) { - states->maxCharLength=(int8_t)(c-'0'); - states->outputType=(int8_t)(states->maxCharLength-1); - } else { - fprintf(stderr, "ucm error: illegal %s\n", *pValue); - exit(U_INVALID_TABLE_FORMAT); - } - return TRUE; - } else if(uprv_strcmp(*pKey, "mb_cur_min")==0) { - c=**pValue; - if('1'<=c && c<='4' && (*pValue)[1]==0) { - states->minCharLength=(int8_t)(c-'0'); - } else { - fprintf(stderr, "ucm error: illegal %s\n", *pValue); - 
exit(U_INVALID_TABLE_FORMAT); - } - return TRUE; - } else if(uprv_strcmp(*pKey, "icu:state")==0) { - /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */ - switch(states->conversionType) { - case UCNV_SBCS: - case UCNV_DBCS: - case UCNV_EBCDIC_STATEFUL: - states->conversionType=UCNV_MBCS; - break; - case UCNV_MBCS: - break; - default: - fprintf(stderr, "ucm error: entry for non-MBCS table or before the line\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - if(states->maxCharLength==0) { - fprintf(stderr, "ucm error: before the line\n"); - exit(U_INVALID_TABLE_FORMAT); - } - ucm_addState(states, *pValue); - return TRUE; - } else if(uprv_strcmp(*pKey, "icu:base")==0) { - if(**pValue==0) { - fprintf(stderr, "ucm error: without a base table name\n"); - exit(U_INVALID_TABLE_FORMAT); - } - uprv_strcpy(ucm->baseName, *pValue); - return TRUE; - } - - return FALSE; -} - -/* post-processing ---------------------------------------------------------- */ - -static int32_t -sumUpStates(UCMStates *states) { - int32_t entry, sum, state, cell, count; - UBool allStatesReady; - - /* - * Sum up the offsets for all states. - * In each final state (where there are only final entries), - * the offsets add up directly. - * In all other state table rows, for each transition entry to another state, - * the offsets sum of that state needs to be added. - * This is achieved in at most countStates iterations. 
- */ - allStatesReady=FALSE; - for(count=states->countStates; !allStatesReady && count>=0; --count) { - allStatesReady=TRUE; - for(state=states->countStates-1; state>=0; --state) { - if(!(states->stateFlags[state]&MBCS_STATE_FLAG_READY)) { - allStatesReady=FALSE; - sum=0; - - /* at first, add up only the final delta offsets to keep them <512 */ - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if(MBCS_ENTRY_IS_FINAL(entry)) { - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum); - sum+=1; - break; - case MBCS_STATE_VALID_16_PAIR: - states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum); - sum+=2; - break; - default: - /* no addition */ - break; - } - } - } - - /* now, add up the delta offsets for the transitional entries */ - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - if(states->stateFlags[MBCS_ENTRY_TRANSITION_STATE(entry)]&MBCS_STATE_FLAG_READY) { - states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, sum); - sum+=states->stateOffsetSum[MBCS_ENTRY_TRANSITION_STATE(entry)]; - } else { - /* that next state does not have a sum yet, we cannot finish the one for this state */ - sum=-1; - break; - } - } - } - - if(sum!=-1) { - states->stateOffsetSum[state]=sum; - states->stateFlags[state]|=MBCS_STATE_FLAG_READY; - } - } - } - } - - if(!allStatesReady) { - fprintf(stderr, "ucm error: the state table contains loops\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - /* - * For all "direct" (i.e., initial) states>0, - * the offsets need to be increased by the sum of - * the previous initial states. 
- */ - sum=states->stateOffsetSum[0]; - for(state=1; statecountStates; ++state) { - if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { - int32_t sum2=sum; - sum+=states->stateOffsetSum[state]; - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, sum2); - } - } - } - } - - /* round up to the next even number to have the following data 32-bit-aligned */ - return states->countToUCodeUnits=(sum+1)&~1; -} - -U_CAPI void U_EXPORT2 -ucm_processStates(UCMStates *states, UBool ignoreSISOCheck) { - int32_t entry, state, cell, count; - - if(states->conversionType==UCNV_UNSUPPORTED_CONVERTER) { - fprintf(stderr, "ucm error: missing conversion type ()\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - if(states->countStates==0) { - switch(states->conversionType) { - case UCNV_SBCS: - /* SBCS: use MBCS data structure with a default state table */ - if(states->maxCharLength!=1) { - fprintf(stderr, "error: SBCS codepage with max B/char!=1\n"); - exit(U_INVALID_TABLE_FORMAT); - } - states->conversionType=UCNV_MBCS; - ucm_addState(states, "0-ff"); - break; - case UCNV_MBCS: - fprintf(stderr, "ucm error: missing state table information () for MBCS\n"); - exit(U_INVALID_TABLE_FORMAT); - break; - case UCNV_EBCDIC_STATEFUL: - /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */ - if(states->minCharLength!=1 || states->maxCharLength!=2) { - fprintf(stderr, "error: DBCS codepage with min B/char!=1 or max B/char!=2\n"); - exit(U_INVALID_TABLE_FORMAT); - } - states->conversionType=UCNV_MBCS; - ucm_addState(states, "0-ff, e:1.s, f:0.s"); - ucm_addState(states, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4"); - ucm_addState(states, "0-40:1.i, 41-fe:1., ff:1.i"); - ucm_addState(states, "0-ff:1.i, 40:1."); - ucm_addState(states, "0-ff:1.i"); - break; - case UCNV_DBCS: - /* DBCS: use MBCS data structure with a default state table 
*/ - if(states->minCharLength!=2 || states->maxCharLength!=2) { - fprintf(stderr, "error: DBCS codepage with min or max B/char!=2\n"); - exit(U_INVALID_TABLE_FORMAT); - } - states->conversionType = UCNV_MBCS; - ucm_addState(states, "0-3f:3, 40:2, 41-fe:1, ff:3"); - ucm_addState(states, "41-fe"); - ucm_addState(states, "40"); - ucm_addState(states, ""); - break; - default: - fprintf(stderr, "ucm error: unknown charset structure\n"); - exit(U_INVALID_TABLE_FORMAT); - break; - } - } - - /* - * check that the min/max character lengths are reasonable; - * to do this right, all paths through the state table would have to be - * recursively walked while keeping track of the sequence lengths, - * but these simple checks cover most state tables in practice - */ - if(states->maxCharLengthminCharLength) { - fprintf(stderr, "ucm error: max B/char < min B/char\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - /* count non-direct states and compare with max B/char */ - count=0; - for(state=0; statecountStates; ++state) { - if((states->stateFlags[state]&0xf)!=MBCS_STATE_FLAG_DIRECT) { - ++count; - } - } - if(states->maxCharLength>count+1) { - fprintf(stderr, "ucm error: max B/char too large\n"); - exit(U_INVALID_TABLE_FORMAT); - } - - if(states->minCharLength==1) { - int32_t action; - - /* - * if there are single-byte characters, - * then the initial state must have direct result states - */ - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[0][cell]; - if( MBCS_ENTRY_IS_FINAL(entry) && - ((action=MBCS_ENTRY_FINAL_ACTION(entry))==MBCS_STATE_VALID_DIRECT_16 || - action==MBCS_STATE_UNASSIGNED) - ) { - break; - } - } - - if(cell==256) { - fprintf(stderr, "ucm warning: min B/char too small\n"); - } - } - - /* - * make sure that all "next state" values are within limits - * and that all next states after final ones have the "direct" - * flag of initial states - */ - for(state=states->countStates-1; state>=0; --state) { - for(cell=0; cell<256; ++cell) { - 
entry=states->stateTable[state][cell]; - if((uint8_t)MBCS_ENTRY_STATE(entry)>=states->countStates) { - fprintf(stderr, "ucm error: state table entry [%x][%x] has a next state of %x that is too high\n", - (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); - exit(U_INVALID_TABLE_FORMAT); - } - if(MBCS_ENTRY_IS_FINAL(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)!=MBCS_STATE_FLAG_DIRECT) { - fprintf(stderr, "ucm error: state table entry [%x][%x] is final but has a non-initial next state of %x\n", - (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); - exit(U_INVALID_TABLE_FORMAT); - } else if(MBCS_ENTRY_IS_TRANSITION(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)==MBCS_STATE_FLAG_DIRECT) { - fprintf(stderr, "ucm error: state table entry [%x][%x] is not final but has an initial next state of %x\n", - (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); - exit(U_INVALID_TABLE_FORMAT); - } - } - } - - /* is this an SI/SO (like EBCDIC-stateful) state table? */ - if(states->countStates>=2 && (states->stateFlags[1]&0xf)==MBCS_STATE_FLAG_DIRECT) { - if(states->maxCharLength!=2) { - fprintf(stderr, "ucm error: SI/SO codepages must have max 2 bytes/char (not %x)\n", (int)states->maxCharLength); - exit(U_INVALID_TABLE_FORMAT); - } - if(states->countStates<3) { - fprintf(stderr, "ucm error: SI/SO codepages must have at least 3 states (not %x)\n", (int)states->countStates); - exit(U_INVALID_TABLE_FORMAT); - } - /* are the SI/SO all in the right places? 
*/ - if( ignoreSISOCheck || - (states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) && - states->stateTable[0][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0) && - states->stateTable[1][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) && - states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0)) - ) { - states->outputType=MBCS_OUTPUT_2_SISO; - } else { - fprintf(stderr, "ucm error: SI/SO codepages must have in states 0 and 1 transitions e:1.s, f:0.s\n"); - exit(U_INVALID_TABLE_FORMAT); - } - state=2; - } else { - state=1; - } - - /* check that no unexpected state is a "direct" one */ - while(statecountStates) { - if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { - fprintf(stderr, "ucm error: state %d is 'initial' - not supported except for SI/SO codepages\n", (int)state); - exit(U_INVALID_TABLE_FORMAT); - } - ++state; - } - - sumUpStates(states); -} - -/* find a fallback for this offset; return the index or -1 if not found */ -U_CAPI int32_t U_EXPORT2 -ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - uint32_t offset) { - int32_t i; - - if(countToUFallbacks==0) { - /* shortcut: most codepages do not have fallbacks from codepage to Unicode */ - return -1; - } - - /* do a linear search for the fallback mapping (the table is not yet sorted) */ - for(i=0; ioutputType==MBCS_OUTPUT_2_SISO) { - /* use the DBCS lead state for SI/SO codepages */ - leadState=1; - } else { - leadState=0; - } - - /* find the main trail state: the most used target state */ - uprv_memset(count, 0, sizeof(count)); - for(i=0; i<256; ++i) { - entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - ++count[MBCS_ENTRY_TRANSITION_STATE(entry)]; - } - } - trailState=0; - for(i=1; icountStates; ++i) { - if(count[i]>count[trailState]) { - trailState=i; - } - } - - /* count possible savings from lead bytes with all-unassigned results in all trail bytes */ - uprv_memset(count, 0, 
sizeof(count)); - savings=0; - /* for each lead byte */ - for(i=0; i<256; ++i) { - entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry) && (MBCS_ENTRY_TRANSITION_STATE(entry))==trailState) { - /* the offset is different for each lead byte */ - offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); - /* for each trail byte for this lead byte */ - for(j=0; j<256; ++j) { - entry=states->stateTable[trailState][j]; - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if((*pUnicodeCodeUnits)[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) { - ++count[i]; - } else { - j=999; /* do not count for this lead byte because there are assignments */ - } - break; - case MBCS_STATE_VALID_16_PAIR: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if((*pUnicodeCodeUnits)[entry]==0xfffe) { - count[i]+=2; - } else { - j=999; /* do not count for this lead byte because there are assignments */ - } - break; - default: - break; - } - } - if(j==256) { - /* all trail bytes for this lead byte are unassigned */ - savings+=count[i]; - } else { - count[i]=0; - } - } - } - /* subtract from the possible savings the cost of an additional state */ - savings=savings*2-1024; /* count bytes, not 16-bit words */ - if(savings<=0) { - return; - } - if(verbose) { - printf("compacting toUnicode data saves %ld bytes\n", (long)savings); - } - if(states->countStates>=MBCS_MAX_STATE_COUNT) { - fprintf(stderr, "cannot compact toUnicode because the maximum number of states is reached\n"); - return; - } - - /* make a copy of the state table */ - oldStateTable=(int32_t (*)[256])uprv_malloc(states->countStates*1024); - if(oldStateTable==NULL) { - fprintf(stderr, "cannot compact toUnicode: out of memory\n"); - return; - } - uprv_memcpy(oldStateTable, states->stateTable, states->countStates*1024); - - /* add the new state */ - /* - * this function does not catch the degenerate case where all lead bytes - * 
have all-unassigned trail bytes and the lead state could be removed - */ - newState=states->countStates++; - states->stateFlags[newState]=0; - /* copy the old trail state, turning all assigned states into unassigned ones */ - for(i=0; i<256; ++i) { - entry=states->stateTable[trailState][i]; - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - case MBCS_STATE_VALID_16_PAIR: - states->stateTable[newState][i]=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe); - break; - default: - states->stateTable[newState][i]=entry; - break; - } - } - - /* in the lead state, redirect all lead bytes with all-unassigned trail bytes to the new state */ - for(i=0; i<256; ++i) { - if(count[i]>0) { - states->stateTable[leadState][i]=MBCS_ENTRY_SET_STATE(states->stateTable[leadState][i], newState); - } - } - - /* sum up the new state table */ - for(i=0; icountStates; ++i) { - states->stateFlags[i]&=~MBCS_STATE_FLAG_READY; - } - sum=sumUpStates(states); - - /* allocate a new, smaller code units array */ - oldUnicodeCodeUnits=*pUnicodeCodeUnits; - if(sum==0) { - *pUnicodeCodeUnits=NULL; - if(oldUnicodeCodeUnits!=NULL) { - uprv_free(oldUnicodeCodeUnits); - } - uprv_free(oldStateTable); - return; - } - *pUnicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t)); - if(*pUnicodeCodeUnits==NULL) { - fprintf(stderr, "cannot compact toUnicode: out of memory allocating %ld 16-bit code units\n", - (long)sum); - /* revert to the old state table */ - *pUnicodeCodeUnits=oldUnicodeCodeUnits; - --states->countStates; - uprv_memcpy(states->stateTable, oldStateTable, states->countStates*1024); - uprv_free(oldStateTable); - return; - } - for(i=0; icountStates; ++leadState) { - if((states->stateFlags[leadState]&0xf)==MBCS_STATE_FLAG_DIRECT) { - /* for each lead byte from there */ - for(i=0; i<256; ++i) { - entry=states->stateTable[leadState][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - trailState=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - /* the new state 
does not have assigned states */ - if(trailState!=newState) { - trailOffset=MBCS_ENTRY_TRANSITION_OFFSET(entry); - oldTrailOffset=MBCS_ENTRY_TRANSITION_OFFSET(oldStateTable[leadState][i]); - /* for each trail byte */ - for(j=0; j<256; ++j) { - entry=states->stateTable[trailState][j]; - /* copy assigned-character code units and adjust fallback offsets */ - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry); - /* find the old offset according to the old state table */ - oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]); - unit=(*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset]; - if(unit==0xfffe && (fallback=ucm_findFallback(toUFallbacks, countToUFallbacks, oldOffset))>=0) { - toUFallbacks[fallback].offset=0x80000000|offset; - } - break; - case MBCS_STATE_VALID_16_PAIR: - offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry); - /* find the old offset according to the old state table */ - oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]); - (*pUnicodeCodeUnits)[offset++]=oldUnicodeCodeUnits[oldOffset++]; - (*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset]; - break; - default: - break; - } - } - } - } - } - } - } - - /* remove temporary flags from fallback offsets that protected them from being modified twice */ - for(i=0; i0 number of bytes that are used in unicodeCodeUnits[] that could be saved, - * if all sequences from this state are unassigned, returns the - * <0 there are assignments in unicodeCodeUnits[] - * 0 no use of unicodeCodeUnits[] - */ -static int32_t -findUnassigned(UCMStates *states, - uint16_t *unicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - int32_t state, int32_t offset, uint32_t b) { - int32_t i, entry, savings, localSavings, belowSavings; - UBool haveAssigned; - - localSavings=belowSavings=0; - haveAssigned=FALSE; - for(i=0; i<256; ++i) { - 
entry=states->stateTable[state][i]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - savings=findUnassigned(states, - unicodeCodeUnits, - toUFallbacks, countToUFallbacks, - MBCS_ENTRY_TRANSITION_STATE(entry), - offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), - (b<<8)|(uint32_t)i); - if(savings<0) { - haveAssigned=TRUE; - } else if(savings>0) { - printf(" all-unassigned sequences from prefix 0x%02lx state %ld use %ld bytes\n", - (unsigned long)((b<<8)|i), (long)state, (long)savings); - belowSavings+=savings; - } - } else if(!haveAssigned) { - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_VALID_16: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if(unicodeCodeUnits[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) { - localSavings+=2; - } else { - haveAssigned=TRUE; - } - break; - case MBCS_STATE_VALID_16_PAIR: - entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - if(unicodeCodeUnits[entry]==0xfffe) { - localSavings+=4; - } else { - haveAssigned=TRUE; - } - break; - default: - break; - } - } - } - if(haveAssigned) { - return -1; - } else { - return localSavings+belowSavings; - } -} - -/* helper function for finding compaction opportunities */ -static void -compactToUnicodeHelper(UCMStates *states, - uint16_t *unicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks) { - int32_t state, savings; - - /* for each initial state */ - for(state=0; statecountStates; ++state) { - if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { - savings=findUnassigned(states, - unicodeCodeUnits, - toUFallbacks, countToUFallbacks, - state, 0, 0); - if(savings>0) { - printf(" all-unassigned sequences from initial state %ld use %ld bytes\n", - (long)state, (long)savings); - } - } - } -} - -static int32_t -compareFallbacks(const void *context, const void *fb1, const void *fb2) { - return ((const _MBCSToUFallback *)fb1)->offset-((const _MBCSToUFallback *)fb2)->offset; -} - -U_CAPI void U_EXPORT2 -ucm_optimizeStates(UCMStates *states, 
- uint16_t **pUnicodeCodeUnits, - _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, - UBool verbose) { - UErrorCode errorCode; - int32_t state, cell, entry; - - /* test each state table entry */ - for(state=0; statecountStates; ++state) { - for(cell=0; cell<256; ++cell) { - entry=states->stateTable[state][cell]; - /* - * if the entry is a final one with an MBCS_STATE_VALID_DIRECT_16 action code - * and the code point is "unassigned" (0xfffe), then change it to - * the "unassigned" action code with bits 26..23 set to zero and U+fffe. - */ - if(MBCS_ENTRY_SET_STATE(entry, 0)==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe)) { - states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_UNASSIGNED); - } - } - } - - /* try to compact the toUnicode tables */ - if(states->maxCharLength==2) { - compactToUnicode2(states, pUnicodeCodeUnits, toUFallbacks, countToUFallbacks, verbose); - } else if(states->maxCharLength>2) { - if(verbose) { - compactToUnicodeHelper(states, *pUnicodeCodeUnits, toUFallbacks, countToUFallbacks); - } - } - - /* sort toUFallbacks */ - /* - * It should be safe to sort them before compactToUnicode2() is called, - * because it should not change the relative order of the offset values - * that it adjusts, but they need to be sorted at some point, and - * it is safest here. - */ - if(countToUFallbacks>0) { - errorCode=U_ZERO_ERROR; /* nothing bad will happen... 
*/ - uprv_sortArray(toUFallbacks, countToUFallbacks, - sizeof(_MBCSToUFallback), - compareFallbacks, NULL, FALSE, &errorCode); - } -} - -/* use a complete state table ----------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ucm_countChars(UCMStates *states, - const uint8_t *bytes, int32_t length) { - uint32_t offset; - int32_t i, entry, count; - uint8_t state; - - offset=0; - count=0; - state=0; - - if(states->countStates==0) { - fprintf(stderr, "ucm error: there is no state information!\n"); - return -1; - } - - /* for SI/SO (like EBCDIC-stateful), double-byte sequences start in state 1 */ - if(length==2 && states->outputType==MBCS_OUTPUT_2_SISO) { - state=1; - } - - /* - * Walk down the state table like in conversion, - * much like getNextUChar(). - * We assume that c<=0x10ffff. - */ - for(i=0; istateTable[state][bytes[i]]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); - } else { - switch(MBCS_ENTRY_FINAL_ACTION(entry)) { - case MBCS_STATE_ILLEGAL: - fprintf(stderr, "ucm error: byte sequence ends in illegal state\n"); - return -1; - case MBCS_STATE_CHANGE_ONLY: - fprintf(stderr, "ucm error: byte sequence ends in state-change-only\n"); - return -1; - case MBCS_STATE_UNASSIGNED: - case MBCS_STATE_FALLBACK_DIRECT_16: - case MBCS_STATE_VALID_DIRECT_16: - case MBCS_STATE_FALLBACK_DIRECT_20: - case MBCS_STATE_VALID_DIRECT_20: - case MBCS_STATE_VALID_16: - case MBCS_STATE_VALID_16_PAIR: - /* count a complete character and prepare for a new one */ - ++count; - state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); - offset=0; - break; - default: - /* reserved, must never occur */ - fprintf(stderr, "ucm error: byte sequence reached reserved action code, entry: 0x%02lx\n", (unsigned long)entry); - return -1; - } - } - } - - if(offset!=0) { - fprintf(stderr, "ucm error: byte sequence too short, ends in non-final state %u\n", state); - return -1; - } - - /* - * for 
SI/SO (like EBCDIC-stateful), multiple-character results - * must consist of only double-byte sequences - */ - if(count>1 && states->outputType==MBCS_OUTPUT_2_SISO && length!=2*count) { - fprintf(stderr, "ucm error: SI/SO (like EBCDIC-stateful) result with %d characters does not contain all DBCS\n", (int)count); - return -1; - } - - return count; -} -#endif diff --git a/deps/icu-small/source/tools/toolutil/ucmstate.cpp b/deps/icu-small/source/tools/toolutil/ucmstate.cpp new file mode 100644 index 0000000000..2776575229 --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/ucmstate.cpp @@ -0,0 +1,1051 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucmstate.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003oct09 +* created by: Markus W. Scherer +* +* This file handles ICU .ucm file state information as part of the ucm module. +* Most of this code used to be in makeconv.c. +*/ + +#include "unicode/utypes.h" +#include "cstring.h" +#include "cmemory.h" +#include "uarrsort.h" +#include "ucnvmbcs.h" +#include "ucnv_ext.h" +#include "uparse.h" +#include "ucm.h" +#include + +#if !UCONFIG_NO_CONVERSION + +/* MBCS state handling ------------------------------------------------------ */ + +/* + * state table row grammar (ebnf-style): + * (whitespace is allowed between all tokens) + * + * row=[[firstentry ','] entry (',' entry)*] + * firstentry="initial" | "surrogates" + * (initial state (default for state 0), output is all surrogate pairs) + * entry=range [':' nextstate] ['.' 
action] + * range=number ['-' number] + * nextstate=number + * (0..7f) + * action='u' | 's' | 'p' | 'i' + * (unassigned, state change only, surrogate pair, illegal) + * number=(1- or 2-digit hexadecimal number) + */ +static const char * +parseState(const char *s, int32_t state[256], uint32_t *pFlags) { + const char *t; + uint32_t start, end, i; + int32_t entry; + + /* initialize the state: all illegal with U+ffff */ + for(i=0; i<256; ++i) { + state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0xffff); + } + + /* skip leading white space */ + s=u_skipWhitespace(s); + + /* is there an "initial" or "surrogates" directive? */ + if(uprv_strncmp("initial", s, 7)==0) { + *pFlags=MBCS_STATE_FLAG_DIRECT; + s=u_skipWhitespace(s+7); + if(*s++!=',') { + return s-1; + } + } else if(*pFlags==0 && uprv_strncmp("surrogates", s, 10)==0) { + *pFlags=MBCS_STATE_FLAG_SURROGATES; + s=u_skipWhitespace(s+10); + if(*s++!=',') { + return s-1; + } + } else if(*s==0) { + /* empty state row: all-illegal */ + return NULL; + } + + for(;;) { + /* read an entry, the start of the range first */ + s=u_skipWhitespace(s); + start=uprv_strtoul(s, (char **)&t, 16); + if(s==t || 0xffcountStates==MBCS_MAX_STATE_COUNT) { + fprintf(stderr, "ucm error: too many states (maximum %u)\n", MBCS_MAX_STATE_COUNT); + exit(U_INVALID_TABLE_FORMAT); + } + + error=parseState(s, states->stateTable[states->countStates], + &states->stateFlags[states->countStates]); + if(error!=NULL) { + fprintf(stderr, "ucm error: parse error in state definition at '%s'\n", error); + exit(U_INVALID_TABLE_FORMAT); + } + + ++states->countStates; +} + +U_CAPI UBool U_EXPORT2 +ucm_parseHeaderLine(UCMFile *ucm, + char *line, char **pKey, char **pValue) { + UCMStates *states; + char *s, *end; + char c; + + states=&ucm->states; + + /* remove comments and trailing CR and LF and remove whitespace from the end */ + for(end=line; (c=*end)!=0; ++end) { + if(c=='#' || c=='\r' || c=='\n') { + break; + } + } + while(end>line && (*(end-1)==' ' || 
*(end-1)=='\t')) { + --end; + } + *end=0; + + /* skip leading white space and ignore empty lines */ + s=(char *)u_skipWhitespace(line); + if(*s==0) { + return TRUE; + } + + /* stop at the beginning of the mapping section */ + if(uprv_memcmp(s, "CHARMAP", 7)==0) { + return FALSE; + } + + /* get the key name, bracketed in <> */ + if(*s!='<') { + fprintf(stderr, "ucm error: no header field in line \"%s\"\n", line); + exit(U_INVALID_TABLE_FORMAT); + } + *pKey=++s; + while(*s!='>') { + if(*s==0) { + fprintf(stderr, "ucm error: incomplete header field in line \"%s\"\n", line); + exit(U_INVALID_TABLE_FORMAT); + } + ++s; + } + *s=0; + + /* get the value string, possibly quoted */ + s=(char *)u_skipWhitespace(s+1); + if(*s!='"') { + *pValue=s; + } else { + /* remove the quotes */ + *pValue=s+1; + if(end>*pValue && *(end-1)=='"') { + *--end=0; + } + } + + /* collect the information from the header field, ignore unknown keys */ + if(uprv_strcmp(*pKey, "uconv_class")==0) { + if(uprv_strcmp(*pValue, "DBCS")==0) { + states->conversionType=UCNV_DBCS; + } else if(uprv_strcmp(*pValue, "SBCS")==0) { + states->conversionType = UCNV_SBCS; + } else if(uprv_strcmp(*pValue, "MBCS")==0) { + states->conversionType = UCNV_MBCS; + } else if(uprv_strcmp(*pValue, "EBCDIC_STATEFUL")==0) { + states->conversionType = UCNV_EBCDIC_STATEFUL; + } else { + fprintf(stderr, "ucm error: unknown %s\n", *pValue); + exit(U_INVALID_TABLE_FORMAT); + } + return TRUE; + } else if(uprv_strcmp(*pKey, "mb_cur_max")==0) { + c=**pValue; + if('1'<=c && c<='4' && (*pValue)[1]==0) { + states->maxCharLength=(int8_t)(c-'0'); + states->outputType=(int8_t)(states->maxCharLength-1); + } else { + fprintf(stderr, "ucm error: illegal %s\n", *pValue); + exit(U_INVALID_TABLE_FORMAT); + } + return TRUE; + } else if(uprv_strcmp(*pKey, "mb_cur_min")==0) { + c=**pValue; + if('1'<=c && c<='4' && (*pValue)[1]==0) { + states->minCharLength=(int8_t)(c-'0'); + } else { + fprintf(stderr, "ucm error: illegal %s\n", *pValue); + 
exit(U_INVALID_TABLE_FORMAT); + } + return TRUE; + } else if(uprv_strcmp(*pKey, "icu:state")==0) { + /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */ + switch(states->conversionType) { + case UCNV_SBCS: + case UCNV_DBCS: + case UCNV_EBCDIC_STATEFUL: + states->conversionType=UCNV_MBCS; + break; + case UCNV_MBCS: + break; + default: + fprintf(stderr, "ucm error: entry for non-MBCS table or before the line\n"); + exit(U_INVALID_TABLE_FORMAT); + } + + if(states->maxCharLength==0) { + fprintf(stderr, "ucm error: before the line\n"); + exit(U_INVALID_TABLE_FORMAT); + } + ucm_addState(states, *pValue); + return TRUE; + } else if(uprv_strcmp(*pKey, "icu:base")==0) { + if(**pValue==0) { + fprintf(stderr, "ucm error: without a base table name\n"); + exit(U_INVALID_TABLE_FORMAT); + } + uprv_strcpy(ucm->baseName, *pValue); + return TRUE; + } + + return FALSE; +} + +/* post-processing ---------------------------------------------------------- */ + +static int32_t +sumUpStates(UCMStates *states) { + int32_t entry, sum, state, cell, count; + UBool allStatesReady; + + /* + * Sum up the offsets for all states. + * In each final state (where there are only final entries), + * the offsets add up directly. + * In all other state table rows, for each transition entry to another state, + * the offsets sum of that state needs to be added. + * This is achieved in at most countStates iterations. 
+ */ + allStatesReady=FALSE; + for(count=states->countStates; !allStatesReady && count>=0; --count) { + allStatesReady=TRUE; + for(state=states->countStates-1; state>=0; --state) { + if(!(states->stateFlags[state]&MBCS_STATE_FLAG_READY)) { + allStatesReady=FALSE; + sum=0; + + /* at first, add up only the final delta offsets to keep them <512 */ + for(cell=0; cell<256; ++cell) { + entry=states->stateTable[state][cell]; + if(MBCS_ENTRY_IS_FINAL(entry)) { + switch(MBCS_ENTRY_FINAL_ACTION(entry)) { + case MBCS_STATE_VALID_16: + states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum); + sum+=1; + break; + case MBCS_STATE_VALID_16_PAIR: + states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum); + sum+=2; + break; + default: + /* no addition */ + break; + } + } + } + + /* now, add up the delta offsets for the transitional entries */ + for(cell=0; cell<256; ++cell) { + entry=states->stateTable[state][cell]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + if(states->stateFlags[MBCS_ENTRY_TRANSITION_STATE(entry)]&MBCS_STATE_FLAG_READY) { + states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, sum); + sum+=states->stateOffsetSum[MBCS_ENTRY_TRANSITION_STATE(entry)]; + } else { + /* that next state does not have a sum yet, we cannot finish the one for this state */ + sum=-1; + break; + } + } + } + + if(sum!=-1) { + states->stateOffsetSum[state]=sum; + states->stateFlags[state]|=MBCS_STATE_FLAG_READY; + } + } + } + } + + if(!allStatesReady) { + fprintf(stderr, "ucm error: the state table contains loops\n"); + exit(U_INVALID_TABLE_FORMAT); + } + + /* + * For all "direct" (i.e., initial) states>0, + * the offsets need to be increased by the sum of + * the previous initial states. 
+ */ + sum=states->stateOffsetSum[0]; + for(state=1; statecountStates; ++state) { + if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { + int32_t sum2=sum; + sum+=states->stateOffsetSum[state]; + for(cell=0; cell<256; ++cell) { + entry=states->stateTable[state][cell]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, sum2); + } + } + } + } + + /* round up to the next even number to have the following data 32-bit-aligned */ + return states->countToUCodeUnits=(sum+1)&~1; +} + +U_CAPI void U_EXPORT2 +ucm_processStates(UCMStates *states, UBool ignoreSISOCheck) { + int32_t entry, state, cell, count; + + if(states->conversionType==UCNV_UNSUPPORTED_CONVERTER) { + fprintf(stderr, "ucm error: missing conversion type ()\n"); + exit(U_INVALID_TABLE_FORMAT); + } + + if(states->countStates==0) { + switch(states->conversionType) { + case UCNV_SBCS: + /* SBCS: use MBCS data structure with a default state table */ + if(states->maxCharLength!=1) { + fprintf(stderr, "error: SBCS codepage with max B/char!=1\n"); + exit(U_INVALID_TABLE_FORMAT); + } + states->conversionType=UCNV_MBCS; + ucm_addState(states, "0-ff"); + break; + case UCNV_MBCS: + fprintf(stderr, "ucm error: missing state table information () for MBCS\n"); + exit(U_INVALID_TABLE_FORMAT); + break; + case UCNV_EBCDIC_STATEFUL: + /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */ + if(states->minCharLength!=1 || states->maxCharLength!=2) { + fprintf(stderr, "error: DBCS codepage with min B/char!=1 or max B/char!=2\n"); + exit(U_INVALID_TABLE_FORMAT); + } + states->conversionType=UCNV_MBCS; + ucm_addState(states, "0-ff, e:1.s, f:0.s"); + ucm_addState(states, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4"); + ucm_addState(states, "0-40:1.i, 41-fe:1., ff:1.i"); + ucm_addState(states, "0-ff:1.i, 40:1."); + ucm_addState(states, "0-ff:1.i"); + break; + case UCNV_DBCS: + /* DBCS: use MBCS data structure with a default state table 
*/ + if(states->minCharLength!=2 || states->maxCharLength!=2) { + fprintf(stderr, "error: DBCS codepage with min or max B/char!=2\n"); + exit(U_INVALID_TABLE_FORMAT); + } + states->conversionType = UCNV_MBCS; + ucm_addState(states, "0-3f:3, 40:2, 41-fe:1, ff:3"); + ucm_addState(states, "41-fe"); + ucm_addState(states, "40"); + ucm_addState(states, ""); + break; + default: + fprintf(stderr, "ucm error: unknown charset structure\n"); + exit(U_INVALID_TABLE_FORMAT); + break; + } + } + + /* + * check that the min/max character lengths are reasonable; + * to do this right, all paths through the state table would have to be + * recursively walked while keeping track of the sequence lengths, + * but these simple checks cover most state tables in practice + */ + if(states->maxCharLengthminCharLength) { + fprintf(stderr, "ucm error: max B/char < min B/char\n"); + exit(U_INVALID_TABLE_FORMAT); + } + + /* count non-direct states and compare with max B/char */ + count=0; + for(state=0; statecountStates; ++state) { + if((states->stateFlags[state]&0xf)!=MBCS_STATE_FLAG_DIRECT) { + ++count; + } + } + if(states->maxCharLength>count+1) { + fprintf(stderr, "ucm error: max B/char too large\n"); + exit(U_INVALID_TABLE_FORMAT); + } + + if(states->minCharLength==1) { + int32_t action; + + /* + * if there are single-byte characters, + * then the initial state must have direct result states + */ + for(cell=0; cell<256; ++cell) { + entry=states->stateTable[0][cell]; + if( MBCS_ENTRY_IS_FINAL(entry) && + ((action=MBCS_ENTRY_FINAL_ACTION(entry))==MBCS_STATE_VALID_DIRECT_16 || + action==MBCS_STATE_UNASSIGNED) + ) { + break; + } + } + + if(cell==256) { + fprintf(stderr, "ucm warning: min B/char too small\n"); + } + } + + /* + * make sure that all "next state" values are within limits + * and that all next states after final ones have the "direct" + * flag of initial states + */ + for(state=states->countStates-1; state>=0; --state) { + for(cell=0; cell<256; ++cell) { + 
entry=states->stateTable[state][cell]; + if((uint8_t)MBCS_ENTRY_STATE(entry)>=states->countStates) { + fprintf(stderr, "ucm error: state table entry [%x][%x] has a next state of %x that is too high\n", + (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); + exit(U_INVALID_TABLE_FORMAT); + } + if(MBCS_ENTRY_IS_FINAL(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)!=MBCS_STATE_FLAG_DIRECT) { + fprintf(stderr, "ucm error: state table entry [%x][%x] is final but has a non-initial next state of %x\n", + (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); + exit(U_INVALID_TABLE_FORMAT); + } else if(MBCS_ENTRY_IS_TRANSITION(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)==MBCS_STATE_FLAG_DIRECT) { + fprintf(stderr, "ucm error: state table entry [%x][%x] is not final but has an initial next state of %x\n", + (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry)); + exit(U_INVALID_TABLE_FORMAT); + } + } + } + + /* is this an SI/SO (like EBCDIC-stateful) state table? */ + if(states->countStates>=2 && (states->stateFlags[1]&0xf)==MBCS_STATE_FLAG_DIRECT) { + if(states->maxCharLength!=2) { + fprintf(stderr, "ucm error: SI/SO codepages must have max 2 bytes/char (not %x)\n", (int)states->maxCharLength); + exit(U_INVALID_TABLE_FORMAT); + } + if(states->countStates<3) { + fprintf(stderr, "ucm error: SI/SO codepages must have at least 3 states (not %x)\n", (int)states->countStates); + exit(U_INVALID_TABLE_FORMAT); + } + /* are the SI/SO all in the right places? 
*/ + if( ignoreSISOCheck || + (states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) && + states->stateTable[0][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0) && + states->stateTable[1][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) && + states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0)) + ) { + states->outputType=MBCS_OUTPUT_2_SISO; + } else { + fprintf(stderr, "ucm error: SI/SO codepages must have in states 0 and 1 transitions e:1.s, f:0.s\n"); + exit(U_INVALID_TABLE_FORMAT); + } + state=2; + } else { + state=1; + } + + /* check that no unexpected state is a "direct" one */ + while(statecountStates) { + if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { + fprintf(stderr, "ucm error: state %d is 'initial' - not supported except for SI/SO codepages\n", (int)state); + exit(U_INVALID_TABLE_FORMAT); + } + ++state; + } + + sumUpStates(states); +} + +/* find a fallback for this offset; return the index or -1 if not found */ +U_CAPI int32_t U_EXPORT2 +ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, + uint32_t offset) { + int32_t i; + + if(countToUFallbacks==0) { + /* shortcut: most codepages do not have fallbacks from codepage to Unicode */ + return -1; + } + + /* do a linear search for the fallback mapping (the table is not yet sorted) */ + for(i=0; ioutputType==MBCS_OUTPUT_2_SISO) { + /* use the DBCS lead state for SI/SO codepages */ + leadState=1; + } else { + leadState=0; + } + + /* find the main trail state: the most used target state */ + uprv_memset(count, 0, sizeof(count)); + for(i=0; i<256; ++i) { + entry=states->stateTable[leadState][i]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + ++count[MBCS_ENTRY_TRANSITION_STATE(entry)]; + } + } + trailState=0; + for(i=1; icountStates; ++i) { + if(count[i]>count[trailState]) { + trailState=i; + } + } + + /* count possible savings from lead bytes with all-unassigned results in all trail bytes */ + uprv_memset(count, 0, 
sizeof(count)); + savings=0; + /* for each lead byte */ + for(i=0; i<256; ++i) { + entry=states->stateTable[leadState][i]; + if(MBCS_ENTRY_IS_TRANSITION(entry) && (MBCS_ENTRY_TRANSITION_STATE(entry))==trailState) { + /* the offset is different for each lead byte */ + offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); + /* for each trail byte for this lead byte */ + for(j=0; j<256; ++j) { + entry=states->stateTable[trailState][j]; + switch(MBCS_ENTRY_FINAL_ACTION(entry)) { + case MBCS_STATE_VALID_16: + entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + if((*pUnicodeCodeUnits)[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) { + ++count[i]; + } else { + j=999; /* do not count for this lead byte because there are assignments */ + } + break; + case MBCS_STATE_VALID_16_PAIR: + entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + if((*pUnicodeCodeUnits)[entry]==0xfffe) { + count[i]+=2; + } else { + j=999; /* do not count for this lead byte because there are assignments */ + } + break; + default: + break; + } + } + if(j==256) { + /* all trail bytes for this lead byte are unassigned */ + savings+=count[i]; + } else { + count[i]=0; + } + } + } + /* subtract from the possible savings the cost of an additional state */ + savings=savings*2-1024; /* count bytes, not 16-bit words */ + if(savings<=0) { + return; + } + if(verbose) { + printf("compacting toUnicode data saves %ld bytes\n", (long)savings); + } + if(states->countStates>=MBCS_MAX_STATE_COUNT) { + fprintf(stderr, "cannot compact toUnicode because the maximum number of states is reached\n"); + return; + } + + /* make a copy of the state table */ + oldStateTable=(int32_t (*)[256])uprv_malloc(states->countStates*1024); + if(oldStateTable==NULL) { + fprintf(stderr, "cannot compact toUnicode: out of memory\n"); + return; + } + uprv_memcpy(oldStateTable, states->stateTable, states->countStates*1024); + + /* add the new state */ + /* + * this function does not catch the degenerate case where all lead bytes + * 
have all-unassigned trail bytes and the lead state could be removed + */ + newState=states->countStates++; + states->stateFlags[newState]=0; + /* copy the old trail state, turning all assigned states into unassigned ones */ + for(i=0; i<256; ++i) { + entry=states->stateTable[trailState][i]; + switch(MBCS_ENTRY_FINAL_ACTION(entry)) { + case MBCS_STATE_VALID_16: + case MBCS_STATE_VALID_16_PAIR: + states->stateTable[newState][i]=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe); + break; + default: + states->stateTable[newState][i]=entry; + break; + } + } + + /* in the lead state, redirect all lead bytes with all-unassigned trail bytes to the new state */ + for(i=0; i<256; ++i) { + if(count[i]>0) { + states->stateTable[leadState][i]=MBCS_ENTRY_SET_STATE(states->stateTable[leadState][i], newState); + } + } + + /* sum up the new state table */ + for(i=0; icountStates; ++i) { + states->stateFlags[i]&=~MBCS_STATE_FLAG_READY; + } + sum=sumUpStates(states); + + /* allocate a new, smaller code units array */ + oldUnicodeCodeUnits=*pUnicodeCodeUnits; + if(sum==0) { + *pUnicodeCodeUnits=NULL; + if(oldUnicodeCodeUnits!=NULL) { + uprv_free(oldUnicodeCodeUnits); + } + uprv_free(oldStateTable); + return; + } + *pUnicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t)); + if(*pUnicodeCodeUnits==NULL) { + fprintf(stderr, "cannot compact toUnicode: out of memory allocating %ld 16-bit code units\n", + (long)sum); + /* revert to the old state table */ + *pUnicodeCodeUnits=oldUnicodeCodeUnits; + --states->countStates; + uprv_memcpy(states->stateTable, oldStateTable, states->countStates*1024); + uprv_free(oldStateTable); + return; + } + for(i=0; icountStates; ++leadState) { + if((states->stateFlags[leadState]&0xf)==MBCS_STATE_FLAG_DIRECT) { + /* for each lead byte from there */ + for(i=0; i<256; ++i) { + entry=states->stateTable[leadState][i]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + trailState=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + /* the new state 
does not have assigned states */ + if(trailState!=newState) { + trailOffset=MBCS_ENTRY_TRANSITION_OFFSET(entry); + oldTrailOffset=MBCS_ENTRY_TRANSITION_OFFSET(oldStateTable[leadState][i]); + /* for each trail byte */ + for(j=0; j<256; ++j) { + entry=states->stateTable[trailState][j]; + /* copy assigned-character code units and adjust fallback offsets */ + switch(MBCS_ENTRY_FINAL_ACTION(entry)) { + case MBCS_STATE_VALID_16: + offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry); + /* find the old offset according to the old state table */ + oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]); + unit=(*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset]; + if(unit==0xfffe && (fallback=ucm_findFallback(toUFallbacks, countToUFallbacks, oldOffset))>=0) { + toUFallbacks[fallback].offset=0x80000000|offset; + } + break; + case MBCS_STATE_VALID_16_PAIR: + offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry); + /* find the old offset according to the old state table */ + oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]); + (*pUnicodeCodeUnits)[offset++]=oldUnicodeCodeUnits[oldOffset++]; + (*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset]; + break; + default: + break; + } + } + } + } + } + } + } + + /* remove temporary flags from fallback offsets that protected them from being modified twice */ + for(i=0; i0 number of bytes that are used in unicodeCodeUnits[] that could be saved, + * if all sequences from this state are unassigned, returns the + * <0 there are assignments in unicodeCodeUnits[] + * 0 no use of unicodeCodeUnits[] + */ +static int32_t +findUnassigned(UCMStates *states, + uint16_t *unicodeCodeUnits, + _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, + int32_t state, int32_t offset, uint32_t b) { + int32_t i, entry, savings, localSavings, belowSavings; + UBool haveAssigned; + + localSavings=belowSavings=0; + haveAssigned=FALSE; + for(i=0; i<256; ++i) { + 
entry=states->stateTable[state][i]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + savings=findUnassigned(states, + unicodeCodeUnits, + toUFallbacks, countToUFallbacks, + MBCS_ENTRY_TRANSITION_STATE(entry), + offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), + (b<<8)|(uint32_t)i); + if(savings<0) { + haveAssigned=TRUE; + } else if(savings>0) { + printf(" all-unassigned sequences from prefix 0x%02lx state %ld use %ld bytes\n", + (unsigned long)((b<<8)|i), (long)state, (long)savings); + belowSavings+=savings; + } + } else if(!haveAssigned) { + switch(MBCS_ENTRY_FINAL_ACTION(entry)) { + case MBCS_STATE_VALID_16: + entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + if(unicodeCodeUnits[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) { + localSavings+=2; + } else { + haveAssigned=TRUE; + } + break; + case MBCS_STATE_VALID_16_PAIR: + entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + if(unicodeCodeUnits[entry]==0xfffe) { + localSavings+=4; + } else { + haveAssigned=TRUE; + } + break; + default: + break; + } + } + } + if(haveAssigned) { + return -1; + } else { + return localSavings+belowSavings; + } +} + +/* helper function for finding compaction opportunities */ +static void +compactToUnicodeHelper(UCMStates *states, + uint16_t *unicodeCodeUnits, + _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks) { + int32_t state, savings; + + /* for each initial state */ + for(state=0; statecountStates; ++state) { + if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) { + savings=findUnassigned(states, + unicodeCodeUnits, + toUFallbacks, countToUFallbacks, + state, 0, 0); + if(savings>0) { + printf(" all-unassigned sequences from initial state %ld use %ld bytes\n", + (long)state, (long)savings); + } + } + } +} + +U_CDECL_BEGIN +static int32_t U_CALLCONV +compareFallbacks(const void *context, const void *fb1, const void *fb2) { + (void)context; + return ((const _MBCSToUFallback *)fb1)->offset-((const _MBCSToUFallback *)fb2)->offset; +} +U_CDECL_END + 
+U_CAPI void U_EXPORT2 +ucm_optimizeStates(UCMStates *states, + uint16_t **pUnicodeCodeUnits, + _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks, + UBool verbose) { + UErrorCode errorCode; + int32_t state, cell, entry; + + /* test each state table entry */ + for(state=0; statecountStates; ++state) { + for(cell=0; cell<256; ++cell) { + entry=states->stateTable[state][cell]; + /* + * if the entry is a final one with an MBCS_STATE_VALID_DIRECT_16 action code + * and the code point is "unassigned" (0xfffe), then change it to + * the "unassigned" action code with bits 26..23 set to zero and U+fffe. + */ + if(MBCS_ENTRY_SET_STATE(entry, 0)==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe)) { + states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_UNASSIGNED); + } + } + } + + /* try to compact the toUnicode tables */ + if(states->maxCharLength==2) { + compactToUnicode2(states, pUnicodeCodeUnits, toUFallbacks, countToUFallbacks, verbose); + } else if(states->maxCharLength>2) { + if(verbose) { + compactToUnicodeHelper(states, *pUnicodeCodeUnits, toUFallbacks, countToUFallbacks); + } + } + + /* sort toUFallbacks */ + /* + * It should be safe to sort them before compactToUnicode2() is called, + * because it should not change the relative order of the offset values + * that it adjusts, but they need to be sorted at some point, and + * it is safest here. + */ + if(countToUFallbacks>0) { + errorCode=U_ZERO_ERROR; /* nothing bad will happen... 
*/ + uprv_sortArray(toUFallbacks, countToUFallbacks, + sizeof(_MBCSToUFallback), + compareFallbacks, NULL, FALSE, &errorCode); + } +} + +/* use a complete state table ----------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ucm_countChars(UCMStates *states, + const uint8_t *bytes, int32_t length) { + uint32_t offset; + int32_t i, entry, count; + uint8_t state; + + offset=0; + count=0; + state=0; + + if(states->countStates==0) { + fprintf(stderr, "ucm error: there is no state information!\n"); + return -1; + } + + /* for SI/SO (like EBCDIC-stateful), double-byte sequences start in state 1 */ + if(length==2 && states->outputType==MBCS_OUTPUT_2_SISO) { + state=1; + } + + /* + * Walk down the state table like in conversion, + * much like getNextUChar(). + * We assume that c<=0x10ffff. + */ + for(i=0; istateTable[state][bytes[i]]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); + } else { + switch(MBCS_ENTRY_FINAL_ACTION(entry)) { + case MBCS_STATE_ILLEGAL: + fprintf(stderr, "ucm error: byte sequence ends in illegal state\n"); + return -1; + case MBCS_STATE_CHANGE_ONLY: + fprintf(stderr, "ucm error: byte sequence ends in state-change-only\n"); + return -1; + case MBCS_STATE_UNASSIGNED: + case MBCS_STATE_FALLBACK_DIRECT_16: + case MBCS_STATE_VALID_DIRECT_16: + case MBCS_STATE_FALLBACK_DIRECT_20: + case MBCS_STATE_VALID_DIRECT_20: + case MBCS_STATE_VALID_16: + case MBCS_STATE_VALID_16_PAIR: + /* count a complete character and prepare for a new one */ + ++count; + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); + offset=0; + break; + default: + /* reserved, must never occur */ + fprintf(stderr, "ucm error: byte sequence reached reserved action code, entry: 0x%02lx\n", (unsigned long)entry); + return -1; + } + } + } + + if(offset!=0) { + fprintf(stderr, "ucm error: byte sequence too short, ends in non-final state %u\n", state); + return -1; + } + + /* + * for 
SI/SO (like EBCDIC-stateful), multiple-character results + * must consist of only double-byte sequences + */ + if(count>1 && states->outputType==MBCS_OUTPUT_2_SISO && length!=2*count) { + fprintf(stderr, "ucm error: SI/SO (like EBCDIC-stateful) result with %d characters does not contain all DBCS\n", (int)count); + return -1; + } + + return count; +} +#endif diff --git a/deps/icu-small/source/tools/toolutil/udbgutil.cpp b/deps/icu-small/source/tools/toolutil/udbgutil.cpp index bbb814ba90..446e11aaf9 100644 --- a/deps/icu-small/source/tools/toolutil/udbgutil.cpp +++ b/deps/icu-small/source/tools/toolutil/udbgutil.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: @@ -351,8 +351,10 @@ int32_t udbg_enumByName(UDebugEnumType type, const char *value) { */ U_CAPI const char *udbg_getPlatform(void) { -#if U_PLATFORM_HAS_WIN32_API +#if U_PLATFORM_USES_ONLY_WIN32_API return "Windows"; +#elif U_PLATFORM == U_PF_CYGWIN + return "Cygwin"; #elif U_PLATFORM == U_PF_UNKNOWN return "unknown"; #elif U_PLATFORM == U_PF_DARWIN @@ -613,40 +615,6 @@ U_CAPI char *udbg_knownIssueURLFrom(const char *ticket, char *buf) { } -#if !U_HAVE_STD_STRING -const char *warning = "WARNING: Don't have std::string (STL) - known issue logs will be deficient."; - -U_CAPI void *udbg_knownIssue_openU(void *ptr, const char *ticket, char *where, const UChar *msg, UBool *firstForTicket, - UBool *firstForWhere) { - if(ptr==NULL) { - puts(warning); - } - printf("%s\tKnown Issue #%s\n", where, ticket); - - return (void*)warning; -} - -U_CAPI void *udbg_knownIssue_open(void *ptr, const char *ticket, char *where, const char *msg, UBool *firstForTicket, - UBool *firstForWhere) { - if(ptr==NULL) { - puts(warning); - } - if(msg==NULL) msg = ""; - printf("%s\tKnown Issue #%s \"%s\n", where, ticket, 
msg); - - return (void*)warning; -} - -U_CAPI UBool udbg_knownIssue_print(void *ptr) { - puts(warning); - return FALSE; -} - -U_CAPI void udbg_knownIssue_close(void *ptr) { - // nothing to do -} -#else - #include #include #include @@ -785,5 +753,3 @@ U_CAPI void udbg_knownIssue_close(void *ptr) { KnownIssues *t = static_cast(ptr); delete t; } - -#endif diff --git a/deps/icu-small/source/tools/toolutil/udbgutil.h b/deps/icu-small/source/tools/toolutil/udbgutil.h index 4bfb4cf867..2f186e6ed8 100644 --- a/deps/icu-small/source/tools/toolutil/udbgutil.h +++ b/deps/icu-small/source/tools/toolutil/udbgutil.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ************************************************************************ diff --git a/deps/icu-small/source/tools/toolutil/unewdata.c b/deps/icu-small/source/tools/toolutil/unewdata.c deleted file mode 100644 index f3b152c079..0000000000 --- a/deps/icu-small/source/tools/toolutil/unewdata.c +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unewdata.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct25 -* created by: Markus W. 
Scherer -*/ - -#include -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "cmemory.h" -#include "cstring.h" -#include "filestrm.h" -#include "unicode/udata.h" -#include "unewdata.h" - -struct UNewDataMemory { - FileStream *file; - uint16_t headerSize; - uint8_t magic1, magic2; -}; - -U_CAPI UNewDataMemory * U_EXPORT2 -udata_create(const char *dir, const char *type, const char *name, - const UDataInfo *pInfo, - const char *comment, - UErrorCode *pErrorCode) { - UNewDataMemory *pData; - uint16_t headerSize, commentLength; - char filename[512]; - uint8_t bytes[16]; - int32_t length; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(name==NULL || *name==0 || pInfo==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - /* allocate the data structure */ - pData=(UNewDataMemory *)uprv_malloc(sizeof(UNewDataMemory)); - if(pData==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* Check that the full path won't be too long */ - length = 0; /* Start with nothing */ - if(dir != NULL && *dir !=0) /* Add directory length if one was given */ - { - length += strlen(dir); - - /* Add 1 if dir doesn't end with path sep */ - if (dir[strlen(dir) - 1]!= U_FILE_SEP_CHAR) { - length++; - } - } - length += strlen(name); /* Add the filename length */ - - if(type != NULL && *type !=0) { /* Add directory length if given */ - length += strlen(type); - } - - - /* LDH buffer Length error check */ - if(length > ((int32_t)sizeof(filename) - 1)) - { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - uprv_free(pData); - return NULL; - } - - /* open the output file */ - if(dir!=NULL && *dir!=0) { /* if dir has a value, we prepend it to the filename */ - char *p=filename+strlen(dir); - uprv_strcpy(filename, dir); - if (*(p-1)!=U_FILE_SEP_CHAR) { - *p++=U_FILE_SEP_CHAR; - *p=0; - } - } else { /* otherwise, we'll output to the current dir */ - filename[0]=0; - } - uprv_strcat(filename, name); - 
if(type!=NULL && *type!=0) { - uprv_strcat(filename, "."); - uprv_strcat(filename, type); - } - pData->file=T_FileStream_open(filename, "wb"); - if(pData->file==NULL) { - uprv_free(pData); - *pErrorCode=U_FILE_ACCESS_ERROR; - return NULL; - } - - /* write the header information */ - headerSize=(uint16_t)(pInfo->size+4); - if(comment!=NULL && *comment!=0) { - commentLength=(uint16_t)(uprv_strlen(comment)+1); - headerSize+=commentLength; - } else { - commentLength=0; - } - - /* write the size of the header, take padding into account */ - pData->headerSize=(uint16_t)((headerSize+15)&~0xf); - pData->magic1=0xda; - pData->magic2=0x27; - T_FileStream_write(pData->file, &pData->headerSize, 4); - - /* write the information data */ - T_FileStream_write(pData->file, pInfo, pInfo->size); - - /* write the comment */ - if(commentLength>0) { - T_FileStream_write(pData->file, comment, commentLength); - } - - /* write padding bytes to align the data section to 16 bytes */ - headerSize&=0xf; - if(headerSize!=0) { - headerSize=(uint16_t)(16-headerSize); - uprv_memset(bytes, 0, headerSize); - T_FileStream_write(pData->file, bytes, headerSize); - } - - return pData; -} - -U_CAPI uint32_t U_EXPORT2 -udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) { - uint32_t fileLength=0; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(pData!=NULL) { - if(pData->file!=NULL) { - /* fflush(pData->file);*/ - fileLength=T_FileStream_size(pData->file); - if(T_FileStream_error(pData->file)) { - *pErrorCode=U_FILE_ACCESS_ERROR; - } else { - fileLength-=pData->headerSize; - } - T_FileStream_close(pData->file); - } - uprv_free(pData); - } - - return fileLength; -} - -/* dummy UDataInfo cf. 
udata.h */ -static const UDataInfo dummyDataInfo = { - sizeof(UDataInfo), - 0, - - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - U_SIZEOF_UCHAR, - 0, - - { 0, 0, 0, 0 }, /* dummy dataFormat */ - { 0, 0, 0, 0 }, /* dummy formatVersion */ - { 0, 0, 0, 0 } /* dummy dataVersion */ -}; - -U_CAPI void U_EXPORT2 -udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) { - if(U_SUCCESS(*pErrorCode)) { - udata_finish(udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode), pErrorCode); - if(U_FAILURE(*pErrorCode)) { - fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n", - u_errorName(*pErrorCode), dir, name, type); - exit(*pErrorCode); - } - } -} - -U_CAPI void U_EXPORT2 -udata_write8(UNewDataMemory *pData, uint8_t byte) { - if(pData!=NULL && pData->file!=NULL) { - T_FileStream_write(pData->file, &byte, 1); - } -} - -U_CAPI void U_EXPORT2 -udata_write16(UNewDataMemory *pData, uint16_t word) { - if(pData!=NULL && pData->file!=NULL) { - T_FileStream_write(pData->file, &word, 2); - } -} - -U_CAPI void U_EXPORT2 -udata_write32(UNewDataMemory *pData, uint32_t wyde) { - if(pData!=NULL && pData->file!=NULL) { - T_FileStream_write(pData->file, &wyde, 4); - } -} - -U_CAPI void U_EXPORT2 -udata_writeBlock(UNewDataMemory *pData, const void *s, int32_t length) { - if(pData!=NULL && pData->file!=NULL) { - if(length>0) { - T_FileStream_write(pData->file, s, length); - } - } -} - -U_CAPI void U_EXPORT2 -udata_writePadding(UNewDataMemory *pData, int32_t length) { - static const uint8_t padding[16]={ - 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa, - 0xaa, 0xaa, 0xaa, 0xaa - }; - if(pData!=NULL && pData->file!=NULL) { - while(length>=16) { - T_FileStream_write(pData->file, padding, 16); - length-=16; - } - if(length>0) { - T_FileStream_write(pData->file, padding, length); - } - } -} - -U_CAPI void U_EXPORT2 -udata_writeString(UNewDataMemory *pData, const char *s, int32_t length) { - 
if(pData!=NULL && pData->file!=NULL) { - if(length==-1) { - length=(int32_t)uprv_strlen(s); - } - if(length>0) { - T_FileStream_write(pData->file, s, length); - } - } -} - -U_CAPI void U_EXPORT2 -udata_writeUString(UNewDataMemory *pData, const UChar *s, int32_t length) { - if(pData!=NULL && pData->file!=NULL) { - if(length==-1) { - length=u_strlen(s); - } - if(length>0) { - T_FileStream_write(pData->file, s, length*sizeof(UChar)); - } - } -} - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/icu-small/source/tools/toolutil/unewdata.cpp b/deps/icu-small/source/tools/toolutil/unewdata.cpp new file mode 100644 index 0000000000..5c28e992c9 --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/unewdata.cpp @@ -0,0 +1,275 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unewdata.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999oct25 +* created by: Markus W. 
Scherer +*/ + +#include +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "cstring.h" +#include "filestrm.h" +#include "unicode/udata.h" +#include "unewdata.h" + +struct UNewDataMemory { + FileStream *file; + uint16_t headerSize; + uint8_t magic1, magic2; +}; + +U_CAPI UNewDataMemory * U_EXPORT2 +udata_create(const char *dir, const char *type, const char *name, + const UDataInfo *pInfo, + const char *comment, + UErrorCode *pErrorCode) { + UNewDataMemory *pData; + uint16_t headerSize, commentLength; + char filename[512]; + uint8_t bytes[16]; + int32_t length; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } else if(name==NULL || *name==0 || pInfo==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + /* allocate the data structure */ + pData=(UNewDataMemory *)uprv_malloc(sizeof(UNewDataMemory)); + if(pData==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* Check that the full path won't be too long */ + length = 0; /* Start with nothing */ + if(dir != NULL && *dir !=0) /* Add directory length if one was given */ + { + length += strlen(dir); + + /* Add 1 if dir doesn't end with path sep */ + if (dir[strlen(dir) - 1]!= U_FILE_SEP_CHAR) { + length++; + } + } + length += strlen(name); /* Add the filename length */ + + if(type != NULL && *type !=0) { /* Add directory length if given */ + length += strlen(type); + } + + + /* LDH buffer Length error check */ + if(length > ((int32_t)sizeof(filename) - 1)) + { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + uprv_free(pData); + return NULL; + } + + /* open the output file */ + if(dir!=NULL && *dir!=0) { /* if dir has a value, we prepend it to the filename */ + char *p=filename+strlen(dir); + uprv_strcpy(filename, dir); + if (*(p-1)!=U_FILE_SEP_CHAR) { + *p++=U_FILE_SEP_CHAR; + *p=0; + } + } else { /* otherwise, we'll output to the current dir */ + filename[0]=0; + } + uprv_strcat(filename, name); + 
if(type!=NULL && *type!=0) { + uprv_strcat(filename, "."); + uprv_strcat(filename, type); + } + pData->file=T_FileStream_open(filename, "wb"); + if(pData->file==NULL) { + uprv_free(pData); + *pErrorCode=U_FILE_ACCESS_ERROR; + return NULL; + } + + /* write the header information */ + headerSize=(uint16_t)(pInfo->size+4); + if(comment!=NULL && *comment!=0) { + commentLength=(uint16_t)(uprv_strlen(comment)+1); + headerSize+=commentLength; + } else { + commentLength=0; + } + + /* write the size of the header, take padding into account */ + pData->headerSize=(uint16_t)((headerSize+15)&~0xf); + pData->magic1=0xda; + pData->magic2=0x27; + T_FileStream_write(pData->file, &pData->headerSize, 4); + + /* write the information data */ + T_FileStream_write(pData->file, pInfo, pInfo->size); + + /* write the comment */ + if(commentLength>0) { + T_FileStream_write(pData->file, comment, commentLength); + } + + /* write padding bytes to align the data section to 16 bytes */ + headerSize&=0xf; + if(headerSize!=0) { + headerSize=(uint16_t)(16-headerSize); + uprv_memset(bytes, 0, headerSize); + T_FileStream_write(pData->file, bytes, headerSize); + } + + return pData; +} + +U_CAPI uint32_t U_EXPORT2 +udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) { + uint32_t fileLength=0; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(pData!=NULL) { + if(pData->file!=NULL) { + /* fflush(pData->file);*/ + fileLength=T_FileStream_size(pData->file); + if(T_FileStream_error(pData->file)) { + *pErrorCode=U_FILE_ACCESS_ERROR; + } else { + fileLength-=pData->headerSize; + } + T_FileStream_close(pData->file); + } + uprv_free(pData); + } + + return fileLength; +} + +/* dummy UDataInfo cf. 
udata.h */ +static const UDataInfo dummyDataInfo = { + sizeof(UDataInfo), + 0, + + U_IS_BIG_ENDIAN, + U_CHARSET_FAMILY, + U_SIZEOF_UCHAR, + 0, + + { 0, 0, 0, 0 }, /* dummy dataFormat */ + { 0, 0, 0, 0 }, /* dummy formatVersion */ + { 0, 0, 0, 0 } /* dummy dataVersion */ +}; + +U_CAPI void U_EXPORT2 +udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) { + if(U_SUCCESS(*pErrorCode)) { + udata_finish(udata_create(dir, type, name, &dummyDataInfo, NULL, pErrorCode), pErrorCode); + if(U_FAILURE(*pErrorCode)) { + fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n", + u_errorName(*pErrorCode), dir, name, type); + exit(*pErrorCode); + } + } +} + +U_CAPI void U_EXPORT2 +udata_write8(UNewDataMemory *pData, uint8_t byte) { + if(pData!=NULL && pData->file!=NULL) { + T_FileStream_write(pData->file, &byte, 1); + } +} + +U_CAPI void U_EXPORT2 +udata_write16(UNewDataMemory *pData, uint16_t word) { + if(pData!=NULL && pData->file!=NULL) { + T_FileStream_write(pData->file, &word, 2); + } +} + +U_CAPI void U_EXPORT2 +udata_write32(UNewDataMemory *pData, uint32_t wyde) { + if(pData!=NULL && pData->file!=NULL) { + T_FileStream_write(pData->file, &wyde, 4); + } +} + +U_CAPI void U_EXPORT2 +udata_writeBlock(UNewDataMemory *pData, const void *s, int32_t length) { + if(pData!=NULL && pData->file!=NULL) { + if(length>0) { + T_FileStream_write(pData->file, s, length); + } + } +} + +U_CAPI void U_EXPORT2 +udata_writePadding(UNewDataMemory *pData, int32_t length) { + static const uint8_t padding[16]={ + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa + }; + if(pData!=NULL && pData->file!=NULL) { + while(length>=16) { + T_FileStream_write(pData->file, padding, 16); + length-=16; + } + if(length>0) { + T_FileStream_write(pData->file, padding, length); + } + } +} + +U_CAPI void U_EXPORT2 +udata_writeString(UNewDataMemory *pData, const char *s, int32_t length) { + 
if(pData!=NULL && pData->file!=NULL) { + if(length==-1) { + length=(int32_t)uprv_strlen(s); + } + if(length>0) { + T_FileStream_write(pData->file, s, length); + } + } +} + +U_CAPI void U_EXPORT2 +udata_writeUString(UNewDataMemory *pData, const UChar *s, int32_t length) { + if(pData!=NULL && pData->file!=NULL) { + if(length==-1) { + length=u_strlen(s); + } + if(length>0) { + T_FileStream_write(pData->file, s, length*sizeof(UChar)); + } + } +} + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/deps/icu-small/source/tools/toolutil/unewdata.h b/deps/icu-small/source/tools/toolutil/unewdata.h index 9cc3994225..137fb49584 100644 --- a/deps/icu-small/source/tools/toolutil/unewdata.h +++ b/deps/icu-small/source/tools/toolutil/unewdata.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: unewdata.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/uoptions.c b/deps/icu-small/source/tools/toolutil/uoptions.c deleted file mode 100644 index e5ce64a372..0000000000 --- a/deps/icu-small/source/tools/toolutil/uoptions.c +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -******************************************************************************* -* file name: uoptions.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000apr17 -* created by: Markus W. Scherer -* -* This file provides a command line argument parser. -*/ - -#include "unicode/utypes.h" -#include "cstring.h" -#include "uoptions.h" - -U_CAPI int U_EXPORT2 -u_parseArgs(int argc, char* argv[], - int optionCount, UOption options[]) { - char *arg; - int i=1, remaining=1; - char c, stopOptions=0; - - while(idoesOccur=1; - - if(option->hasArg!=UOPT_NO_ARG) { - /* parse the argument for the option, if any */ - if(i+1value=argv[++i]; - } else if(option->hasArg==UOPT_REQUIRES_ARG) { - /* there is no argument, but one is required: return with error */ - option->doesOccur=0; - return -i; - } - } - - if(option->optionFn!=NULL && option->optionFn(option->context, option)<0) { - /* the option function was called and returned an error */ - option->doesOccur=0; - return -i; - } - } - } else { - /* process one or more short options */ - do { - /* search for the option letter */ - int j; - for(j=0; jdoesOccur=1; - - if(option->hasArg!=UOPT_NO_ARG) { - /* parse the argument for the option, if any */ - if(*arg!=0) { - /* argument following in the same argv[] */ - option->value=arg; - /* do not process the rest of this arg as option letters */ - break; - } else if(i+1value=argv[++i]; - /* this break is redundant because we know that *arg==0 */ - break; - } else if(option->hasArg==UOPT_REQUIRES_ARG) { - /* there is no argument, but one is required: return with error */ - option->doesOccur=0; - return -i; - } - } - - if(option->optionFn!=NULL && option->optionFn(option->context, option)<0) { - /* the option function was called and returned an error */ - option->doesOccur=0; - return -i; - } - - /* get the next option letter */ - option=NULL; - c=*arg++; - } while(c!=0); - } - - /* go to next argv[] */ - ++i; - } else { - /* move a non-option up in 
argv[] */ - argv[remaining++]=arg; - ++i; - } - } - return remaining; -} diff --git a/deps/icu-small/source/tools/toolutil/uoptions.cpp b/deps/icu-small/source/tools/toolutil/uoptions.cpp new file mode 100644 index 0000000000..53a77bcc4c --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/uoptions.cpp @@ -0,0 +1,133 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uoptions.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000apr17 +* created by: Markus W. Scherer +* +* This file provides a command line argument parser. +*/ + +#include "unicode/utypes.h" +#include "cstring.h" +#include "uoptions.h" + +U_CAPI int U_EXPORT2 +u_parseArgs(int argc, char* argv[], + int optionCount, UOption options[]) { + char *arg; + int i=1, remaining=1; + char c, stopOptions=0; + + while(idoesOccur=1; + + if(option->hasArg!=UOPT_NO_ARG) { + /* parse the argument for the option, if any */ + if(i+1value=argv[++i]; + } else if(option->hasArg==UOPT_REQUIRES_ARG) { + /* there is no argument, but one is required: return with error */ + option->doesOccur=0; + return -i; + } + } + + if(option->optionFn!=NULL && option->optionFn(option->context, option)<0) { + /* the option function was called and returned an error */ + option->doesOccur=0; + return -i; + } + } + } else { + /* process one or more short options */ + do { + /* search for the option letter */ + int j; + for(j=0; jdoesOccur=1; + + if(option->hasArg!=UOPT_NO_ARG) { + /* parse the argument for the option, if any */ + if(*arg!=0) { + /* argument following in the same argv[] */ + option->value=arg; + /* do not process the rest of this arg as 
option letters */ + break; + } else if(i+1value=argv[++i]; + /* this break is redundant because we know that *arg==0 */ + break; + } else if(option->hasArg==UOPT_REQUIRES_ARG) { + /* there is no argument, but one is required: return with error */ + option->doesOccur=0; + return -i; + } + } + + if(option->optionFn!=NULL && option->optionFn(option->context, option)<0) { + /* the option function was called and returned an error */ + option->doesOccur=0; + return -i; + } + + /* get the next option letter */ + option=NULL; + c=*arg++; + } while(c!=0); + } + + /* go to next argv[] */ + ++i; + } else { + /* move a non-option up in argv[] */ + argv[remaining++]=arg; + ++i; + } + } + return remaining; +} diff --git a/deps/icu-small/source/tools/toolutil/uoptions.h b/deps/icu-small/source/tools/toolutil/uoptions.h index 72652feadb..a7a2e96c61 100644 --- a/deps/icu-small/source/tools/toolutil/uoptions.h +++ b/deps/icu-small/source/tools/toolutil/uoptions.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uoptions.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/uparse.c b/deps/icu-small/source/tools/toolutil/uparse.c deleted file mode 100644 index a7142c3cba..0000000000 --- a/deps/icu-small/source/tools/toolutil/uparse.c +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2000-2012, International Business Machines -* Corporation and others. 
All Rights Reserved. -* -******************************************************************************* -* file name: uparse.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000apr18 -* created by: Markus W. Scherer -* -* This file provides a parser for files that are delimited by one single -* character like ';' or TAB. Example: the Unicode Character Properties files -* like UnicodeData.txt are semicolon-delimited. -*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "cstring.h" -#include "filestrm.h" -#include "uparse.h" -#include "ustr_imp.h" - -#include - -U_CAPI const char * U_EXPORT2 -u_skipWhitespace(const char *s) { - while(U_IS_INV_WHITESPACE(*s)) { - ++s; - } - return s; -} - -U_CAPI char * U_EXPORT2 -u_rtrim(char *s) { - char *end=uprv_strchr(s, 0); - while(sstart && U_IS_INV_WHITESPACE(*(limit-1))) { - --limit; - } - - /* truncate the line */ - *limit=0; - } - - /* skip lines with only whitespace */ - if(u_skipWhitespace(start)[0]==0) { - continue; - } - - /* for each field, call the corresponding field function */ - for(i=0; i0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - count=0; - for(;;) { - s=u_skipWhitespace(s); - if(*s==';' || *s==0) { - return count; - } - - /* read one code point */ - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - - /* append it to the destination array */ - if(count0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(pFirst!=NULL) { - *pFirst=0xffffffff; - } - - destLength=0; - for(;;) { - s=u_skipWhitespace(s); - if(*s==';' || *s==0) { - if(destLength=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - - /* store the first code point */ - if(pFirst!=NULL) { - *pFirst=value; - pFirst=NULL; - } - - /* append it to 
the destination array */ - if((destLength+U16_LENGTH(value))<=destCapacity) { - U16_APPEND_UNSAFE(dest, destLength, value); - } else { - destLength+=U16_LENGTH(value); - } - - /* go to the following characters */ - s=end; - } -} - -/* read a range like start or start..end */ -U_CAPI int32_t U_EXPORT2 -u_parseCodePointRangeAnyTerminator(const char *s, - uint32_t *pStart, uint32_t *pEnd, - const char **terminator, - UErrorCode *pErrorCode) { - char *end; - uint32_t value; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(s==NULL || pStart==NULL || pEnd==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* read the start code point */ - s=u_skipWhitespace(s); - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - *pStart=*pEnd=value; - - /* is there a "..end"? */ - s=u_skipWhitespace(end); - if(*s!='.' || s[1]!='.') { - *terminator=end; - return 1; - } - s=u_skipWhitespace(s+2); - - /* read the end code point */ - value=(uint32_t)uprv_strtoul(s, &end, 16); - if(end<=s || value>=0x110000) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - *pEnd=value; - - /* is this a valid range? 
*/ - if(value<*pStart) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - - *terminator=end; - return value-*pStart+1; -} - -U_CAPI int32_t U_EXPORT2 -u_parseCodePointRange(const char *s, - uint32_t *pStart, uint32_t *pEnd, - UErrorCode *pErrorCode) { - const char *terminator; - int32_t rangeLength= - u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); - if(U_SUCCESS(*pErrorCode)) { - terminator=u_skipWhitespace(terminator); - if(*terminator!=';' && *terminator!=0) { - *pErrorCode=U_PARSE_ERROR; - return 0; - } - } - return rangeLength; -} - -U_CAPI int32_t U_EXPORT2 -u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { - const char *read = source; - int32_t i = 0; - unsigned int value = 0; - if(sLen == -1) { - sLen = (int32_t)strlen(source); - } - - while(read < source+sLen) { - sscanf(read, "%2x", &value); - if(i < destCapacity) { - dest[i] = (char)value; - } - i++; - read += 2; - } - return u_terminateChars(dest, destCapacity, i, status); -} diff --git a/deps/icu-small/source/tools/toolutil/uparse.cpp b/deps/icu-small/source/tools/toolutil/uparse.cpp new file mode 100644 index 0000000000..937728d78a --- /dev/null +++ b/deps/icu-small/source/tools/toolutil/uparse.cpp @@ -0,0 +1,383 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2000-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uparse.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000apr18 +* created by: Markus W. Scherer +* +* This file provides a parser for files that are delimited by one single +* character like ';' or TAB. 
Example: the Unicode Character Properties files +* like UnicodeData.txt are semicolon-delimited. +*/ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "cstring.h" +#include "filestrm.h" +#include "uparse.h" +#include "ustr_imp.h" + +#include + +U_CAPI const char * U_EXPORT2 +u_skipWhitespace(const char *s) { + while(U_IS_INV_WHITESPACE(*s)) { + ++s; + } + return s; +} + +U_CAPI char * U_EXPORT2 +u_rtrim(char *s) { + char *end=uprv_strchr(s, 0); + while(sstart && U_IS_INV_WHITESPACE(*(limit-1))) { + --limit; + } + + /* truncate the line */ + *limit=0; + } + + /* skip lines with only whitespace */ + if(u_skipWhitespace(start)[0]==0) { + continue; + } + + /* for each field, call the corresponding field function */ + for(i=0; i0 && dest==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + count=0; + for(;;) { + s=u_skipWhitespace(s); + if(*s==';' || *s==0) { + return count; + } + + /* read one code point */ + value=(uint32_t)uprv_strtoul(s, &end, 16); + if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } + + /* append it to the destination array */ + if(count0 && dest==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(pFirst!=NULL) { + *pFirst=0xffffffff; + } + + destLength=0; + for(;;) { + s=u_skipWhitespace(s); + if(*s==';' || *s==0) { + if(destLength=0x110000) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } + + /* store the first code point */ + if(pFirst!=NULL) { + *pFirst=value; + pFirst=NULL; + } + + /* append it to the destination array */ + if((destLength+U16_LENGTH(value))<=destCapacity) { + U16_APPEND_UNSAFE(dest, destLength, value); + } else { + destLength+=U16_LENGTH(value); + } + + /* go to the following characters */ + s=end; + } +} + +/* read a range like start or start..end */ +U_CAPI int32_t U_EXPORT2 +u_parseCodePointRangeAnyTerminator(const char *s, + 
uint32_t *pStart, uint32_t *pEnd, + const char **terminator, + UErrorCode *pErrorCode) { + char *end; + uint32_t value; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(s==NULL || pStart==NULL || pEnd==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* read the start code point */ + s=u_skipWhitespace(s); + value=(uint32_t)uprv_strtoul(s, &end, 16); + if(end<=s || value>=0x110000) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } + *pStart=*pEnd=value; + + /* is there a "..end"? */ + s=u_skipWhitespace(end); + if(*s!='.' || s[1]!='.') { + *terminator=end; + return 1; + } + s=u_skipWhitespace(s+2); + + /* read the end code point */ + value=(uint32_t)uprv_strtoul(s, &end, 16); + if(end<=s || value>=0x110000) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } + *pEnd=value; + + /* is this a valid range? */ + if(value<*pStart) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } + + *terminator=end; + return value-*pStart+1; +} + +U_CAPI int32_t U_EXPORT2 +u_parseCodePointRange(const char *s, + uint32_t *pStart, uint32_t *pEnd, + UErrorCode *pErrorCode) { + const char *terminator; + int32_t rangeLength= + u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); + if(U_SUCCESS(*pErrorCode)) { + terminator=u_skipWhitespace(terminator); + if(*terminator!=';' && *terminator!=0) { + *pErrorCode=U_PARSE_ERROR; + return 0; + } + } + return rangeLength; +} + +U_CAPI int32_t U_EXPORT2 +u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { + const char *read = source; + int32_t i = 0; + unsigned int value = 0; + if(sLen == -1) { + sLen = (int32_t)strlen(source); + } + + while(read < source+sLen) { + sscanf(read, "%2x", &value); + if(i < destCapacity) { + dest[i] = (char)value; + } + i++; + read += 2; + } + return u_terminateChars(dest, destCapacity, i, status); +} diff --git a/deps/icu-small/source/tools/toolutil/uparse.h b/deps/icu-small/source/tools/toolutil/uparse.h index 5ad766cf10..df0e79a21f 
100644 --- a/deps/icu-small/source/tools/toolutil/uparse.h +++ b/deps/icu-small/source/tools/toolutil/uparse.h @@ -1,4 +1,4 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. +// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* @@ -8,7 +8,7 @@ * ******************************************************************************* * file name: uparse.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * diff --git a/deps/icu-small/source/tools/toolutil/writesrc.c b/deps/icu-small/source/tools/toolutil/writesrc.c deleted file mode 100644 index e3f14f2c46..0000000000 --- a/deps/icu-small/source/tools/toolutil/writesrc.c +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: writesrc.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005apr23 -* created by: Markus W. Scherer -* -* Helper functions for writing source code for data. 
-*/ - -#include -#include -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "utrie2.h" -#include "cstring.h" -#include "writesrc.h" - -static FILE * -usrc_createWithHeader(const char *path, const char *filename, - const char *generator, const char *header) { - char buffer[1024]; - const char *p; - char *q; - FILE *f; - char c; - - if(path==NULL) { - p=filename; - } else { - /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */ - uprv_strcpy(buffer, path); - q=buffer+uprv_strlen(buffer); - if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { - *q++=U_FILE_SEP_CHAR; - } - uprv_strcpy(q, filename); - p=buffer; - } - - f=fopen(p, "w"); - if(f!=NULL) { - const struct tm *lt; - time_t t; - - time(&t); - lt=localtime(&t); - if(generator==NULL) { - strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); - fprintf(f, header, filename, buffer); - } else { - fprintf(f, header, filename, generator); - } - } else { - fprintf( - stderr, - "usrc_create(%s, %s): unable to create file\n", - path!=NULL ? path : "", filename); - } - return f; -} - -U_CAPI FILE * U_EXPORT2 -usrc_create(const char *path, const char *filename, const char *generator) { - // TODO: Add parameter for the first year this file was generated, not before 2016. - static const char *header= - "// Copyright (C) 2016 and later: Unicode, Inc. and others.\n" - "// License & terms of use: http://www.unicode.org/copyright.html\n" - "//\n" - "// Copyright (C) 1999-2016, International Business Machines\n" - "// Corporation and others. All Rights Reserved.\n" - "//\n" - "// file name: %s\n" - "//\n" - "// machine-generated by: %s\n" - "\n\n"; - return usrc_createWithHeader(path, filename, generator, header); -} - -U_CAPI FILE * U_EXPORT2 -usrc_createTextData(const char *path, const char *filename, const char *generator) { - // TODO: Add parameter for the first year this file was generated, not before 2016. 
- static const char *header= - "# Copyright (C) 2016 and later: Unicode, Inc. and others.\n" - "# License & terms of use: http://www.unicode.org/copyright.html\n" - "# Copyright (C) 1999-2016, International Business Machines\n" - "# Corporation and others. All Rights Reserved.\n" - "#\n" - "# file name: %s\n" - "#\n" - "# machine-generated by: %s\n" - "\n\n"; - return usrc_createWithHeader(path, filename, generator, header); -} - -U_CAPI void U_EXPORT2 -usrc_writeArray(FILE *f, - const char *prefix, - const void *p, int32_t width, int32_t length, - const char *postfix) { - const uint8_t *p8; - const uint16_t *p16; - const uint32_t *p32; - uint32_t value; - int32_t i, col; - - p8=NULL; - p16=NULL; - p32=NULL; - switch(width) { - case 8: - p8=(const uint8_t *)p; - break; - case 16: - p16=(const uint16_t *)p; - break; - case 32: - p32=(const uint32_t *)p; - break; - default: - fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width); - return; - } - if(prefix!=NULL) { - fprintf(f, prefix, (long)length); - } - for(i=col=0; i0) { - if(col<16) { - fputc(',', f); - } else { - fputs(",\n", f); - col=0; - } - } - switch(width) { - case 8: - value=p8[i]; - break; - case 16: - value=p16[i]; - break; - case 32: - value=p32[i]; - break; - default: - value=0; /* unreachable */ - break; - } - fprintf(f, value<=9 ? 
"%lu" : "0x%lx", (unsigned long)value); - } - if(postfix!=NULL) { - fputs(postfix, f); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeUTrie2Arrays(FILE *f, - const char *indexPrefix, const char *data32Prefix, - const UTrie2 *pTrie, - const char *postfix) { - if(pTrie->data32==NULL) { - /* 16-bit trie */ - usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, postfix); - } else { - /* 32-bit trie */ - usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix); - usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, postfix); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeUTrie2Struct(FILE *f, - const char *prefix, - const UTrie2 *pTrie, - const char *indexName, const char *data32Name, - const char *postfix) { - if(prefix!=NULL) { - fputs(prefix, f); - } - if(pTrie->data32==NULL) { - /* 16-bit trie */ - fprintf( - f, - " %s,\n" /* index */ - " %s+%ld,\n" /* data16 */ - " NULL,\n", /* data32 */ - indexName, - indexName, - (long)pTrie->indexLength); - } else { - /* 32-bit trie */ - fprintf( - f, - " %s,\n" /* index */ - " NULL,\n" /* data16 */ - " %s,\n", /* data32 */ - indexName, - data32Name); - } - fprintf( - f, - " %ld,\n" /* indexLength */ - " %ld,\n" /* dataLength */ - " 0x%hx,\n" /* index2NullOffset */ - " 0x%hx,\n" /* dataNullOffset */ - " 0x%lx,\n" /* initialValue */ - " 0x%lx,\n" /* errorValue */ - " 0x%lx,\n" /* highStart */ - " 0x%lx,\n" /* highValueIndex */ - " NULL, 0, FALSE, FALSE, 0, NULL\n", - (long)pTrie->indexLength, (long)pTrie->dataLength, - (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, - (long)pTrie->initialValue, (long)pTrie->errorValue, - (long)pTrie->highStart, (long)pTrie->highValueIndex); - if(postfix!=NULL) { - fputs(postfix, f); - } -} - -U_CAPI void U_EXPORT2 -usrc_writeArrayOfMostlyInvChars(FILE *f, - const char *prefix, - const char *p, int32_t length, - const char *postfix) { - int32_t i, col; - int prev2, prev, c; - - if(prefix!=NULL) { - fprintf(f, prefix, 
(long)length); - } - prev2=prev=-1; - for(i=col=0; i0) { - /* Break long lines. Try to break at interesting places, to minimize revision diffs. */ - if( - /* Very long line. */ - col>=32 || - /* Long line, break after terminating NUL. */ - (col>=24 && prev2>=0x20 && prev==0) || - /* Medium-long line, break before non-NUL, non-character byte. */ - (col>=16 && (prev==0 || prev>=0x20) && 0 +#include +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "utrie2.h" +#include "cstring.h" +#include "writesrc.h" + +static FILE * +usrc_createWithHeader(const char *path, const char *filename, + const char *generator, const char *header) { + char buffer[1024]; + const char *p; + char *q; + FILE *f; + char c; + + if(path==NULL) { + p=filename; + } else { + /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */ + uprv_strcpy(buffer, path); + q=buffer+uprv_strlen(buffer); + if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { + *q++=U_FILE_SEP_CHAR; + } + uprv_strcpy(q, filename); + p=buffer; + } + + f=fopen(p, "w"); + if(f!=NULL) { + const struct tm *lt; + time_t t; + + time(&t); + lt=localtime(&t); + if(generator==NULL) { + strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt); + fprintf(f, header, filename, buffer); + } else { + fprintf(f, header, filename, generator); + } + } else { + fprintf( + stderr, + "usrc_create(%s, %s): unable to create file\n", + path!=NULL ? path : "", filename); + } + return f; +} + +U_CAPI FILE * U_EXPORT2 +usrc_create(const char *path, const char *filename, const char *generator) { + // TODO: Add parameter for the first year this file was generated, not before 2016. + static const char *header= + "// © 2016 and later: Unicode, Inc. and others.\n" + "// License & terms of use: http://www.unicode.org/copyright.html\n" + "//\n" + "// Copyright (C) 1999-2016, International Business Machines\n" + "// Corporation and others. 
All Rights Reserved.\n" + "//\n" + "// file name: %s\n" + "//\n" + "// machine-generated by: %s\n" + "\n\n"; + return usrc_createWithHeader(path, filename, generator, header); +} + +U_CAPI FILE * U_EXPORT2 +usrc_createTextData(const char *path, const char *filename, const char *generator) { + // TODO: Add parameter for the first year this file was generated, not before 2016. + static const char *header= + "# Copyright (C) 2016 and later: Unicode, Inc. and others.\n" + "# License & terms of use: http://www.unicode.org/copyright.html\n" + "# Copyright (C) 1999-2016, International Business Machines\n" + "# Corporation and others. All Rights Reserved.\n" + "#\n" + "# file name: %s\n" + "#\n" + "# machine-generated by: %s\n" + "\n\n"; + return usrc_createWithHeader(path, filename, generator, header); +} + +U_CAPI void U_EXPORT2 +usrc_writeArray(FILE *f, + const char *prefix, + const void *p, int32_t width, int32_t length, + const char *postfix) { + const uint8_t *p8; + const uint16_t *p16; + const uint32_t *p32; + uint32_t value; + int32_t i, col; + + p8=NULL; + p16=NULL; + p32=NULL; + switch(width) { + case 8: + p8=(const uint8_t *)p; + break; + case 16: + p16=(const uint16_t *)p; + break; + case 32: + p32=(const uint32_t *)p; + break; + default: + fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width); + return; + } + if(prefix!=NULL) { + fprintf(f, prefix, (long)length); + } + for(i=col=0; i0) { + if(col<16) { + fputc(',', f); + } else { + fputs(",\n", f); + col=0; + } + } + switch(width) { + case 8: + value=p8[i]; + break; + case 16: + value=p16[i]; + break; + case 32: + value=p32[i]; + break; + default: + value=0; /* unreachable */ + break; + } + fprintf(f, value<=9 ? 
"%lu" : "0x%lx", (unsigned long)value); + } + if(postfix!=NULL) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUTrie2Arrays(FILE *f, + const char *indexPrefix, const char *data32Prefix, + const UTrie2 *pTrie, + const char *postfix) { + if(pTrie->data32==NULL) { + /* 16-bit trie */ + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, postfix); + } else { + /* 32-bit trie */ + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix); + usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, postfix); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUTrie2Struct(FILE *f, + const char *prefix, + const UTrie2 *pTrie, + const char *indexName, const char *data32Name, + const char *postfix) { + if(prefix!=NULL) { + fputs(prefix, f); + } + if(pTrie->data32==NULL) { + /* 16-bit trie */ + fprintf( + f, + " %s,\n" /* index */ + " %s+%ld,\n" /* data16 */ + " NULL,\n", /* data32 */ + indexName, + indexName, + (long)pTrie->indexLength); + } else { + /* 32-bit trie */ + fprintf( + f, + " %s,\n" /* index */ + " NULL,\n" /* data16 */ + " %s,\n", /* data32 */ + indexName, + data32Name); + } + fprintf( + f, + " %ld,\n" /* indexLength */ + " %ld,\n" /* dataLength */ + " 0x%hx,\n" /* index2NullOffset */ + " 0x%hx,\n" /* dataNullOffset */ + " 0x%lx,\n" /* initialValue */ + " 0x%lx,\n" /* errorValue */ + " 0x%lx,\n" /* highStart */ + " 0x%lx,\n" /* highValueIndex */ + " NULL, 0, FALSE, FALSE, 0, NULL\n", + (long)pTrie->indexLength, (long)pTrie->dataLength, + (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset, + (long)pTrie->initialValue, (long)pTrie->errorValue, + (long)pTrie->highStart, (long)pTrie->highValueIndex); + if(postfix!=NULL) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeArrayOfMostlyInvChars(FILE *f, + const char *prefix, + const char *p, int32_t length, + const char *postfix) { + int32_t i, col; + int prev2, prev, c; + + if(prefix!=NULL) { + fprintf(f, prefix, 
(long)length); + } + prev2=prev=-1; + for(i=col=0; i0) { + /* Break long lines. Try to break at interesting places, to minimize revision diffs. */ + if( + /* Very long line. */ + col>=32 || + /* Long line, break after terminating NUL. */ + (col>=24 && prev2>=0x20 && prev==0) || + /* Medium-long line, break before non-NUL, non-character byte. */ + (col>=16 && (prev==0 || prev>=0x20) && 0