summaryrefslogtreecommitdiff
path: root/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp')
-rw-r--r--deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp615
1 files changed, 0 insertions, 615 deletions
diff --git a/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp b/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp
deleted file mode 100644
index b11efa7f..00000000
--- a/deps/node/deps/icu-small/source/tools/toolutil/ppucd.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2011-2014, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* file name: ppucd.cpp
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2011dec11
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-#include "charstr.h"
-#include "cstring.h"
-#include "ppucd.h"
-#include "uassert.h"
-#include "uparse.h"
-
-#include <stdio.h>
-#include <string.h>
-
-U_NAMESPACE_BEGIN
-
-PropertyNames::~PropertyNames() {}
-
-int32_t
-PropertyNames::getPropertyEnum(const char *name) const {
- return u_getPropertyEnum(name);
-}
-
-int32_t
-PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
- return u_getPropertyValueEnum((UProperty)property, name);
-}
-
-UniProps::UniProps()
- : start(U_SENTINEL), end(U_SENTINEL),
- bmg(U_SENTINEL), bpb(U_SENTINEL),
- scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
- digitValue(-1), numericValue(NULL),
- name(NULL), nameAlias(NULL) {
- memset(binProps, 0, sizeof(binProps));
- memset(intProps, 0, sizeof(intProps));
- memset(age, 0, 4);
-}
-
-UniProps::~UniProps() {}
-
-const int32_t PreparsedUCD::kNumLineBuffers;
-
-PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
- : icuPnames(new PropertyNames()), pnames(icuPnames),
- file(NULL),
- defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
- lineNumber(0),
- lineType(NO_LINE),
- fieldLimit(NULL), lineLimit(NULL) {
- if(U_FAILURE(errorCode)) { return; }
-
- if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {
- filename=NULL;
- file=stdin;
- } else {
- file=fopen(filename, "r");
- }
- if(file==NULL) {
- perror("error opening preparsed UCD");
- fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
- errorCode=U_FILE_ACCESS_ERROR;
- return;
- }
-
- memset(ucdVersion, 0, 4);
- lines[0][0]=0;
-}
-
-PreparsedUCD::~PreparsedUCD() {
- if(file!=stdin) {
- fclose(file);
- }
- delete icuPnames;
-}
-
-// Same order as the LineType values.
-static const char *lineTypeStrings[]={
- NULL,
- NULL,
- "ucd",
- "property",
- "binary",
- "value",
- "defaults",
- "block",
- "cp",
- "unassigned",
- "algnamesrange"
-};
-
-PreparsedUCD::LineType
-PreparsedUCD::readLine(UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NO_LINE; }
- // Select the next available line buffer.
- while(!isLineBufferAvailable(lineIndex)) {
- ++lineIndex;
- if (lineIndex == kNumLineBuffers) {
- lineIndex = 0;
- }
- }
- char *line=lines[lineIndex];
- *line=0;
- lineLimit=fieldLimit=line;
- lineType=NO_LINE;
- char *result=fgets(line, sizeof(lines[0]), file);
- if(result==NULL) {
- if(ferror(file)) {
- perror("error reading preparsed UCD");
- fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
- errorCode=U_FILE_ACCESS_ERROR;
- }
- return NO_LINE;
- }
- ++lineNumber;
- if(*line=='#') {
- fieldLimit=strchr(line, 0);
- return lineType=EMPTY_LINE;
- }
- // Remove trailing /r/n.
- char c;
- char *limit=strchr(line, 0);
- while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
- // Remove trailing white space.
- while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
- *limit=0;
- lineLimit=limit;
- if(line==limit) {
- fieldLimit=limit;
- return lineType=EMPTY_LINE;
- }
- // Split by ';'.
- char *semi=line;
- while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; }
- fieldLimit=strchr(line, 0);
- // Determine the line type.
- int32_t type;
- for(type=EMPTY_LINE+1;; ++type) {
- if(type==LINE_TYPE_COUNT) {
- fprintf(stderr,
- "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
- line, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return NO_LINE;
- }
- if(0==strcmp(line, lineTypeStrings[type])) {
- break;
- }
- }
- lineType=(LineType)type;
- if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
- u_versionFromString(ucdVersion, fieldLimit+1);
- }
- return lineType;
-}
-
-const char *
-PreparsedUCD::firstField() {
- char *field=lines[lineIndex];
- fieldLimit=strchr(field, 0);
- return field;
-}
-
-const char *
-PreparsedUCD::nextField() {
- if(fieldLimit==lineLimit) { return NULL; }
- char *field=fieldLimit+1;
- fieldLimit=strchr(field, 0);
- return field;
-}
-
-const UniProps *
-PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return NULL; }
- newValues.clear();
- if(!lineHasPropertyValues()) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- firstField();
- const char *field=nextField();
- if(field==NULL) {
- // No range field after the type.
- fprintf(stderr,
- "error in preparsed UCD: missing default/block/cp range field "
- "(no second field) on line %ld\n",
- (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return NULL;
- }
- UChar32 start, end;
- if(!parseCodePointRange(field, start, end, errorCode)) { return NULL; }
- UniProps *props;
- UBool insideBlock=FALSE; // TRUE if cp or unassigned range inside the block range.
- switch(lineType) {
- case DEFAULTS_LINE:
- // Should occur before any block/cp/unassigned line.
- if(blockLineIndex>=0) {
- fprintf(stderr,
- "error in preparsed UCD: default line %ld after one or more block lines\n",
- (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return NULL;
- }
- if(defaultLineIndex>=0) {
- fprintf(stderr,
- "error in preparsed UCD: second line with default properties on line %ld\n",
- (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return NULL;
- }
- if(start!=0 || end!=0x10ffff) {
- fprintf(stderr,
- "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
- field, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return NULL;
- }
- props=&defaultProps;
- defaultLineIndex=lineIndex;
- break;
- case BLOCK_LINE:
- blockProps=defaultProps; // Block inherits default properties.
- props=&blockProps;
- blockLineIndex=lineIndex;
- break;
- case CP_LINE:
- case UNASSIGNED_LINE:
- if(blockProps.start<=start && end<=blockProps.end) {
- insideBlock=TRUE;
- if(lineType==CP_LINE) {
- // Code point range fully inside the last block inherits the block properties.
- cpProps=blockProps;
- } else {
- // Unassigned line inside the block is based on default properties
- // which override block properties.
- cpProps=defaultProps;
- newValues=blockValues;
- // Except, it inherits the one blk=Block property.
- int32_t blkIndex=UCHAR_BLOCK-UCHAR_INT_START;
- cpProps.intProps[blkIndex]=blockProps.intProps[blkIndex];
- newValues.remove((UChar32)UCHAR_BLOCK);
- }
- } else if(start>blockProps.end || end<blockProps.start) {
- // Code point range fully outside the last block inherits the default properties.
- cpProps=defaultProps;
- } else {
- // Code point range partially overlapping with the last block is illegal.
- fprintf(stderr,
- "error in preparsed UCD: cp range %s on line %ld only "
- "partially overlaps with block range %04lX..%04lX\n",
- field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
- errorCode=U_PARSE_ERROR;
- return NULL;
- }
- props=&cpProps;
- break;
- default:
- // Will not occur because of the range check above.
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return NULL;
- }
- props->start=start;
- props->end=end;
- while((field=nextField())!=NULL) {
- if(!parseProperty(*props, field, newValues, errorCode)) { return NULL; }
- }
- if(lineType==BLOCK_LINE) {
- blockValues=newValues;
- } else if(lineType==UNASSIGNED_LINE && insideBlock) {
- // Unset newValues for values that are the same as the block values.
- for(int32_t prop=0; prop<UCHAR_BINARY_LIMIT; ++prop) {
- if(newValues.contains(prop) && cpProps.binProps[prop]==blockProps.binProps[prop]) {
- newValues.remove(prop);
- }
- }
- for(int32_t prop=UCHAR_INT_START; prop<UCHAR_INT_LIMIT; ++prop) {
- int32_t index=prop-UCHAR_INT_START;
- if(newValues.contains(prop) && cpProps.intProps[index]==blockProps.intProps[index]) {
- newValues.remove(prop);
- }
- }
- }
- return props;
-}
-
-static const struct {
- const char *name;
- int32_t prop;
-} ppucdProperties[]={
- { "Name_Alias", PPUCD_NAME_ALIAS },
- { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
- { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
-};
-
-// Returns TRUE for "ok to continue parsing fields".
-UBool
-PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
- UErrorCode &errorCode) {
- CharString pBuffer;
- const char *p=field;
- const char *v=strchr(p, '=');
- int binaryValue;
- if(*p=='-') {
- if(v!=NULL) {
- fprintf(stderr,
- "error in preparsed UCD: mix of binary-property-no and "
- "enum-property syntax '%s' on line %ld\n",
- field, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return FALSE;
- }
- binaryValue=0;
- ++p;
- } else if(v==NULL) {
- binaryValue=1;
- } else {
- binaryValue=-1;
- // Copy out the property name rather than modifying the field (writing a NUL).
- pBuffer.append(p, (int32_t)(v-p), errorCode);
- p=pBuffer.data();
- ++v;
- }
- int32_t prop=pnames->getPropertyEnum(p);
- if(prop<0) {
- for(int32_t i=0;; ++i) {
- if(i==UPRV_LENGTHOF(ppucdProperties)) {
- // Ignore unknown property names.
- return TRUE;
- }
- if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
- prop=ppucdProperties[i].prop;
- U_ASSERT(prop>=0);
- break;
- }
- }
- }
- if(prop<UCHAR_BINARY_LIMIT) {
- if(binaryValue>=0) {
- props.binProps[prop]=(UBool)binaryValue;
- } else {
- // No binary value for a binary property.
- fprintf(stderr,
- "error in preparsed UCD: enum-property syntax '%s' "
- "for binary property on line %ld\n",
- field, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- }
- } else if(binaryValue>=0) {
- // Binary value for a non-binary property.
- fprintf(stderr,
- "error in preparsed UCD: binary-property syntax '%s' "
- "for non-binary property on line %ld\n",
- field, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- } else if (prop < UCHAR_INT_START) {
- fprintf(stderr,
- "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
- prop, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- } else if(prop<UCHAR_INT_LIMIT) {
- int32_t value=pnames->getPropertyValueEnum(prop, v);
- if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
- // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
- char *end;
- unsigned long ccc=uprv_strtoul(v, &end, 10);
- if(v<end && *end==0 && ccc<=254) {
- value=(int32_t)ccc;
- }
- }
- if(value==UCHAR_INVALID_CODE) {
- fprintf(stderr,
- "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
- field, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- } else {
- props.intProps[prop-UCHAR_INT_START]=value;
- }
- } else if(*v=='<') {
- // Do not parse default values like <code point>, just set null values.
- switch(prop) {
- case UCHAR_BIDI_MIRRORING_GLYPH:
- props.bmg=U_SENTINEL;
- break;
- case UCHAR_BIDI_PAIRED_BRACKET:
- props.bpb=U_SENTINEL;
- break;
- case UCHAR_SIMPLE_CASE_FOLDING:
- props.scf=U_SENTINEL;
- break;
- case UCHAR_SIMPLE_LOWERCASE_MAPPING:
- props.slc=U_SENTINEL;
- break;
- case UCHAR_SIMPLE_TITLECASE_MAPPING:
- props.stc=U_SENTINEL;
- break;
- case UCHAR_SIMPLE_UPPERCASE_MAPPING:
- props.suc=U_SENTINEL;
- break;
- case UCHAR_CASE_FOLDING:
- props.cf.remove();
- break;
- case UCHAR_LOWERCASE_MAPPING:
- props.lc.remove();
- break;
- case UCHAR_TITLECASE_MAPPING:
- props.tc.remove();
- break;
- case UCHAR_UPPERCASE_MAPPING:
- props.uc.remove();
- break;
- case UCHAR_SCRIPT_EXTENSIONS:
- props.scx.clear();
- break;
- default:
- fprintf(stderr,
- "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
- field, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- }
- } else {
- char c;
- switch(prop) {
- case UCHAR_NUMERIC_VALUE:
- props.numericValue=v;
- c=*v;
- if('0'<=c && c<='9' && v[1]==0) {
- props.digitValue=c-'0';
- } else {
- props.digitValue=-1;
- }
- break;
- case UCHAR_NAME:
- props.name=v;
- break;
- case UCHAR_AGE:
- u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric.
- break;
- case UCHAR_BIDI_MIRRORING_GLYPH:
- props.bmg=parseCodePoint(v, errorCode);
- break;
- case UCHAR_BIDI_PAIRED_BRACKET:
- props.bpb=parseCodePoint(v, errorCode);
- break;
- case UCHAR_SIMPLE_CASE_FOLDING:
- props.scf=parseCodePoint(v, errorCode);
- break;
- case UCHAR_SIMPLE_LOWERCASE_MAPPING:
- props.slc=parseCodePoint(v, errorCode);
- break;
- case UCHAR_SIMPLE_TITLECASE_MAPPING:
- props.stc=parseCodePoint(v, errorCode);
- break;
- case UCHAR_SIMPLE_UPPERCASE_MAPPING:
- props.suc=parseCodePoint(v, errorCode);
- break;
- case UCHAR_CASE_FOLDING:
- parseString(v, props.cf, errorCode);
- break;
- case UCHAR_LOWERCASE_MAPPING:
- parseString(v, props.lc, errorCode);
- break;
- case UCHAR_TITLECASE_MAPPING:
- parseString(v, props.tc, errorCode);
- break;
- case UCHAR_UPPERCASE_MAPPING:
- parseString(v, props.uc, errorCode);
- break;
- case PPUCD_NAME_ALIAS:
- props.nameAlias=v;
- break;
- case PPUCD_CONDITIONAL_CASE_MAPPINGS:
- case PPUCD_TURKIC_CASE_FOLDING:
- // No need to parse their values: They are hardcoded in the runtime library.
- break;
- case UCHAR_SCRIPT_EXTENSIONS:
- parseScriptExtensions(v, props.scx, errorCode);
- break;
- default:
- // Ignore unhandled properties.
- return TRUE;
- }
- }
- if(U_SUCCESS(errorCode)) {
- newValues.add((UChar32)prop);
- return TRUE;
- } else {
- return FALSE;
- }
-}
-
-UBool
-PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return FALSE; }
- if(lineType!=ALG_NAMES_RANGE_LINE) {
- errorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return FALSE;
- }
- firstField();
- const char *field=nextField();
- if(field==NULL) {
- // No range field after the type.
- fprintf(stderr,
- "error in preparsed UCD: missing algnamesrange range field "
- "(no second field) on line %ld\n",
- (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return FALSE;
- }
- return parseCodePointRange(field, start, end, errorCode);
-}
-
-UChar32
-PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
- char *end;
- uint32_t value=(uint32_t)uprv_strtoul(s, &end, 16);
- if(end<=s || *end!=0 || value>=0x110000) {
- fprintf(stderr,
- "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
- s, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return U_SENTINEL;
- }
- return (UChar32)value;
-}
-
-UBool
-PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
- uint32_t st, e;
- u_parseCodePointRange(s, &st, &e, &errorCode);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr,
- "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
- s, (long)lineNumber);
- return FALSE;
- }
- start=(UChar32)st;
- end=(UChar32)e;
- return TRUE;
-}
-
-void
-PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
- UChar *buffer=toUCharPtr(uni.getBuffer(-1));
- int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
- if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
- errorCode=U_ZERO_ERROR;
- uni.releaseBuffer(0);
- buffer=toUCharPtr(uni.getBuffer(length));
- length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
- }
- uni.releaseBuffer(length);
- if(U_FAILURE(errorCode)) {
- fprintf(stderr,
- "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
- s, (long)lineNumber);
- }
-}
-
-void
-PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
- if(U_FAILURE(errorCode)) { return; }
- scx.clear();
- CharString scString;
- for(;;) {
- const char *scs;
- const char *scLimit=strchr(s, ' ');
- if(scLimit!=NULL) {
- scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
- if(U_FAILURE(errorCode)) { return; }
- } else {
- scs=s;
- }
- int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
- if(script==UCHAR_INVALID_CODE) {
- fprintf(stderr,
- "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
- scs, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return;
- } else if(scx.contains(script)) {
- fprintf(stderr,
- "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
- scs, (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- return;
- } else {
- scx.add(script);
- }
- if(scLimit!=NULL) {
- s=scLimit+1;
- } else {
- break;
- }
- }
- if(scx.isEmpty()) {
- fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
- errorCode=U_PARSE_ERROR;
- }
-}
-
-U_NAMESPACE_END