diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/tools/escapesrc/escapesrc.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/tools/escapesrc/escapesrc.cpp | 420 |
1 files changed, 0 insertions, 420 deletions
diff --git a/deps/node/deps/icu-small/source/tools/escapesrc/escapesrc.cpp b/deps/node/deps/icu-small/source/tools/escapesrc/escapesrc.cpp deleted file mode 100644 index f51a86ea..00000000 --- a/deps/node/deps/icu-small/source/tools/escapesrc/escapesrc.cpp +++ /dev/null @@ -1,420 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -#include <stdio.h> -#include <string> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#include <iostream> -#include <fstream> - -// We only use U8_* macros, which are entirely inline. -#include "unicode/utf8.h" - -// This contains a codepage and ISO 14882:1998 illegality table. -// Use "make gen-table" to rebuild it. -#include "cptbl.h" - -/** - * What is this? - * - * "This" is a preprocessor that makes an attempt to convert fully valid C++11 source code - * in utf-8 into something consumable by certain compilers (Solaris, xlC) - * which aren't quite standards compliant. - * - * - u"<unicode>" or u'<unicode>' gets converted to u"\uNNNN" or u'\uNNNN' - * - u8"<unicode>" gets converted to "\xAA\xBB\xCC\xDD" etc. - * (some compilers do not support the u8 prefix correctly.) - * - if the system is EBCDIC-based, that is used to correct the input characters. - * - * Usage: - * escapesrc infile.cpp outfile.cpp - * Normally this is invoked by the build stage, with a rule such as: - * - * _%.cpp: $(srcdir)/%.cpp - * @$(BINDIR)/escapesrc$(EXEEXT) $< $@ - * %.o: _%.cpp - * $(COMPILE.cc) ... $@ $< - * - * In the Makefiles, SKIP_ESCAPING=YES is used to prevent escapesrc.cpp - * from being itself escaped. - */ - - -static const char - kSPACE = 0x20, - kTAB = 0x09, - kLF = 0x0A, - kCR = 0x0D; - -// For convenience -# define cp1047_to_8859(c) cp1047_8859_1[c] - -// Our app's name -std::string prog; - -/** - * Give the usual 1-line documentation and exit - */ -void usage() { - fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", prog.c_str(), prog.c_str()); -} - -/** - * Delete the output file (if any) - * We want to delete even if we didn't generate, because it might be stale. - */ -int cleanup(const std::string &outfile) { - const char *outstr = outfile.c_str(); - if(outstr && *outstr) { - int rc = std::remove(outstr); - if(rc == 0) { - fprintf(stderr, "%s: deleted %s\n", prog.c_str(), outstr); - return 0; - } else { - if( errno == ENOENT ) { - return 0; // File did not exist - no error. - } else { - perror("std::remove"); - return 1; - } - } - } - return 0; -} - -/** - * Skip across any known whitespace. - * @param p startpoint - * @param e limit - * @return first non-whitespace char - */ -inline const char *skipws(const char *p, const char *e) { - for(;p<e;p++) { - switch(*p) { - case kSPACE: - case kTAB: - case kLF: - case kCR: - break; - default: - return p; // non ws - } - } - return p; -} - -/** - * Append a byte, hex encoded - * @param outstr sstring to append to - * @param byte the byte to append - */ -void appendByte(std::string &outstr, - uint8_t byte) { - char tmp2[5]; - sprintf(tmp2, "\\x%02X", 0xFF & (int)(byte)); - outstr += tmp2; -} - -/** - * Append the bytes from 'linestr' into outstr, with escaping - * @param outstr the output buffer - * @param linestr the input buffer - * @param pos in/out: the current char under consideration - * @param chars the number of chars to consider - * @return true on failure - */ -bool appendUtf8(std::string &outstr, - const std::string &linestr, - size_t &pos, - size_t chars) { - char tmp[9]; - for(size_t i=0;i<chars;i++) { - tmp[i] = linestr[++pos]; - } - tmp[chars] = 0; - unsigned int c; - sscanf(tmp, "%X", &c); - UChar32 ch = c & 0x1FFFFF; - - // now to append \\x%% etc - uint8_t bytesNeeded = U8_LENGTH(ch); - if(bytesNeeded == 0) { - fprintf(stderr, "Illegal code point U+%X\n", ch); - return true; - } - uint8_t bytes[4]; - uint8_t *s = bytes; - size_t i = 0; - U8_APPEND_UNSAFE(s, i, ch); - for(size_t t = 0; t<i; t++) { - appendByte(outstr, s[t]); - } - return false; -} - -/** - * Fixup u8"x" - * @param linestr string to mutate. Already escaped into \u format. - * @param origpos beginning, points to 'u8"' - * @param pos end, points to " - * @return false for no-problem, true for failure! - */ -bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) { - size_t pos = origpos + 3; - std::string outstr; - outstr += '\"'; // local encoding - for(;pos<endpos;pos++) { - char c = linestr[pos]; - if(c == '\\') { - char c2 = linestr[++pos]; - switch(c2) { - case '\'': - case '"': -#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) - c2 = cp1047_to_8859(c2); -#endif - appendByte(outstr, c2); - break; - case 'u': - appendUtf8(outstr, linestr, pos, 4); - break; - case 'U': - appendUtf8(outstr, linestr, pos, 8); - break; - } - } else { -#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) - c = cp1047_to_8859(c); -#endif - appendByte(outstr, c); - } - } - outstr += ('\"'); - - linestr.replace(origpos, (endpos-origpos+1), outstr); - - return false; // OK -} - -/** - * fix the u"x"/u'x'/u8"x" string at the position - * u8'x' is not supported, sorry. - * @param linestr the input string - * @param pos the position - * @return false = no err, true = had err - */ -bool fixAt(std::string &linestr, size_t pos) { - size_t origpos = pos; - - if(linestr[pos] != 'u') { - fprintf(stderr, "Not a 'u'?"); - return true; - } - - pos++; // past 'u' - - bool utf8 = false; - - if(linestr[pos] == '8') { // u8" - utf8 = true; - pos++; - } - - char quote = linestr[pos]; - - if(quote != '\'' && quote != '\"') { - fprintf(stderr, "Quote is '%c' - not sure what to do.\n", quote); - return true; - } - - if(quote == '\'' && utf8) { - fprintf(stderr, "Cannot do u8'...'\n"); - return true; - } - - pos ++; - - //printf("u%c…%c\n", quote, quote); - - for(; pos < linestr.size(); pos++) { - if(linestr[pos] == quote) { - if(utf8) { - return fixu8(linestr, origpos, pos); // fix u8"..." - } else { - return false; // end of quote - } - } - if(linestr[pos] == '\\') { - pos++; - if(linestr[pos] == quote) continue; // quoted quote - if(linestr[pos] == 'u') continue; // for now ... unicode escape - if(linestr[pos] == '\\') continue; - // some other escape… ignore - } else { - size_t old_pos = pos; - int32_t i = pos; -#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) - // mogrify 1-4 bytes from 1047 'back' to utf-8 - char old_byte = linestr[pos]; - linestr[pos] = cp1047_to_8859(linestr[pos]); - // how many more? - int32_t trail = U8_COUNT_TRAIL_BYTES(linestr[pos]); - for(size_t pos2 = pos+1; trail>0; pos2++,trail--) { - linestr[pos2] = cp1047_to_8859(linestr[pos2]); - if(linestr[pos2] == 0x0A) { - linestr[pos2] = 0x85; // NL is ambiguous here - } - } -#endif - - // Proceed to decode utf-8 - const uint8_t *s = (const uint8_t*) (linestr.c_str()); - int32_t length = linestr.size(); - UChar32 c; - if(U8_IS_SINGLE((uint8_t)s[i]) && oldIllegal[s[i]]) { -#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) - linestr[pos] = old_byte; // put it back -#endif - continue; // single code point not previously legal for \u escaping - } - - // otherwise, convert it to \u / \U - { - U8_NEXT(s, i, length, c); - } - if(c<0) { - fprintf(stderr, "Illegal utf-8 sequence at Column: %d\n", (int)old_pos); - fprintf(stderr, "Line: >>%s<<\n", linestr.c_str()); - return true; - } - - size_t seqLen = (i-pos); - - //printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);fflush(stdout); - - char newSeq[20]; - if( c <= 0xFFFF) { - sprintf(newSeq, "\\u%04X", c); - } else { - sprintf(newSeq, "\\U%08X", c); - } - linestr.replace(pos, seqLen, newSeq); - pos += strlen(newSeq) - 1; - } - } - - return false; -} - -/** - * Fixup an entire line - * false = no err - * true = had err - * @param no the line number (not used) - * @param linestr the string to fix - * @return true if any err, else false - */ -bool fixLine(int /*no*/, std::string &linestr) { - const char *line = linestr.c_str(); - size_t len = linestr.size(); - - // no u' in the line? - if(!strstr(line, "u'") && !strstr(line, "u\"") && !strstr(line, "u8\"")) { - return false; // Nothing to do. No u' or u" detected - } - - // start from the end and find all u" cases - size_t pos = len = linestr.size(); - while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) { - //printf("found doublequote at %d\n", pos); - if(fixAt(linestr, pos)) return true; - if(pos == 0) break; - pos--; - } - - // reset and find all u' cases - pos = len = linestr.size(); - while((pos>0) && (pos = linestr.rfind("u'", pos)) != std::string::npos) { - //printf("found singlequote at %d\n", pos); - if(fixAt(linestr, pos)) return true; - if(pos == 0) break; - pos--; - } - - // reset and find all u8" cases - pos = len = linestr.size(); - while((pos>0) && (pos = linestr.rfind("u8\"", pos)) != std::string::npos) { - if(fixAt(linestr, pos)) return true; - if(pos == 0) break; - pos--; - } - - //fprintf(stderr, "%d - fixed\n", no); - return false; -} - -/** - * Convert a whole file - * @param infile - * @param outfile - * @return 1 on err, 0 otherwise - */ -int convert(const std::string &infile, const std::string &outfile) { - fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str()); - - std::ifstream inf; - - inf.open(infile.c_str(), std::ios::in); - - if(!inf.is_open()) { - fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str()); - cleanup(outfile); - return 1; - } - - std::ofstream outf; - - outf.open(outfile.c_str(), std::ios::out); - - if(!outf.is_open()) { - fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str()); - return 1; - } - - // TODO: any platform variations of #line? - outf << "#line 1 \"" << infile << "\"" << '\n'; - - int no = 0; - std::string linestr; - while( getline( inf, linestr)) { - no++; - if(fixLine(no, linestr)) { - outf.close(); - fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str()); - cleanup(outfile); - return 1; - } - outf << linestr << '\n'; - } - - return 0; -} - -/** - * Main function - */ -int main(int argc, const char *argv[]) { - prog = argv[0]; - - if(argc != 3) { - usage(); - return 1; - } - - std::string infile = argv[1]; - std::string outfile = argv[2]; - - return convert(infile, outfile); -} |