summaryrefslogtreecommitdiff
path: root/tools/icu/shrink-icu-src.py
blob: c91472ed3082dd091adcec5ecaa9125700b0b6de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python
from __future__ import print_function
import optparse
import os
import re
import sys
import shutil
import bz2

# Command-line interface. Every option is a directory path; the defaults are
# relative paths, so they resolve against the current working directory.
parser = optparse.OptionParser()

# Destination of the shrunken ICU tree. Any existing directory is removed below.
parser.add_option('--icudst',
    action='store',
    dest='icudst',
    default='deps/icu-small',
    help='path to target ICU directory. Will be deleted.')

# Full ICU source tree to copy from (note: flag is --icu-src, dest is icusrc).
parser.add_option('--icu-src',
    action='store',
    dest='icusrc',
    default='deps/icu',
    help='path to source ICU directory.')

# NOTE(review): options.icutmp is parsed but not referenced in the visible
# part of this script.
parser.add_option('--icutmp',
    action='store',
    dest='icutmp',
    default='out/Release/obj/gen/icutmp',
    help='path to icutmp dir.')

(options, args) = parser.parse_args()

# Validate the source first, so that a bad --icu-src does not cost us the
# previously generated destination tree.
if not os.path.isdir(options.icusrc):
    print('Missing source ICU dir --icu-src=%s' % (options.icusrc))
    sys.exit(1)

# copytree() later requires that the destination does not exist yet.
if os.path.isdir(options.icudst):
    print('Deleting existing icudst %s' % (options.icudst))
    shutil.rmtree(options.icudst)

# Compression settings: the suffix appended to the output file name has to
# match the format that compress_data() actually writes.
compression_suffix = '.bz2'
def compress_data(infp, outfp):
    """Write a bzip2-compressed copy of the file at `infp` to `outfp`.

    Both arguments are file system paths. The input is read in binary mode
    and streamed into the compressor, so it is never held in memory whole.
    """
    with open(infp, 'rb') as raw, bz2.BZ2File(outfp, 'wb') as packed:
        shutil.copyfileobj(raw, packed)

def print_size(fn):
    """Print the size of the file `fn`, followed by a tab and its path.

    The size is the byte count divided by 1,024,000 and truncated to an
    integer by the '%d' format, then suffixed with 'M'.
    """
    # NOTE(review): 1024000 is neither 10**6 nor 2**20; kept as-is so the
    # printed figures do not change.
    megabytes = os.path.getsize(fn) / 1024000
    print('%dM\t%s' % (megabytes, fn))

# File names ending in one of these extensions are never copied. Written as a
# raw string so that '\.' reaches the regex engine as an escaped dot without
# tripping Python's invalid-escape-sequence warning.
ignore_regex = re.compile(r'^.*\.(vcxproj|filters|nrm|icu|dat|xml|txt|ac|guess|m4|in|sub|py|mak)$')

def icu_ignore(dir, files):
    """Return the names in `files` that should not be copied.

    This is the `ignore=` callback handed to shutil.copytree(): it is called
    once per directory with that directory's path (`dir`) and the names of its
    entries (`files`), and returns the list of names to skip.

    Rules, keyed on the directory's path relative to options.icusrc:
      * top level: skip everything except 'source' and one license file
        ('LICENSE' if present, otherwise 'license.html');
      * 'source': skip a fixed list of sub-directories and configure scripts;
      * 'source/tools': skip a fixed list of tools;
      * everywhere: skip '.DS_Store', 'Makefile', 'Makefile.in' and any name
        matching ignore_regex.

    The returned list may contain duplicates or names that are not present in
    `files`.
    """
    # `dir` shadows the builtin, but the parameter name is part of the
    # existing interface and is kept.
    # relpath() instead of slicing by len(options.icusrc): the slice broke
    # when the option carried a trailing separator. Separators are normalised
    # to '/' so the comparisons below work on every platform.
    rel = os.path.relpath(dir, options.icusrc)
    subdir = '' if rel == os.curdir else rel.replace(os.sep, '/')

    ign = []
    if not subdir:
        # remove all files at root level, except the source tree...
        ign = [name for name in files if name != 'source']
        # ...and exactly one license file
        if 'LICENSE' in ign:
            ign.remove('LICENSE')
            # license.html will be removed (it's obviated by LICENSE)
        elif 'license.html' in ign:
            ign.remove('license.html')
    elif subdir == 'source':
        ign += ['layout', 'samples', 'test', 'extra', 'config', 'layoutex', 'allinone', 'data']
        ign += ['runConfigureICU', 'install-sh', 'mkinstalldirs', 'configure']
        ign += ['io']
    elif subdir == 'source/tools':
        ign += ['tzcode', 'ctestfw', 'gensprep', 'gennorm2', 'gendict', 'icuswap',
                'genbrk', 'gencfu', 'gencolusb', 'genren', 'memcheck', 'makeconv',
                'gencnval', 'icuinfo', 'gentest']
    ign += ['.DS_Store', 'Makefile', 'Makefile.in']

    # Extension-based exclusions apply in every directory.
    ign += [name for name in files if ignore_regex.match(name)]

    return ign

# copied from configure
def icu_info(icu_full_path):
    """Read the ICU version and host endianness tag for an ICU source tree.

    Scans <icu_full_path>/source/common/unicode/uvernum.h for a
    `#define U_ICU_VERSION_SHORT "<ver>"` line. If several lines match, the
    last one wins.

    Returns a tuple (version, endianness) where `version` is the quoted value
    of U_ICU_VERSION_SHORT and `endianness` is the first letter of
    sys.byteorder ('l' or 'b').

    Prints a message and exits the process with status 1 if the header is
    missing or carries no non-empty U_ICU_VERSION_SHORT definition.
    """
    uvernum_h = os.path.join(icu_full_path, 'source/common/unicode/uvernum.h')
    if not os.path.isfile(uvernum_h):
        print(' Error: could not load %s - is ICU installed?' % uvernum_h)
        sys.exit(1)
    icu_ver_major = None
    match_version = re.compile(r'^\s*#define\s+U_ICU_VERSION_SHORT\s+"([^"]*)".*')
    # Context manager so the header is closed deterministically; iterating the
    # file object avoids loading all lines up front.
    with open(uvernum_h) as header:
        for line in header:
            m = match_version.match(line)
            if m:
                icu_ver_major = m.group(1)
    if not icu_ver_major:
        print(' Could not read U_ICU_VERSION_SHORT version from %s' % uvernum_h)
        sys.exit(1)
    icu_endianness = sys.byteorder[0]  # TODO(srl295): EBCDIC should be 'e'
    return (icu_ver_major, icu_endianness)

# Work out the data file names from the ICU version and host endianness.
(icu_ver_major, icu_endianness) = icu_info(options.icusrc)
print("Data file root: icudt%s%s" % (icu_ver_major, icu_endianness))
dst_datafile = os.path.join(options.icudst, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness))

# NOTE(review): the source name always uses the 'l' suffix while the
# destination name uses the host endianness - confirm this is intended.
src_datafile = os.path.join(options.icusrc, "source/data/in/icudt%sl.dat" % (icu_ver_major))
dst_cmp_datafile = "%s%s" % (dst_datafile, compression_suffix)

if not os.path.isfile(src_datafile):
    print("Error: icu data file not found: %s" % src_datafile)
    # sys.exit rather than the bare exit() helper, which is only injected by
    # the site module and is not guaranteed to exist.
    sys.exit(1)

print("will use datafile %s" % (src_datafile))

# Copy the tree, filtered through icu_ignore.
print('%s --> %s' % (options.icusrc, options.icudst))
shutil.copytree(options.icusrc, options.icudst, ignore=icu_ignore)

# now, make the data dir (since we ignored it)
icudst_data = os.path.join(options.icudst, "source", "data")
icudst_in = os.path.join(icudst_data, "in")
os.mkdir(icudst_data)
os.mkdir(icudst_in)

print_size(src_datafile)

# Store the data file in compressed form only.
print('%s --compress-> %s' % (src_datafile, dst_cmp_datafile))
compress_data(src_datafile, dst_cmp_datafile)
print_size(dst_cmp_datafile)
readme_name = os.path.join(options.icudst, "README-FULL-ICU.txt" )

# Now, print a short notice
# Text mode: print() writes str, which a file opened 'wb' rejects on Python 3.
# The with-block closes the file even if a write fails.
with open(readme_name, 'w') as fi:
    print("ICU sources - auto generated by shrink-icu-src.py", file=fi)
    print("", file=fi)
    print("This directory contains the ICU subset used by --with-intl=full-icu", file=fi)
    print("It is a strict subset of ICU %s source files with the following exception(s):" % (icu_ver_major), file=fi)
    print("* %s : compressed data file" % (dst_cmp_datafile), file=fi)
    print("", file=fi)
    print("", file=fi)
    print("To rebuild this directory, see ../../tools/icu/README.md", file=fi)
    print("", file=fi)