summaryrefslogtreecommitdiff
path: root/tools/eslint/node_modules/chardet/index.js
blob: e5bceddd6c8fa978c8058f1aedc022ad26c9ee41 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

var fs = require('fs');

var utf8  = require('./encoding/utf8'),
  unicode = require('./encoding/unicode'),
  mbcs    = require('./encoding/mbcs'),
  sbcs    = require('./encoding/sbcs'),
  iso2022 = require('./encoding/iso2022');

// At the top level of a CommonJS module `this` is the exports object, so
// `self.detect(...)` in the file helpers below calls the exported detect().
var self = this;

// One instance of every available recogniser, created in the order listed.
// detect() offers its input to each of them and keeps the match that reports
// the highest confidence.
var recognisers = [
  // Unicode encodings
  utf8,
  unicode.UTF_16BE,
  unicode.UTF_16LE,
  unicode.UTF_32BE,
  unicode.UTF_32LE,
  // Multi-byte character sets
  mbcs.sjis,
  mbcs.big5,
  mbcs.euc_jp,
  mbcs.euc_kr,
  mbcs.gb_18030,
  // ISO-2022 family
  iso2022.ISO_2022_JP,
  iso2022.ISO_2022_KR,
  iso2022.ISO_2022_CN,
  // Single-byte character sets
  sbcs.ISO_8859_1,
  sbcs.ISO_8859_2,
  sbcs.ISO_8859_5,
  sbcs.ISO_8859_6,
  sbcs.ISO_8859_7,
  sbcs.ISO_8859_8,
  sbcs.ISO_8859_9,
  sbcs.windows_1251,
  sbcs.windows_1256,
  sbcs.KOI8_R
].map(function(Recogniser) {
  return new Recogniser();
});

/**
 * Guess the character encoding of a chunk of raw bytes.
 *
 * A byte-frequency histogram of the input is built and handed, together with
 * the input itself, to every recogniser; the name of the match reporting the
 * highest confidence is returned.
 *
 * @param {Buffer} buffer - bytes to inspect (anything indexable with a
 *   `length`, e.g. what `fs.readFile` yields).
 * @returns {?string} name of the best match, or `null` when no recogniser
 *   produced a match.
 */
module.exports.detect = function(buffer) {
  var byteCounts = [];
  var idx;

  // Histogram: byteCounts[v] is the number of times byte value v occurs.
  for (idx = 0; idx < 256; idx += 1) {
    byteCounts.push(0);
  }
  for (idx = 0; idx < buffer.length; idx += 1) {
    byteCounts[buffer[idx] & 0xff] += 1;
  }

  // True when at least one byte in the 0x80-0x9F range is present.
  var hasC1Bytes = byteCounts.slice(0x80, 0xa0).some(function(count) {
    return count !== 0;
  });

  // The recognisers read these exact property names. The raw and "input"
  // views both refer to the same, unmodified buffer.
  var context = {};
  context.fByteStats = byteCounts;
  context.fC1Bytes = hasC1Bytes;
  context.fRawInput = buffer;
  context.fRawLength = buffer.length;
  context.fInputBytes = buffer;
  context.fInputLen = buffer.length;

  // Ask every recogniser, keeping only those that produced a match.
  var candidates = [];
  recognisers.forEach(function(recogniser) {
    var result = recogniser.match(context);
    if (result) {
      candidates.push(result);
    }
  });

  // Ascending by confidence, so the strongest candidate ends up last.
  candidates.sort(function(lhs, rhs) {
    return lhs.confidence - rhs.confidence;
  });

  var best = candidates.pop();
  if (!best) {
    return null;
  }
  return best.name;
};

/**
 * Asynchronously detect the character encoding of a file.
 *
 * Without `opts.sampleSize` the whole file is read with `fs.readFile`. With
 * it, at most `sampleSize` bytes are read and only the bytes that were
 * actually read are analysed, so a file shorter than the sample is not padded
 * with filler bytes that would distort the byte statistics.
 *
 * Note: in the sampled path the file is opened with `fs.openSync`, so a
 * failure to open it is thrown synchronously rather than passed to `cb`.
 *
 * @param {string} filepath - path of the file to inspect.
 * @param {{sampleSize: (number|undefined)}|function} [opts] - options, or the
 *   callback when no options are given.
 * @param {function(?Error, ?string)} cb - receives `(err, null)` on a read
 *   failure, otherwise `(null, encodingName)`; the name is `null` when
 *   nothing matched.
 */
module.exports.detectFile = function(filepath, opts, cb) {
  if (typeof opts === 'function') {
    cb = opts;
    opts = undefined;
  }

  var fd;

  var handler = function(err, buffer) {
    // Compare against undefined rather than truthiness so that descriptor 0
    // would also be closed.
    if (fd !== undefined) {
      fs.closeSync(fd);
    }

    if (err) return cb(err, null);
    cb(null, self.detect(buffer));
  };

  if (opts && opts.sampleSize) {
    // `sample` must be a local: as an undeclared (global) variable,
    // overlapping detectFile calls would overwrite each other's buffer.
    var sample = Buffer.alloc(opts.sampleSize);
    fd = fs.openSync(filepath, 'r');

    fs.read(fd, sample, 0, opts.sampleSize, null, function(err, bytesRead) {
      // Analyse only what was read; the tail of `sample` is just padding.
      handler(err, err ? undefined : sample.slice(0, bytesRead));
    });
    return;
  }

  fs.readFile(filepath, handler);
};

/**
 * Synchronously detect the character encoding of a file.
 *
 * Without `opts.sampleSize` the whole file is read. With it, at most
 * `sampleSize` bytes are read and only the bytes that were actually read are
 * analysed, so a file shorter than the sample is not padded with filler
 * bytes that would distort the byte statistics.
 *
 * @param {string} filepath - path of the file to inspect.
 * @param {{sampleSize: (number|undefined)}} [opts] - optional settings.
 * @returns {?string} name of the detected encoding, or `null` when nothing
 *   matched.
 * @throws {Error} whatever the underlying `fs` call throws (missing file,
 *   permissions, read failure).
 */
module.exports.detectFileSync = function(filepath, opts) {
  if (opts && opts.sampleSize) {
    var sample = Buffer.alloc(opts.sampleSize);
    var fd = fs.openSync(filepath, 'r');
    var bytesRead;

    try {
      bytesRead = fs.readSync(fd, sample, 0, opts.sampleSize);
    } finally {
      // Release the descriptor even when the read throws.
      fs.closeSync(fd);
    }

    return self.detect(sample.slice(0, bytesRead));
  }

  return self.detect(fs.readFileSync(filepath));
};