aboutsummaryrefslogtreecommitdiff
path: root/deps/v8/src/asmjs/asm-scanner.h
blob: 076a7607e3862496d8fec786b8a93e0fbb7c7746 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_SCANNER_H_
#define V8_ASMJS_ASM_SCANNER_H_

#include <memory>
#include <string>
#include <unordered_map>

#include "src/asmjs/asm-names.h"
#include "src/base/logging.h"
#include "src/common/globals.h"

namespace v8 {
namespace internal {

class Utf16CharacterStream;

// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
// This scanner intentionally avoids the portion of JavaScript lexing
// that are not required to determine if code is valid asm.js code.
// * Strings are disallowed except for 'use asm'.
// * Only the subset of keywords needed to check asm.js invariants are
//   included.
// * Identifiers are accumulated into local + global string tables
//   (for performance).
class V8_EXPORT_PRIVATE AsmJsScanner {
 public:
  using token_t = int32_t;

  explicit AsmJsScanner(Utf16CharacterStream* stream);

  // Get current token.
  token_t Token() const { return token_; }
  // Get position of current token.
  size_t Position() const { return position_; }
  // Advance to the next token.
  void Next();
  // Back up by one token.
  void Rewind();

  // Get raw string for current identifier. Note that the returned string will
  // become invalid when the scanner advances, create a copy to preserve it.
  const std::string& GetIdentifierString() const {
    // Identifier strings don't work after a rewind.
    DCHECK(!rewind_);
    return identifier_string_;
  }

  // Check if we just passed a newline.
  bool IsPrecededByNewline() const {
    // Newline tracking doesn't work if you back up.
    DCHECK(!rewind_);
    return preceded_by_newline_;
  }

#if DEBUG
  // Debug only method to go from a token back to its name.
  // Slow, only use for debugging.
  std::string Name(token_t token) const;
#endif

  // Restores old position (token after that position). Note that it is not
  // allowed to rewind right after a seek, because previous tokens are unknown.
  void Seek(size_t pos);

  // Select whether identifiers are resolved in global or local scope,
  // and which scope new identifiers are added to.
  void EnterLocalScope() { in_local_scope_ = true; }
  void EnterGlobalScope() { in_local_scope_ = false; }
  // Drop all current local identifiers.
  void ResetLocals();

  // Methods to check if a token is an identifier and which scope.
  bool IsLocal() const { return IsLocal(Token()); }
  bool IsGlobal() const { return IsGlobal(Token()); }
  static bool IsLocal(token_t token) { return token <= kLocalsStart; }
  static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
  // Methods to find the index position of an identifier (count starting from
  // 0 for each scope separately).
  static size_t LocalIndex(token_t token) {
    DCHECK(IsLocal(token));
    return -(token - kLocalsStart);
  }
  static size_t GlobalIndex(token_t token) {
    DCHECK(IsGlobal(token));
    return token - kGlobalsStart;
  }

  // Methods to check if the current token is a numeric literal considered an
  // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
  // that numbers without a dot outside the [0 .. 2^32) range are errors.
  bool IsUnsigned() const { return Token() == kUnsigned; }
  uint32_t AsUnsigned() const {
    DCHECK(IsUnsigned());
    return unsigned_value_;
  }
  bool IsDouble() const { return Token() == kDouble; }
  double AsDouble() const {
    DCHECK(IsDouble());
    return double_value_;
  }

  // clang-format off
  enum {
    // [-10000-kMaxIdentifierCount, -10000)    :: Local identifiers (counting
    //                                            backwards)
    // [-10000 .. -1)                          :: Builtin tokens like keywords
    //                                            (also includes some special
    //                                             ones like end of input)
    // 0        .. 255                         :: Single char tokens
    // 256      .. 256+kMaxIdentifierCount     :: Global identifiers
    kLocalsStart = -10000,
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
    STDLIB_MATH_FUNCTION_LIST(V)
    STDLIB_ARRAY_TYPE_LIST(V)
#undef V
#define V(name, _junk1) kToken_##name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
#define V(name) kToken_##name,
    STDLIB_OTHER_LIST(V)
    KEYWORD_NAME_LIST(V)
#undef V
#define V(rawname, name) kToken_##name,
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) name = value,
    SPECIAL_TOKEN_LIST(V)
#undef V
    kGlobalsStart = 256,
  };
  // clang-format on

 private:
  Utf16CharacterStream* stream_;
  token_t token_;
  token_t preceding_token_;
  token_t next_token_;         // Only set when in {rewind} state.
  size_t position_;            // Corresponds to {token} position.
  size_t preceding_position_;  // Corresponds to {preceding_token} position.
  size_t next_position_;       // Only set when in {rewind} state.
  bool rewind_;
  std::string identifier_string_;
  bool in_local_scope_;
  std::unordered_map<std::string, token_t> local_names_;
  std::unordered_map<std::string, token_t> global_names_;
  std::unordered_map<std::string, token_t> property_names_;
  int global_count_;
  double double_value_;
  uint32_t unsigned_value_;
  bool preceded_by_newline_;

  // Consume multiple characters.
  void ConsumeIdentifier(uc32 ch);
  void ConsumeNumber(uc32 ch);
  bool ConsumeCComment();
  void ConsumeCPPComment();
  void ConsumeString(uc32 quote);
  void ConsumeCompareOrShift(uc32 ch);

  // Classify character categories.
  bool IsIdentifierStart(uc32 ch);
  bool IsIdentifierPart(uc32 ch);
  bool IsNumberStart(uc32 ch);
};

}  // namespace internal
}  // namespace v8

#endif  // V8_ASMJS_ASM_SCANNER_H_