summaryrefslogtreecommitdiff
path: root/deps/v8/src/asmjs/asm-parser.h
blob: c7bf30c29e56cb3f576b8eba6959b03b5997b219 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_PARSER_H_
#define V8_ASMJS_ASM_PARSER_H_

#include <memory>
#include <string>

#include "src/asmjs/asm-scanner.h"
#include "src/asmjs/asm-types.h"
#include "src/base/enum-set.h"
#include "src/utils/vector.h"
#include "src/wasm/wasm-module-builder.h"
#include "src/zone/zone-containers.h"

namespace v8 {
namespace internal {

// Forward declaration: within this header the stream type is only used as a
// pointer parameter of the AsmJsParser constructor, so its full definition is
// not required here.
class Utf16CharacterStream;

namespace wasm {

// A custom parser + validator + wasm converter for asm.js:
// http://asmjs.org/spec/latest/
// This parser intentionally avoids the portions of JavaScript parsing
// that are not required to determine if code is valid asm.js code.
// * It is mostly one pass.
// * It bails out on unexpected input.
// * It assumes strict ordering insofar as permitted by asm.js validation rules.
// * It relies on a custom scanner that provides de-duped identifiers in two
//   scopes (local + module wide).
class AsmJsParser {
 public:
  // Identifies the members of the asm.js standard library. Apart from the two
  // explicit entries, the enumerators are generated from the STDLIB_* macro
  // lists (defined outside this header): one kMath<name> per entry of the math
  // function and math value lists, and one k<name> per entry of the array type
  // list. This is the element type of {StdlibSet}.
  // clang-format off
  enum StandardMember {
    kInfinity,
    kNaN,
#define V(_unused1, name, _unused2, _unused3) kMath##name,
    STDLIB_MATH_FUNCTION_LIST(V)
#undef V
#define V(name, _unused1) kMath##name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
#define V(name, _unused1, _unused2, _unused3) k##name,
    STDLIB_ARRAY_TYPE_LIST(V)
#undef V
  };
  // clang-format on

  // Set of {StandardMember} values; see {stdlib_uses}.
  // NOTE(review): uint64_t is presumably the backing storage type of the set,
  // which would cap the number of enumerators above -- confirm in enum-set.h.
  using StdlibSet = base::EnumSet<StandardMember, uint64_t>;

  // Constructs a parser over {stream}. The definition is not in this header;
  // {zone} and {stack_limit} presumably initialize {zone_} and {stack_limit_}.
  explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
                       Utf16CharacterStream* stream);
  // Runs the parser.
  // NOTE(review): presumably returns true on success and false on failure, in
  // which case {failure_message} / {failure_location} describe the error --
  // confirm against the definition.
  bool Run();
  // Accessors for the recorded failure message and failure location.
  const char* failure_message() const { return failure_message_; }
  int failure_location() const { return failure_location_; }
  // Returns the wasm module builder held by this parser.
  WasmModuleBuilder* module_builder() { return module_builder_; }
  // Returns a pointer to the parser's {StdlibSet}; the pointer refers to a
  // member of this object.
  const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }

 private:
  // Classifies what a {VarInfo} entry refers to. {kUnused} is the default
  // value of {VarInfo::kind}. The trailing kMath<Name> enumerators are
  // generated from the same math function / math value macro lists that are
  // used for {StandardMember}.
  // clang-format off
  enum class VarKind {
    kUnused,
    kLocal,
    kGlobal,
    kSpecial,
    kFunction,
    kTable,
    kImportedFunction,
#define V(_unused0, Name, _unused1, _unused2) kMath##Name,
    STDLIB_MATH_FUNCTION_LIST(V)
#undef V
#define V(Name, _unused1) kMath##Name,
    STDLIB_MATH_VALUE_LIST(V)
#undef V
  };
  // clang-format on

  // A single import in asm.js can require multiple imports in wasm, if the
  // function is used with different signatures. {cache} keeps the wasm
  // imports for the single asm.js import of name {function_name}.
  struct FunctionImportInfo {
    Vector<const char> function_name;
    ZoneUnorderedMap<FunctionSig, uint32_t> cache;

    // Constructor. Stores {name} and creates an empty {cache} in {zone}.
    FunctionImportInfo(Vector<const char> name, Zone* zone)
        : function_name(name), cache(zone) {}
  };

  // Per-variable record; element type of {global_var_info_} and
  // {local_var_info_}. Every field has a default, so a default-constructed
  // entry has kind {VarKind::kUnused}, type {AsmType::None()} and is marked
  // mutable.
  struct VarInfo {
    AsmType* type = AsmType::None();
    WasmFunctionBuilder* function_builder = nullptr;
    FunctionImportInfo* import = nullptr;
    uint32_t mask = 0;
    uint32_t index = 0;
    VarKind kind = VarKind::kUnused;
    bool mutable_variable = true;
    bool function_defined = false;
  };

  // Element type of {global_imports_}: an imported name together with its wasm
  // value type and the {VarInfo} it is associated with.
  struct GlobalImport {
    Vector<const char> import_name;
    ValueType value_type;
    VarInfo* var_info;
  };

  // Distinguish different kinds of blocks participating in {block_stack}. Each
  // entry on that stack represents one block in the wasm code, and determines
  // which block 'break' and 'continue' target in the current context:
  //  - kRegular: The target of a 'break' (with & without identifier).
  //              Pushed by an IterationStatement and a SwitchStatement.
  //  - kLoop   : The target of a 'continue' (with & without identifier).
  //              Pushed by an IterationStatement.
  //  - kNamed  : The target of a 'break' with a specific identifier.
  //              Pushed by a BlockStatement.
  //  - kOther  : Only used for internal blocks, can never be targeted.
  enum class BlockKind { kRegular, kLoop, kNamed, kOther };

  // One entry in the {block_stack}, see {BlockKind} above for details. Blocks
  // without a label have {kTokenNone} set as their label.
  struct BlockInfo {
    BlockKind kind;
    AsmJsScanner::token_t label;
  };

  // Helper class to make {TempVariable} safe for nesting.
  class TemporaryVariableScope;

  // A pool of ZoneVector<T> objects that {CachedVector} instances draw from
  // and give back to.
  // NOTE(review): the intent is presumably to reuse already-allocated vector
  // capacity instead of allocating fresh vectors in the zone -- confirm.
  template <typename T>
  class CachedVectors {
   public:
    explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}

    // Returns the zone obtained from the pool's own allocator.
    Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }

    // If the pool is non-empty, swaps the most recently pooled vector with
    // {*vec}, drops the pool entry and clears {*vec}. If the pool is empty,
    // {*vec} is left untouched.
    inline void fill(ZoneVector<T>* vec) {
      if (reusable_vectors_.empty()) return;
      reusable_vectors_.back().swap(*vec);
      reusable_vectors_.pop_back();
      vec->clear();
    }

    // Moves {*vec} into the pool; {*vec} is left in a moved-from state.
    inline void reuse(ZoneVector<T>* vec) {
      reusable_vectors_.emplace_back(std::move(*vec));
    }

   private:
    ZoneVector<ZoneVector<T>> reusable_vectors_;
  };

  // A ZoneVector<T> tied to a {CachedVectors<T>} pool: the constructor calls
  // {fill} on the pool with this vector, and the destructor hands this vector
  // back via {reuse}. The pool must therefore outlive the vector.
  template <typename T>
  class CachedVector final : public ZoneVector<T> {
   public:
    explicit CachedVector(CachedVectors<T>* cache)
        : ZoneVector<T>(cache->zone()), cache_(cache) {
      cache->fill(this);
    }
    ~CachedVector() { cache_->reuse(this); }

   private:
    CachedVectors<T>* cache_;
  };

  // Parser state. Data members are initialized in declaration order: the
  // {cached_*_vectors_} pools below use default member initializers that read
  // {zone_}, so they must stay declared after {zone_}.
  Zone* zone_;
  AsmJsScanner scanner_;
  WasmModuleBuilder* module_builder_;
  WasmFunctionBuilder* current_function_builder_;
  AsmType* return_type_;
  uintptr_t stack_limit_;
  StdlibSet stdlib_uses_;
  ZoneVector<VarInfo> global_var_info_;
  ZoneVector<VarInfo> local_var_info_;

  // Vector pools, one per element type; see {CachedVectors}.
  CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
  CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
  CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
  CachedVectors<int32_t> cached_int_vectors_{zone_};

  // Bookkeeping for temporary locals; see {TempVariable} and
  // {TemporaryVariableScope}.
  int function_temp_locals_offset_;
  int function_temp_locals_used_;
  int function_temp_locals_depth_;

  // Error Handling related
  bool failed_;
  const char* failure_message_;
  int failure_location_;

  // Module Related.
  AsmJsScanner::token_t stdlib_name_;
  AsmJsScanner::token_t foreign_name_;
  AsmJsScanner::token_t heap_name_;

  // Label value used for blocks without a label (see {BlockInfo}).
  static const AsmJsScanner::token_t kTokenNone = 0;

  // Track if parsing a heap assignment.
  bool inside_heap_assignment_;
  AsmType* heap_access_type_;

  // Stack of currently open blocks; see {BlockKind} and {BlockInfo}.
  ZoneVector<BlockInfo> block_stack_;

  // Types used for stdlib function and their set up.
  AsmType* stdlib_dq2d_;
  AsmType* stdlib_dqdq2d_;
  AsmType* stdlib_i2s_;
  AsmType* stdlib_ii2s_;
  AsmType* stdlib_minmax_;
  AsmType* stdlib_abs_;
  AsmType* stdlib_ceil_like_;
  AsmType* stdlib_fround_;

  // When making calls, the return type is needed to lookup signatures.
  // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
  // along the coercion.
  AsmType* call_coercion_;

  // The source position associated with the above {call_coercion}.
  size_t call_coercion_position_;

  // When making calls, the coercion can also appear in the source stream
  // syntactically "behind" the call site. For `callsite(..)|0` use this
  // value to flag that such a coercion must happen.
  AsmType* call_coercion_deferred_;

  // The source position at which requesting a deferred coercion via the
  // aforementioned {call_coercion_deferred} is allowed.
  size_t call_coercion_deferred_position_;

  // The code position of the last heap access shift by an immediate value.
  // For `heap[expr >> value:NumericLiteral]` this indicates from where to
  // delete code when the expression is used as part of a valid heap access.
  // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
  size_t heap_access_shift_position_;
  uint32_t heap_access_shift_value_;
  // The -1 is converted to size_t, i.e. this is the largest size_t value.
  static const size_t kNoHeapAccessShift = -1;

  // Used to track the last label we've seen so it can be matched to later
  // statements it's attached to.
  AsmJsScanner::token_t pending_label_;

  // Global imports. The list of imported variables that are copied during
  // module instantiation into a corresponding global variable.
  ZoneLinkedList<GlobalImport> global_imports_;

  Zone* zone() { return zone_; }

  // Returns true if the scanner's current token equals {token}. This helper
  // never calls Next() on the scanner.
  inline bool Peek(AsmJsScanner::token_t token) {
    return scanner_.Token() == token;
  }

  // If the scanner's current token equals {token}, advances the scanner by one
  // token and returns true. Otherwise returns false without calling Next().
  inline bool Check(AsmJsScanner::token_t token) {
    if (scanner_.Token() == token) {
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  // If the scanner reports an unsigned value (IsUnsigned()) that equals 0,
  // advances the scanner and returns true. Otherwise returns false without
  // calling Next().
  inline bool CheckForZero() {
    if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  // If the scanner reports a double value (IsDouble()), stores it in {*value},
  // advances the scanner and returns true. Otherwise returns false and leaves
  // {*value} unmodified.
  inline bool CheckForDouble(double* value) {
    if (scanner_.IsDouble()) {
      *value = scanner_.AsDouble();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  // If the scanner reports an unsigned value (IsUnsigned()), stores it in
  // {*value}, advances the scanner and returns true. Otherwise returns false
  // and leaves {*value} unmodified.
  inline bool CheckForUnsigned(uint32_t* value) {
    if (scanner_.IsUnsigned()) {
      *value = scanner_.AsUnsigned();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  // Like {CheckForUnsigned}, but only matches values strictly less than
  // {limit}. On a mismatch nothing is consumed and {*value} is unmodified.
  inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
    if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
      *value = scanner_.AsUnsigned();
      scanner_.Next();
      return true;
    } else {
      return false;
    }
  }

  // Unconditionally advances the scanner by one token and returns the token
  // that was current before advancing.
  inline AsmJsScanner::token_t Consume() {
    AsmJsScanner::token_t ret = scanner_.Token();
    scanner_.Next();
    return ret;
  }

  void SkipSemicolon();

  VarInfo* GetVarInfo(AsmJsScanner::token_t token);
  uint32_t VarIndex(VarInfo* info);
  void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
                     ValueType vtype,
                     const WasmInitExpr& init = WasmInitExpr());
  void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
  void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
                       bool mutable_variable, VarInfo* info);

  // Allocates a temporary local variable. The given {index} is absolute within
  // the function body, consider using {TemporaryVariableScope} when nesting.
  uint32_t TempVariable(int index);

  // Preserves a copy of the scanner's current identifier string in the zone.
  Vector<const char> CopyCurrentIdentifierString();

  // Use to set up block stack layers (including synthetic ones for if-else).
  // Begin/Loop/End below are implemented with these plus code generation.
  // The default {label} of 0 has the same value as {kTokenNone}.
  void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0);
  void BareEnd();
  int FindContinueLabelDepth(AsmJsScanner::token_t label);
  int FindBreakLabelDepth(AsmJsScanner::token_t label);

  // Use to set up actual wasm blocks/loops.
  void Begin(AsmJsScanner::token_t label = 0);
  void Loop(AsmJsScanner::token_t label = 0);
  void End();

  void InitializeStdlibTypes();

  FunctionSig* ConvertSignature(AsmType* return_type,
                                const ZoneVector<AsmType*>& params);

  // The trailing comments on the declarations below name the corresponding
  // section of the asm.js specification (http://asmjs.org/spec/latest/).
  void ValidateModule();            // 6.1 ValidateModule
  void ValidateModuleParameters();  // 6.1 ValidateModule - parameters
  void ValidateModuleVars();        // 6.1 ValidateModule - variables
  void ValidateModuleVar(bool mutable_variable);
  void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
  void ValidateModuleVarStdlib(VarInfo* info);
  void ValidateModuleVarNewStdlib(VarInfo* info);
  void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);

  void ValidateExport();         // 6.2 ValidateExport
  void ValidateFunctionTable();  // 6.3 ValidateFunctionTable
  void ValidateFunction();       // 6.4 ValidateFunction
  void ValidateFunctionParams(ZoneVector<AsmType*>* params);
  void ValidateFunctionLocals(size_t param_count,
                              ZoneVector<ValueType>* locals);
  void ValidateStatement();              // 6.5 ValidateStatement
  void Block();                          // 6.5.1 Block
  void ExpressionStatement();            // 6.5.2 ExpressionStatement
  void EmptyStatement();                 // 6.5.3 EmptyStatement
  void IfStatement();                    // 6.5.4 IfStatement
  void ReturnStatement();                // 6.5.5 ReturnStatement
  bool IterationStatement();             // 6.5.6 IterationStatement
  void WhileStatement();                 // 6.5.6 IterationStatement - while
  void DoStatement();                    // 6.5.6 IterationStatement - do
  void ForStatement();                   // 6.5.6 IterationStatement - for
  void BreakStatement();                 // 6.5.7 BreakStatement
  void ContinueStatement();              // 6.5.8 ContinueStatement
  void LabelledStatement();              // 6.5.9 LabelledStatement
  void SwitchStatement();                // 6.5.10 SwitchStatement
  void ValidateCase();                   // 6.6. ValidateCase
  void ValidateDefault();                // 6.7 ValidateDefault
  AsmType* ValidateExpression();         // 6.8 ValidateExpression
  AsmType* Expression(AsmType* expect);  // 6.8.1 Expression
  AsmType* NumericLiteral();             // 6.8.2 NumericLiteral
  AsmType* Identifier();                 // 6.8.3 Identifier
  AsmType* CallExpression();             // 6.8.4 CallExpression
  AsmType* MemberExpression();           // 6.8.5 MemberExpression
  AsmType* AssignmentExpression();       // 6.8.6 AssignmentExpression
  AsmType* UnaryExpression();            // 6.8.7 UnaryExpression
  AsmType* MultiplicativeExpression();   // 6.8.8 MultiplicativeExpression
  AsmType* AdditiveExpression();         // 6.8.9 AdditiveExpression
  AsmType* ShiftExpression();            // 6.8.10 ShiftExpression
  AsmType* RelationalExpression();       // 6.8.11 RelationalExpression
  AsmType* EqualityExpression();         // 6.8.12 EqualityExpression
  AsmType* BitwiseANDExpression();       // 6.8.13 BitwiseANDExpression
  AsmType* BitwiseXORExpression();       // 6.8.14 BitwiseXORExpression
  AsmType* BitwiseORExpression();        // 6.8.15 BitwiseORExpression
  AsmType* ConditionalExpression();      // 6.8.16 ConditionalExpression
  AsmType* ParenthesizedExpression();    // 6.8.17 ParenthesizedExpression
  AsmType* ValidateCall();               // 6.9 ValidateCall
  bool PeekCall();                       // 6.9 ValidateCall - helper
  void ValidateHeapAccess();             // 6.10 ValidateHeapAccess
  void ValidateFloatCoercion();          // 6.11 ValidateFloatCoercion

  // Used as part of {ForStatement}. Scans forward to the next `)` in order to
  // skip over the third expression in a for-statement. This is one piece that
  // makes this parser not be a pure single-pass.
  void ScanToClosingParenthesis();

  // Used as part of {SwitchStatement}. Collects all case labels in the current
  // switch-statement, then resets the scanner position. This is one piece that
  // makes this parser not be a pure single-pass.
  void GatherCases(ZoneVector<int32_t>* cases);
};

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_ASMJS_ASM_PARSER_H_