quickjs-tart

quickjs-based runtime for wallet-core logic
Log | Files | Refs | README | LICENSE

u1.h (3168B)


      /*
       * Single-block SIMD path: for every full 64 bytes remaining, run
       * ROUNDS rounds over a copy of the 16-word state in x, add the original
       * state back in, XOR the result with 64 bytes read from m, store it to c,
       * and bump the counter kept in x[12]/x[13].
       *
       * NOTE(review): x, m, c, bytes and ROUNDS are declared outside this
       * fragment.  x is indexed as 32-bit words here (x[12] is read into a
       * uint32_t); m and c are presumably byte pointers, since they advance by
       * 64 together with bytes -- confirm against the including file.
       * The add / xor / rotate-by-16,12,8,7 sequence below matches the ChaCha
       * quarter-round; confirm against the including file.
       */
      1 while (bytes >= 64) {
      2     __m128i       x_0, x_1, x_2, x_3;
      3     __m128i       t_1;
            /*
             * Byte-permutation masks for _mm_shuffle_epi8 (an SSSE3 intrinsic).
             * Applied to a register, rot16 rotates every 32-bit lane by 16 bits
             * and rot8 rotates every 32-bit lane left by 8 bits.
             */
      4     const __m128i rot16 =
      5         _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
      6     const __m128i rot8 =
      7         _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
      8 
      9     uint32_t in12;
     10     uint32_t in13;
     11     int      i;
     12 
            /* Working copy of the state: four elements of x per register
               (unaligned loads, so x needs no particular alignment). */
     13     x_0 = _mm_loadu_si128((const __m128i*) (x + 0));
     14     x_1 = _mm_loadu_si128((const __m128i*) (x + 4));
     15     x_2 = _mm_loadu_si128((const __m128i*) (x + 8));
     16     x_3 = _mm_loadu_si128((const __m128i*) (x + 12));
     17 
            /* Each iteration performs two rounds, hence the step of 2. */
     18     for (i = 0; i < ROUNDS; i += 2) {
                /* ---- first round of the pair ---- */
                /* x_0 += x_1; x_3 ^= x_0; x_3 = rotl32(x_3, 16) */
     19         x_0 = _mm_add_epi32(x_0, x_1);
     20         x_3 = _mm_xor_si128(x_3, x_0);
     21         x_3 = _mm_shuffle_epi8(x_3, rot16);
     22 
                /* x_2 += x_3; x_1 ^= x_2; x_1 = rotl32(x_1, 12) */
     23         x_2 = _mm_add_epi32(x_2, x_3);
     24         x_1 = _mm_xor_si128(x_1, x_2);
     25 
                /* Rotate left by 12 built from two shifts; the shifted halves
                   do not overlap, so XOR combines them like OR would. */
     26         t_1 = x_1;
     27         x_1 = _mm_slli_epi32(x_1, 12);
     28         t_1 = _mm_srli_epi32(t_1, 20);
     29         x_1 = _mm_xor_si128(x_1, t_1);
     30 
                /* x_0 += x_1; x_3 ^= x_0; x_3 = rotl32(x_3, 8).
                   x_0 is finished for this round, so its lanes are rotated by
                   one position (0x93) right away, ahead of the second round. */
     31         x_0 = _mm_add_epi32(x_0, x_1);
     32         x_3 = _mm_xor_si128(x_3, x_0);
     33         x_0 = _mm_shuffle_epi32(x_0, 0x93);
     34         x_3 = _mm_shuffle_epi8(x_3, rot8);
     35 
                /* x_2 += x_3; x_1 ^= x_2, interleaved with the lane rotations
                   of x_3 (0x4e: two positions) and x_2 (0x39: three positions).
                   x_1 is left in place. */
     36         x_2 = _mm_add_epi32(x_2, x_3);
     37         x_3 = _mm_shuffle_epi32(x_3, 0x4e);
     38         x_1 = _mm_xor_si128(x_1, x_2);
     39         x_2 = _mm_shuffle_epi32(x_2, 0x39);
     40 
                /* x_1 = rotl32(x_1, 7) */
     41         t_1 = x_1;
     42         x_1 = _mm_slli_epi32(x_1, 7);
     43         t_1 = _mm_srli_epi32(t_1, 25);
     44         x_1 = _mm_xor_si128(x_1, t_1);
     45 
                /* ---- second round of the pair: same arithmetic on the
                   lane-rotated registers ---- */
     46         x_0 = _mm_add_epi32(x_0, x_1);
     47         x_3 = _mm_xor_si128(x_3, x_0);
     48         x_3 = _mm_shuffle_epi8(x_3, rot16);
     49 
     50         x_2 = _mm_add_epi32(x_2, x_3);
     51         x_1 = _mm_xor_si128(x_1, x_2);
     52 
                /* x_1 = rotl32(x_1, 12) */
     53         t_1 = x_1;
     54         x_1 = _mm_slli_epi32(x_1, 12);
     55         t_1 = _mm_srli_epi32(t_1, 20);
     56         x_1 = _mm_xor_si128(x_1, t_1);
     57 
                /* The lane rotations here are the inverses of the first
                   round's (0x39 undoes 0x93, 0x4e undoes itself, 0x93 undoes
                   0x39), restoring the original lane order of x_0, x_3, x_2. */
     58         x_0 = _mm_add_epi32(x_0, x_1);
     59         x_3 = _mm_xor_si128(x_3, x_0);
     60         x_0 = _mm_shuffle_epi32(x_0, 0x39);
     61         x_3 = _mm_shuffle_epi8(x_3, rot8);
     62 
     63         x_2 = _mm_add_epi32(x_2, x_3);
     64         x_3 = _mm_shuffle_epi32(x_3, 0x4e);
     65         x_1 = _mm_xor_si128(x_1, x_2);
     66         x_2 = _mm_shuffle_epi32(x_2, 0x93);
     67 
                /* x_1 = rotl32(x_1, 7) */
     68         t_1 = x_1;
     69         x_1 = _mm_slli_epi32(x_1, 7);
     70         t_1 = _mm_srli_epi32(t_1, 25);
     71         x_1 = _mm_xor_si128(x_1, t_1);
     72     }
            /* Feed-forward: add the unmodified input state (re-read from x)
               to the permuted state, lane by lane. */
     73     x_0 = _mm_add_epi32(x_0, _mm_loadu_si128((const __m128i*) (x + 0)));
     74     x_1 = _mm_add_epi32(x_1, _mm_loadu_si128((const __m128i*) (x + 4)));
     75     x_2 = _mm_add_epi32(x_2, _mm_loadu_si128((const __m128i*) (x + 8)));
     76     x_3 = _mm_add_epi32(x_3, _mm_loadu_si128((const __m128i*) (x + 12)));
            /* XOR the resulting block with the data read from m ... */
     77     x_0 = _mm_xor_si128(x_0, _mm_loadu_si128((const __m128i*) (m + 0)));
     78     x_1 = _mm_xor_si128(x_1, _mm_loadu_si128((const __m128i*) (m + 16)));
     79     x_2 = _mm_xor_si128(x_2, _mm_loadu_si128((const __m128i*) (m + 32)));
     80     x_3 = _mm_xor_si128(x_3, _mm_loadu_si128((const __m128i*) (m + 48)));
            /* ... and write the result to c (unaligned stores). */
     81     _mm_storeu_si128((__m128i*) (c + 0), x_0);
     82     _mm_storeu_si128((__m128i*) (c + 16), x_1);
     83     _mm_storeu_si128((__m128i*) (c + 32), x_2);
     84     _mm_storeu_si128((__m128i*) (c + 48), x_3);
     85 
            /* Increment the counter held in x[12] (low word) and x[13] (high
               word): when the low word wraps to 0, carry into the high word. */
     86     in12 = x[12];
     87     in13 = x[13];
     88     in12++;
     89     if (in12 == 0) {
     90         in13++;
     91     }
     92     x[12] = in12;
     93     x[13] = in13;
     94 
            /* Advance past the block just processed. */
     95     bytes -= 64;
     96     c += 64;
     97     m += 64;
     98 }