diff options
Diffstat (limited to 'deps/openssl/openssl/crypto/modes')
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl | 75 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl | 4 | ||||
-rwxr-xr-x | deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl | 2 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl | 16 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl | 14 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl | 15 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl | 80 | ||||
-rwxr-xr-x | deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl | 5 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl | 379 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/build.info | 5 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/cts128.c | 193 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/gcm128.c | 1019 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/modes_lcl.h | 4 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/ocb128.c | 33 | ||||
-rw-r--r-- | deps/openssl/openssl/crypto/modes/wrap128.c | 16 |
15 files changed, 826 insertions, 1034 deletions
diff --git a/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl index 5ad62b3979..b42016101e 100644 --- a/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl +++ b/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -35,6 +35,8 @@ # Applications using the EVP interface will observe a few percent # worse performance.] # +# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP). +# # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf @@ -116,23 +118,6 @@ _aesni_ctr32_ghash_6x: vpxor $rndkey,$inout3,$inout3 vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey vpclmulqdq \$0x01,$Hkey,$Z3,$Z2 - - # At this point, the current block of 96 (0x60) bytes has already been - # loaded into registers. Concurrently with processing it, we want to - # load the next 96 bytes of input for the next round. Obviously, we can - # only do this if there are at least 96 more bytes of input beyond the - # input we're currently processing, or else we'd read past the end of - # the input buffer. Here, we set |%r12| to 96 if there are at least 96 - # bytes of input beyond the 96 bytes we're already processing, and we - # set |%r12| to 0 otherwise. In the case where we set |%r12| to 96, - # we'll read in the next block so that it is in registers for the next - # loop iteration. In the case where we set |%r12| to 0, we'll re-read - # the current block and then ignore what we re-read. - # - # At this point, |$in0| points to the current (already read into - # registers) block, and |$end0| points to 2*96 bytes before the end of - # the input. Thus, |$in0| > |$end0| means that we do not have the next - # 96-byte block to read in, and |$in0| <= |$end0| means we do. xor %r12,%r12 cmp $in0,$end0 @@ -424,20 +409,25 @@ $code.=<<___; .type aesni_gcm_decrypt,\@function,6 .align 32 aesni_gcm_decrypt: +.cfi_startproc xor $ret,$ret - - # We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60) - # bytes of input. cmp \$0x60,$len # minimal accepted length jb .Lgcm_dec_abort lea (%rsp),%rax # save stack pointer +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -482,15 +472,7 @@ $code.=<<___; vmovdqu 0x50($inp),$Z3 # I[5] lea ($inp),$in0 vmovdqu 0x40($inp),$Z0 - - # |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0) - # bytes before the end of the input. Note, in particular, that this is - # correct even if |$len| is not an even multiple of 96 or 16. XXX: This - # seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must - # not be near the very beginning of the address space when |$len| < 2*96 - # (0xc0). lea -0xc0($inp,$len),$end0 - vmovdqu 0x30($inp),$Z1 shr \$4,$len xor $ret,$ret @@ -537,15 +519,23 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lgcm_dec_abort: mov $ret,%rax # return value ret +.cfi_endproc .size aesni_gcm_decrypt,.-aesni_gcm_decrypt ___ @@ -645,21 +635,25 @@ _aesni_ctr32_6x: .type aesni_gcm_encrypt,\@function,6 .align 32 aesni_gcm_encrypt: +.cfi_startproc xor $ret,$ret - - # We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of - # input. Then we call |_aesni_ctr32_ghash_6x|, which requires at - # least 96 more bytes of input. cmp \$0x60*3,$len # minimal accepted length jb .Lgcm_enc_abort lea (%rsp),%rax # save stack pointer +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -699,16 +693,7 @@ $code.=<<___; .Lenc_no_key_aliasing: lea ($out),$in0 - - # |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0) - # bytes before the end of the input. Note, in particular, that this is - # correct even if |$len| is not an even multiple of 96 or 16. Unlike in - # the decryption case, there's no caveat that |$out| must not be near - # the very beginning of the address space, because we know that - # |$len| >= 3*96 from the check above, and so we know - # |$out| + |$len| >= 2*96 (0xc0). lea -0xc0($out,$len),$end0 - shr \$4,$len call _aesni_ctr32_6x @@ -931,15 +916,23 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lgcm_enc_abort: mov $ret,%rax # return value ret +.cfi_endproc .size aesni_gcm_encrypt,.-aesni_gcm_encrypt ___ diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl index 1cf14a6c9f..dcc23f7d7d 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl @@ -54,7 +54,7 @@ # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf # ==================================================================== @@ -525,7 +525,7 @@ $code.=<<___; #ifdef __ARMEL__ vrev64.8 $Xl,$Xl #endif - sub $Xi,#16 + sub $Xi,#16 vst1.64 $Xl#hi,[$Xi]! @ write out Xi vst1.64 $Xl#lo,[$Xi] diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl index 81e75f71a8..eb9ded91e5 100755 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl @@ -156,7 +156,7 @@ $code.=<<___; ___ ###################################################################### -# "528B" (well, "512B" actualy) streamed GHASH +# "528B" (well, "512B" actually) streamed GHASH # $Xip="in0"; $Htbl="in1"; diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl index 1d6254543b..a614c99c22 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -705,7 +705,7 @@ my $depd = sub { my ($mod,$args) = @_; my $orig = "depd$mod\t$args"; - # I only have ",z" completer, it's impicitly encoded... + # I only have ",z" completer, it's implicitly encoded... if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16 { my $opcode=(0x3c<<26)|($4<<21)|($1<<16); my $cpos=63-$2; @@ -724,6 +724,11 @@ sub assemble { ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; } +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler/) { + $gnuas = 1; +} + foreach (split("\n",$code)) { s/\`([^\`]*)\`/eval $1/ge; if ($SIZE_T==4) { @@ -731,7 +736,12 @@ foreach (split("\n",$code)) { s/cmpb,\*/comb,/; s/,\*/,/; } - s/\bbv\b/bve/ if ($SIZE_T==8); + + s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8); + s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8); + s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8); + s/\bbv\b/bve/ if ($SIZE_T==8); + print $_,"\n"; } diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl index 6e628d8823..17dc375053 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl @@ -80,6 +80,8 @@ $rem_4bit="%r14"; $sp="%r15"; $code.=<<___; +#include "s390x_arch.h" + .text .globl gcm_gmult_4bit @@ -89,12 +91,13 @@ ___ $code.=<<___ if(!$softonly && 0); # hardware is slow for single block... larl %r1,OPENSSL_s390xcap_P lghi %r0,0 - lg %r1,24(%r1) # load second word of kimd capabilities vector + lg %r1,S390X_KIMD+8(%r1) # load second word of kimd capabilities + # vector tmhh %r1,0x4000 # check for function 65 jz .Lsoft_gmult stg %r0,16($sp) # arrange 16 bytes of zero input stg %r0,24($sp) - lghi %r0,65 # function 65 + lghi %r0,S390X_GHASH # function 65 la %r1,0($Xi) # H lies right after Xi in gcm128_context la $inp,16($sp) lghi $len,16 @@ -123,10 +126,11 @@ gcm_ghash_4bit: ___ $code.=<<___ if(!$softonly); larl %r1,OPENSSL_s390xcap_P - lg %r0,24(%r1) # load second word of kimd capabilities vector + lg %r0,S390X_KIMD+8(%r1) # load second word of kimd capabilities + # vector tmhh %r0,0x4000 # check for function 65 jz .Lsoft_ghash - lghi %r0,65 # function 65 + lghi %r0,S390X_GHASH # function 65 la %r1,0($Xi) # H lies right after Xi in gcm128_context .long 0xb93e0004 # kimd %r0,$inp brc 1,.-4 # pay attention to "partial completion" @@ -149,7 +153,7 @@ $code.=<<___; lg $Zhi,0+1($Xi) lghi $tmp,0 .Louter: - xg $Zhi,0($inp) # Xi ^= inp + xg $Zhi,0($inp) # Xi ^= inp xg $Zlo,8($inp) xgr $Zhi,$tmp stg $Zlo,8+1($Xi) diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl index cd8458256e..bcbe6e399d 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl @@ -103,14 +103,13 @@ # # Does it make sense to increase Naggr? To start with it's virtually # impossible in 32-bit mode, because of limited register bank -# capacity. Otherwise improvement has to be weighed agiainst slower +# capacity. Otherwise improvement has to be weighed against slower # setup, as well as code size and complexity increase. As even # optimistic estimate doesn't promise 30% performance improvement, # there are currently no plans to increase Naggr. # -# Special thanks to David Woodhouse <dwmw2@infradead.org> for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. +# Special thanks to David Woodhouse for providing access to a +# Westmere-based system on behalf of Intel Open Source Technology Centre. # January 2010 # @@ -139,7 +138,7 @@ require "x86asm.pl"; $output=pop; open STDOUT,">$output"; -&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); +&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386"); $sse2=0; for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } @@ -811,7 +810,7 @@ sub mmx_loop() { &bswap ($dat); &pshufw ($Zhi,$Zhi,0b00011011); # 76543210 &bswap ("ebx"); - + &cmp ("ecx",&DWP(528+16+8,"esp")); # are we done? &jne (&label("outer")); } @@ -915,7 +914,7 @@ my ($Xhi,$Xi) = @_; &psllq ($Xi,57); # &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T1,8); # + &psrldq ($T1,8); # &pxor ($Xi,$T2); &pxor ($Xhi,$T1); # @@ -1085,7 +1084,7 @@ my ($Xhi,$Xi) = @_; &psllq ($Xi,57); # &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T1,8); # + &psrldq ($T1,8); # &pxor ($Xi,$T2); &pxor ($Xhi,$T1); # &pshufd ($T1,$Xhn,0b01001110); diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl index 387e3f854e..afc30c3e72 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl @@ -44,9 +44,8 @@ # See ghash-x86.pl for background information and details about coding # techniques. # -# Special thanks to David Woodhouse <dwmw2@infradead.org> for -# providing access to a Westmere-based system on behalf of Intel -# Open Source Technology Centre. +# Special thanks to David Woodhouse for providing access to a +# Westmere-based system on behalf of Intel Open Source Technology Centre. # December 2012 # @@ -74,6 +73,7 @@ # Skylake 0.44(+110%)(if system doesn't support AVX) # Bulldozer 1.49(+27%) # Silvermont 2.88(+13%) +# Knights L 2.12(-) (if system doesn't support AVX) # Goldmont 1.08(+24%) # March 2013 @@ -86,6 +86,8 @@ # it performs in 0.41 cycles per byte on Haswell processor, in # 0.29 on Broadwell, and in 0.36 on Skylake. # +# Knights Landing achieves 1.09 cpb. +# # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest $flavour = shift; @@ -236,9 +238,21 @@ $code=<<___; .type gcm_gmult_4bit,\@function,2 .align 16 gcm_gmult_4bit: +.cfi_startproc push %rbx - push %rbp # %rbp and %r12 are pushed exclusively in +.cfi_push %rbx + push %rbp # %rbp and others are pushed exclusively in +.cfi_push %rbp push %r12 # order to reuse Win64 exception handler... +.cfi_push %r12 + push %r13 +.cfi_push %r13 + push %r14 +.cfi_push %r14 + push %r15 +.cfi_push %r15 + sub \$280,%rsp +.cfi_adjust_cfa_offset 280 .Lgmult_prologue: movzb 15($Xi),$Zlo @@ -249,10 +263,15 @@ $code.=<<___; mov $Zlo,8($Xi) mov $Zhi,($Xi) - mov 16(%rsp),%rbx - lea 24(%rsp),%rsp + lea 280+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lgmult_epilogue: ret +.cfi_endproc .size gcm_gmult_4bit,.-gcm_gmult_4bit ___ @@ -266,13 +285,21 @@ $code.=<<___; .type gcm_ghash_4bit,\@function,4 .align 16 gcm_ghash_4bit: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 sub \$280,%rsp +.cfi_adjust_cfa_offset 280 .Lghash_prologue: mov $inp,%r14 # reassign couple of args mov $len,%r15 @@ -400,16 +427,25 @@ $code.=<<___; mov $Zlo,8($Xi) mov $Zhi,($Xi) - lea 280(%rsp),%rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + lea 280+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea 0(%rsi),%rsp +.cfi_def_cfa_register %rsp .Lghash_epilogue: ret +.cfi_endproc .size gcm_ghash_4bit,.-gcm_ghash_4bit ___ @@ -469,7 +505,7 @@ $code.=<<___; psllq \$57,$Xi # movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pxor $T1,$Xhi # @@ -583,7 +619,7 @@ ___ &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$T2); $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0)); # experimental alternative. special thing about is that there - # no dependency between the two multiplications... + # no dependency between the two multiplications... mov \$`0xE1<<1`,%eax mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff mov \$0x07,%r11d @@ -758,7 +794,7 @@ $code.=<<___; movdqa $T2,$T1 # pslldq \$8,$T2 pclmulqdq \$0x00,$Hkey2,$Xln - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pxor $T1,$Xhi # movdqu 0($inp),$T1 @@ -894,7 +930,7 @@ $code.=<<___; psllq \$57,$Xi # movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pshufd \$0b01001110,$Xhn,$Xmn pxor $T1,$Xhi # @@ -1648,14 +1684,20 @@ se_handler: cmp %r10,%rbx # context->Rip>=epilogue label jae .Lin_prologue - lea 24(%rax),%rax # adjust "rsp" + lea 48+280(%rax),%rax # adjust "rsp" mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 .Lin_prologue: mov 8(%rax),%rdi diff --git a/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl b/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl index f0598cb28c..6a2ac71295 100755 --- a/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl @@ -1,5 +1,5 @@ #! /usr/bin/env perl -# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy @@ -23,13 +23,14 @@ # Relative comparison is therefore more informative. This initial # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x # faster than "4-bit" integer-only compiler-generated 64-bit code. -# "Initial version" means that there is room for futher improvement. +# "Initial version" means that there is room for further improvement. # May 2016 # # 2x aggregated reduction improves performance by 50% (resulting # performance on POWER8 is 1 cycle per processed byte), and 4x # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb). +# POWER9 delivers 0.51 cpb. $flavour=shift; $output =shift; diff --git a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl index e13c709019..47e8820080 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl @@ -19,23 +19,29 @@ # June 2014 # # Initial version was developed in tight cooperation with Ard -# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from -# other assembly modules. Just like aesv8-armx.pl this module -# supports both AArch32 and AArch64 execution modes. +# Biesheuvel of Linaro from bits-n-pieces from other assembly modules. +# Just like aesv8-armx.pl this module supports both AArch32 and +# AArch64 execution modes. # # July 2014 # # Implement 2x aggregated reduction [see ghash-x86.pl for background # information]. # +# November 2017 +# +# AArch64 register bank to "accommodate" 4x aggregated reduction and +# improve performance by 20-70% depending on processor. +# # Current performance in cycles per processed byte: # -# PMULL[2] 32-bit NEON(*) -# Apple A7 0.92 5.62 -# Cortex-A53 1.01 8.39 -# Cortex-A57 1.17 7.61 -# Denver 0.71 6.02 -# Mongoose 1.10 8.06 +# 64-bit PMULL 32-bit PMULL 32-bit NEON(*) +# Apple A7 0.58 0.92 5.62 +# Cortex-A53 0.85 1.01 8.39 +# Cortex-A57 0.73 1.17 7.61 +# Denver 0.51 0.65 6.02 +# Mongoose 0.65 1.10 8.06 +# Kryo 0.76 1.16 8.00 # # (*) presented for reference/comparison purposes; @@ -131,8 +137,56 @@ gcm_init_v8: vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing veor $t1,$t1,$H2 vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed - vst1.64 {$Hhl-$H2},[x0] @ store Htable[1..2] + vst1.64 {$Hhl-$H2},[x0],#32 @ store Htable[1..2] +___ +if ($flavour =~ /64/) { +my ($t3,$Yl,$Ym,$Yh) = map("q$_",(4..7)); +$code.=<<___; + @ calculate H^3 and H^4 + vpmull.p64 $Xl,$H, $H2 + vpmull.p64 $Yl,$H2,$H2 + vpmull2.p64 $Xh,$H, $H2 + vpmull2.p64 $Yh,$H2,$H2 + vpmull.p64 $Xm,$t0,$t1 + vpmull.p64 $Ym,$t1,$t1 + + vext.8 $t0,$Xl,$Xh,#8 @ Karatsuba post-processing + vext.8 $t1,$Yl,$Yh,#8 + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t0 + veor $t3,$Yl,$Yh + veor $Ym,$Ym,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase + veor $Ym,$Ym,$t3 + vpmull.p64 $t3,$Yl,$xC2 + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Yh#lo,$Ym#hi + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vmov $Ym#hi,$Yl#lo + veor $Xl,$Xm,$t2 + veor $Yl,$Ym,$t3 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vext.8 $t3,$Yl,$Yl,#8 + vpmull.p64 $Xl,$Xl,$xC2 + vpmull.p64 $Yl,$Yl,$xC2 + veor $t2,$t2,$Xh + veor $t3,$t3,$Yh + veor $H, $Xl,$t2 @ H^3 + veor $H2,$Yl,$t3 @ H^4 + + vext.8 $t0,$H, $H,#8 @ Karatsuba pre-processing + vext.8 $t1,$H2,$H2,#8 + veor $t0,$t0,$H + veor $t1,$t1,$H2 + vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed + vst1.64 {$H-$H2},[x0] @ store Htable[3..5] +___ +} +$code.=<<___; ret .size gcm_init_v8,.-gcm_init_v8 ___ @@ -201,6 +255,10 @@ $code.=<<___; .align 4 gcm_ghash_v8: ___ +$code.=<<___ if ($flavour =~ /64/); + cmp $len,#64 + b.hs .Lgcm_ghash_v8_4x +___ $code.=<<___ if ($flavour !~ /64/); vstmdb sp!,{d8-d15} @ 32-bit ABI says so ___ @@ -210,13 +268,13 @@ $code.=<<___; @ loaded value would have @ to be rotated in order to @ make it appear as in - @ alorithm specification + @ algorithm specification subs $len,$len,#32 @ see if $len is 32 or larger mov $inc,#16 @ $inc is used as post- @ increment for input pointer; @ as loop is modulo-scheduled @ $inc is zeroed just in time - @ to preclude oversteping + @ to preclude overstepping @ inp[len], which means that @ last block[s] are actually @ loaded twice, but last @@ -348,7 +406,297 @@ $code.=<<___; ret .size gcm_ghash_v8,.-gcm_ghash_v8 ___ + +if ($flavour =~ /64/) { # 4x subroutine +my ($I0,$j1,$j2,$j3, + $I1,$I2,$I3,$H3,$H34,$H4,$Yl,$Ym,$Yh) = map("q$_",(4..7,15..23)); + +$code.=<<___; +.type gcm_ghash_v8_4x,%function +.align 4 +gcm_ghash_v8_4x: +.Lgcm_ghash_v8_4x: + vld1.64 {$Xl},[$Xi] @ load [rotated] Xi + vld1.64 {$H-$H2},[$Htbl],#48 @ load twisted H, ..., H^2 + vmov.i8 $xC2,#0xe1 + vld1.64 {$H3-$H4},[$Htbl] @ load twisted H^3, ..., H^4 + vshl.u64 $xC2,$xC2,#57 @ compose 0xc2.0 constant + + vld1.64 {$I0-$j3},[$inp],#64 +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl + vrev64.8 $j1,$j1 + vrev64.8 $j2,$j2 + vrev64.8 $j3,$j3 + vrev64.8 $I0,$I0 +#endif + vext.8 $I3,$j3,$j3,#8 + vext.8 $I2,$j2,$j2,#8 + vext.8 $I1,$j1,$j1,#8 + + vpmull.p64 $Yl,$H,$I3 @ H·Ii+3 + veor $j3,$j3,$I3 + vpmull2.p64 $Yh,$H,$I3 + vpmull.p64 $Ym,$Hhl,$j3 + + vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2 + veor $j2,$j2,$I2 + vpmull2.p64 $I2,$H2,$I2 + vpmull2.p64 $j2,$Hhl,$j2 + + veor $Yl,$Yl,$t0 + veor $Yh,$Yh,$I2 + veor $Ym,$Ym,$j2 + + vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1 + veor $j1,$j1,$I1 + vpmull2.p64 $I1,$H3,$I1 + vpmull.p64 $j1,$H34,$j1 + + veor $Yl,$Yl,$j3 + veor $Yh,$Yh,$I1 + veor $Ym,$Ym,$j1 + + subs $len,$len,#128 + b.lo .Ltail4x + + b .Loop4x + +.align 4 +.Loop4x: + veor $t0,$I0,$Xl + vld1.64 {$I0-$j3},[$inp],#64 + vext.8 $IN,$t0,$t0,#8 +#ifndef __ARMEB__ + vrev64.8 $j1,$j1 + vrev64.8 $j2,$j2 + vrev64.8 $j3,$j3 + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H4,$IN + vext.8 $I3,$j3,$j3,#8 + vpmull2.p64 $Xm,$H34,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + vext.8 $I2,$j2,$j2,#8 + veor $Xm,$Xm,$Ym + vext.8 $I1,$j1,$j1,#8 + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + vpmull.p64 $Yl,$H,$I3 @ H·Ii+3 + veor $j3,$j3,$I3 + veor $Xm,$Xm,$t1 + vpmull2.p64 $Yh,$H,$I3 + veor $Xm,$Xm,$t2 + vpmull.p64 $Ym,$Hhl,$j3 + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2 + veor $j2,$j2,$I2 + vpmull2.p64 $I2,$H2,$I2 + veor $Xl,$Xm,$t2 + vpmull2.p64 $j2,$Hhl,$j2 + + veor $Yl,$Yl,$t0 + veor $Yh,$Yh,$I2 + veor $Ym,$Ym,$j2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1 + veor $j1,$j1,$I1 + veor $t2,$t2,$Xh + vpmull2.p64 $I1,$H3,$I1 + vpmull.p64 $j1,$H34,$j1 + + veor $Xl,$Xl,$t2 + veor $Yl,$Yl,$j3 + veor $Yh,$Yh,$I1 + vext.8 $Xl,$Xl,$Xl,#8 + veor $Ym,$Ym,$j1 + + subs $len,$len,#64 + b.hs .Loop4x + +.Ltail4x: + veor $t0,$I0,$Xl + vext.8 $IN,$t0,$t0,#8 + + vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H4,$IN + vpmull2.p64 $Xm,$H34,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + veor $Xm,$Xm,$Ym + + adds $len,$len,#64 + b.eq .Ldone4x + + cmp $len,#32 + b.lo .Lone + b.eq .Ltwo +.Lthree: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$I0-$j2},[$inp] + veor $Xm,$Xm,$t2 +#ifndef __ARMEB__ + vrev64.8 $j1,$j1 + vrev64.8 $j2,$j2 + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vext.8 $I2,$j2,$j2,#8 + vext.8 $I1,$j1,$j1,#8 + veor $Xl,$Xm,$t2 + + vpmull.p64 $Yl,$H,$I2 @ H·Ii+2 + veor $j2,$j2,$I2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + vpmull2.p64 $Yh,$H,$I2 + vpmull.p64 $Ym,$Hhl,$j2 + veor $Xl,$Xl,$t2 + vpmull.p64 $j3,$H2,$I1 @ H^2·Ii+1 + veor $j1,$j1,$I1 + vext.8 $Xl,$Xl,$Xl,#8 + + vpmull2.p64 $I1,$H2,$I1 + veor $t0,$I0,$Xl + vpmull2.p64 $j1,$Hhl,$j1 + vext.8 $IN,$t0,$t0,#8 + + veor $Yl,$Yl,$j3 + veor $Yh,$Yh,$I1 + veor $Ym,$Ym,$j1 + + vpmull.p64 $Xl,$H3,$IN @ H^3·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H3,$IN + vpmull.p64 $Xm,$H34,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + veor $Xm,$Xm,$Ym + b .Ldone4x + +.align 4 +.Ltwo: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$I0-$j1},[$inp] + veor $Xm,$Xm,$t2 +#ifndef __ARMEB__ + vrev64.8 $j1,$j1 + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vext.8 $I1,$j1,$j1,#8 + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + vext.8 $Xl,$Xl,$Xl,#8 + + vpmull.p64 $Yl,$H,$I1 @ H·Ii+1 + veor $j1,$j1,$I1 + + veor $t0,$I0,$Xl + vext.8 $IN,$t0,$t0,#8 + + vpmull2.p64 $Yh,$H,$I1 + vpmull.p64 $Ym,$Hhl,$j1 + + vpmull.p64 $Xl,$H2,$IN @ H^2·(Xi+Ii) + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H2,$IN + vpmull2.p64 $Xm,$Hhl,$t0 + + veor $Xl,$Xl,$Yl + veor $Xh,$Xh,$Yh + veor $Xm,$Xm,$Ym + b .Ldone4x + +.align 4 +.Lone: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$I0},[$inp] + veor $Xm,$Xm,$t2 +#ifndef __ARMEB__ + vrev64.8 $I0,$I0 +#endif + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + vext.8 $Xl,$Xl,$Xl,#8 + + veor $t0,$I0,$Xl + vext.8 $IN,$t0,$t0,#8 + + vpmull.p64 $Xl,$H,$IN + veor $t0,$t0,$IN + vpmull2.p64 $Xh,$H,$IN + vpmull.p64 $Xm,$Hhl,$t0 + +.Ldone4x: + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + vext.8 $Xl,$Xl,$Xl,#8 + +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vst1.64 {$Xl},[$Xi] @ write out Xi + + ret +.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x +___ + } +} + $code.=<<___; .asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 2 @@ -360,7 +708,8 @@ if ($flavour =~ /64/) { ######## 64-bit code my $arg=shift; $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && - sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; + sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1, + $3<8?$3:$3+8,($4 eq "lo")?0:1; } foreach(split("\n",$code)) { s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or @@ -375,7 +724,7 @@ if ($flavour =~ /64/) { ######## 64-bit code s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers s/@\s/\/\//o; # old->new style commentary - # fix up remainig legacy suffixes + # fix up remaining legacy suffixes s/\.[ui]?8(\s)/$1/o; s/\.[uis]?32//o and s/\.16b/\.4s/go; m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument @@ -415,7 +764,7 @@ if ($flavour =~ /64/) { ######## 64-bit code s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers s/\/\/\s?/@ /o; # new->old style commentary - # fix up remainig new-style suffixes + # fix up remaining new-style suffixes s/\],#[0-9]+/]!/o; s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or diff --git a/deps/openssl/openssl/crypto/modes/build.info b/deps/openssl/openssl/crypto/modes/build.info index b794c5041a..821340eb90 100644 --- a/deps/openssl/openssl/crypto/modes/build.info +++ b/deps/openssl/openssl/crypto/modes/build.info @@ -6,8 +6,9 @@ SOURCE[../../libcrypto]=\ INCLUDE[gcm128.o]=.. -GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(CFLAGS) $(LIB_CFLAGS) -GENERATE[ghash-x86.s]=asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR) +GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS) +GENERATE[ghash-x86.s]=asm/ghash-x86.pl \ + $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR) GENERATE[ghash-x86_64.s]=asm/ghash-x86_64.pl $(PERLASM_SCHEME) GENERATE[aesni-gcm-x86_64.s]=asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) GENERATE[ghash-sparcv9.S]=asm/ghash-sparcv9.pl $(PERLASM_SCHEME) diff --git a/deps/openssl/openssl/crypto/modes/cts128.c b/deps/openssl/openssl/crypto/modes/cts128.c index 77ec994b4f..93826a1e2f 100644 --- a/deps/openssl/openssl/crypto/modes/cts128.c +++ b/deps/openssl/openssl/crypto/modes/cts128.c @@ -328,196 +328,3 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out, #endif return 16 + len + residue; } - -#if defined(SELFTEST) -# include <stdio.h> -# include <openssl/aes.h> - -/* test vectors from RFC 3962 */ -static const unsigned char test_key[16] = "chicken teriyaki"; -static const unsigned char test_input[64] = - "I would like the" " General Gau's C" - "hicken, please, " "and wonton soup."; -static const unsigned char test_iv[16] = - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - -static const unsigned char vector_17[17] = { - 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4, - 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f, - 0x97 -}; - -static const unsigned char vector_31[31] = { - 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1, - 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22, - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5 -}; - -static const unsigned char vector_32[32] = { - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8, - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84 -}; - -static const unsigned char vector_47[47] = { - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84, - 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c, - 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e, - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5 -}; - -static const unsigned char vector_48[48] = { - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84, - 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0, - 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8, - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8 -}; - -static const unsigned char vector_64[64] = { - 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0, - 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84, - 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5, - 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8, - 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5, - 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40, - 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0, - 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8 -}; - -static AES_KEY encks, decks; - -void test_vector(const unsigned char *vector, size_t len) -{ - unsigned char iv[sizeof(test_iv)]; - unsigned char cleartext[64], ciphertext[64]; - size_t tail; - - printf("vector_%d\n", len); - fflush(stdout); - - if ((tail = len % 16) == 0) - tail = 16; - tail += 16; - - /* test block-based encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_encrypt_block(test_input, ciphertext, len, &encks, iv, - (block128_f) AES_encrypt); - if (memcmp(ciphertext, vector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(1); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(1); - - /* test block-based decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_decrypt_block(ciphertext, cleartext, len, &decks, iv, - (block128_f) AES_decrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(2); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(2); - - /* test streamed encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_encrypt(test_input, ciphertext, len, &encks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(ciphertext, vector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(3); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(3); - - /* test streamed decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_cts128_decrypt(ciphertext, cleartext, len, &decks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(4); - if (memcmp(iv, vector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(4); -} - -void test_nistvector(const unsigned char *vector, size_t len) -{ - unsigned char iv[sizeof(test_iv)]; - unsigned char cleartext[64], ciphertext[64], nistvector[64]; - size_t tail; - - printf("nistvector_%d\n", len); - fflush(stdout); - - if ((tail = len % 16) == 0) - tail = 16; - - len -= 16 + tail; - memcpy(nistvector, vector, len); - /* flip two last blocks */ - memcpy(nistvector + len, vector + len + 16, tail); - memcpy(nistvector + len + tail, vector + len, 16); - len += 16 + tail; - tail = 16; - - /* test block-based encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_encrypt_block(test_input, ciphertext, len, &encks, iv, - (block128_f) AES_encrypt); - if (memcmp(ciphertext, nistvector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(1); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(1); - - /* test block-based decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_decrypt_block(ciphertext, cleartext, len, &decks, iv, - (block128_f) AES_decrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(2); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(2); - - /* test streamed encryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_encrypt(test_input, ciphertext, len, &encks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(ciphertext, nistvector, len)) - fprintf(stderr, "output_%d mismatch\n", len), exit(3); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(3); - - /* test streamed decryption */ - memcpy(iv, test_iv, sizeof(test_iv)); - CRYPTO_nistcts128_decrypt(ciphertext, cleartext, len, &decks, iv, - (cbc128_f) AES_cbc_encrypt); - if (memcmp(cleartext, test_input, len)) - fprintf(stderr, "input_%d mismatch\n", len), exit(4); - if (memcmp(iv, nistvector + len - tail, sizeof(iv))) - fprintf(stderr, "iv_%d mismatch\n", len), exit(4); -} - -int main() -{ - AES_set_encrypt_key(test_key, 128, &encks); - AES_set_decrypt_key(test_key, 128, &decks); - - test_vector(vector_17, sizeof(vector_17)); - test_vector(vector_31, sizeof(vector_31)); - test_vector(vector_32, sizeof(vector_32)); - test_vector(vector_47, sizeof(vector_47)); - test_vector(vector_48, sizeof(vector_48)); - test_vector(vector_64, sizeof(vector_64)); - - test_nistvector(vector_17, sizeof(vector_17)); - test_nistvector(vector_31, sizeof(vector_31)); - test_nistvector(vector_32, sizeof(vector_32)); - test_nistvector(vector_47, sizeof(vector_47)); - test_nistvector(vector_48, sizeof(vector_48)); - test_nistvector(vector_64, sizeof(vector_64)); - - return 0; -} -#endif diff --git a/deps/openssl/openssl/crypto/modes/gcm128.c b/deps/openssl/openssl/crypto/modes/gcm128.c index a2b05c4d6c..15f76e3e86 100644 --- a/deps/openssl/openssl/crypto/modes/gcm128.c +++ b/deps/openssl/openssl/crypto/modes/gcm128.c @@ -1,5 +1,5 @@ /* - * Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -209,7 +209,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) } } -# define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) +# define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) #elif TABLE_BITS==4 @@ -550,7 +550,7 @@ void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); # endif -# define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) +# define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) /* @@ -624,7 +624,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) } } -# define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) +# define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) #endif @@ -703,7 +703,7 @@ void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, #ifdef GCM_FUNCREF_4BIT # undef GCM_MUL -# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) +# define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) # ifdef GHASH # undef GHASH # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) @@ -836,10 +836,6 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; #endif - ctx->Yi.u[0] = 0; - ctx->Yi.u[1] = 0; - ctx->Xi.u[0] = 0; - ctx->Xi.u[1] = 0; ctx->len.u[0] = 0; /* AAD length */ ctx->len.u[1] = 0; /* message length */ ctx->ares = 0; @@ -847,53 +843,68 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, if (len == 12) { memcpy(ctx->Yi.c, iv, 12); + ctx->Yi.c[12] = 0; + ctx->Yi.c[13] = 0; + ctx->Yi.c[14] = 0; ctx->Yi.c[15] = 1; ctr = 1; } else { size_t i; u64 len0 = len; + /* Borrow ctx->Xi to calculate initial Yi */ + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + while (len >= 16) { for (i = 0; i < 16; ++i) - ctx->Yi.c[i] ^= iv[i]; - GCM_MUL(ctx, Yi); + ctx->Xi.c[i] ^= iv[i]; + GCM_MUL(ctx); iv += 16; len -= 16; } if (len) { for (i = 0; i < len; ++i) - ctx->Yi.c[i] ^= iv[i]; - GCM_MUL(ctx, Yi); + ctx->Xi.c[i] ^= iv[i]; + GCM_MUL(ctx); } len0 <<= 3; if (is_endian.little) { #ifdef BSWAP8 - ctx->Yi.u[1] ^= BSWAP8(len0); + ctx->Xi.u[1] ^= BSWAP8(len0); #else - ctx->Yi.c[8] ^= (u8)(len0 >> 56); - ctx->Yi.c[9] ^= (u8)(len0 >> 48); - ctx->Yi.c[10] ^= (u8)(len0 >> 40); - ctx->Yi.c[11] ^= (u8)(len0 >> 32); - ctx->Yi.c[12] ^= (u8)(len0 >> 24); - ctx->Yi.c[13] ^= (u8)(len0 >> 16); - ctx->Yi.c[14] ^= (u8)(len0 >> 8); - ctx->Yi.c[15] ^= (u8)(len0); + ctx->Xi.c[8] ^= (u8)(len0 >> 56); + ctx->Xi.c[9] ^= (u8)(len0 >> 48); + ctx->Xi.c[10] ^= (u8)(len0 >> 40); + ctx->Xi.c[11] ^= (u8)(len0 >> 32); + ctx->Xi.c[12] ^= (u8)(len0 >> 24); + ctx->Xi.c[13] ^= (u8)(len0 >> 16); + ctx->Xi.c[14] ^= (u8)(len0 >> 8); + ctx->Xi.c[15] ^= (u8)(len0); #endif - } else - ctx->Yi.u[1] ^= len0; + } else { + ctx->Xi.u[1] ^= len0; + } - GCM_MUL(ctx, Yi); + GCM_MUL(ctx); if (is_endian.little) #ifdef BSWAP4 - ctr = BSWAP4(ctx->Yi.d[3]); + ctr = BSWAP4(ctx->Xi.d[3]); #else - ctr = GETU32(ctx->Yi.c + 12); + ctr = GETU32(ctx->Xi.c + 12); #endif else - ctr = ctx->Yi.d[3]; + ctr = ctx->Xi.d[3]; + + /* Copy borrowed Xi to Yi */ + ctx->Yi.u[0] = ctx->Xi.u[0]; + ctx->Yi.u[1] = ctx->Xi.u[1]; } + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key); ++ctr; if (is_endian.little) @@ -936,7 +947,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, n = (n + 1) % 16; } if (n == 0) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); else { ctx->ares = n; return 0; @@ -952,7 +963,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, while (len >= 16) { for (i = 0; i < 16; ++i) ctx->Xi.c[i] ^= aad[i]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); aad += 16; len -= 16; } @@ -975,7 +986,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, long one; char little; } is_endian = { 1 }; - unsigned int n, ctr; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; block128_f block = ctx->block; @@ -993,9 +1004,23 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to encrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +#else + GCM_MUL(ctx); +#endif ctx->ares = 0; } @@ -1008,28 +1033,48 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (16 % sizeof(size_t) == 0) { /* always true actually */ do { if (n) { +# if defined(GHASH) + while (n && len) { + ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } # if defined(STRICT_ALIGNMENT) if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) break; # endif # if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } # if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { size_t j = GHASH_CHUNK; @@ -1100,7 +1145,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, ctx->Yi.d[3] = ctr; for (i = 0; i < 16 / sizeof(size_t); ++i) ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); out += 16; in += 16; len -= 16; @@ -1117,13 +1162,21 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, # endif else ctx->Yi.d[3] = ctr; +# if defined(GHASH) + while (len--) { + ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; + ++n; + } +# else while (len--) { ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; ++n; } + mres = n; +# endif } - ctx->mres = n; + ctx->mres = mres; return 0; } while (0); } @@ -1141,13 +1194,22 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, else ctx->Yi.d[3] = ctr; } - ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n]; n = (n + 1) % 16; + if (mres == sizeof(ctx->Xn)) { + GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); + mres = 0; + } +#else + ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; + mres = n = (n + 1) % 16; if (n == 0) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif } - ctx->mres = n; + ctx->mres = mres; return 0; } @@ -1159,7 +1221,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, long one; char little; } is_endian = { 1 }; - unsigned int n, ctr; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; block128_f block = ctx->block; @@ -1177,9 +1239,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to decrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +#else + GCM_MUL(ctx); +#endif ctx->ares = 0; } @@ -1192,11 +1268,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; #if !defined(OPENSSL_SMALL_FOOTPRINT) if (16 % sizeof(size_t) == 0) { /* always true actually */ do { if (n) { +# if defined(GHASH) + while (n && len) { + *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { u8 c = *(in++); *(out++) = c ^ ctx->EKi.c[n]; @@ -1204,18 +1294,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } # if defined(STRICT_ALIGNMENT) if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) break; # endif # if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } # if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { size_t j = GHASH_CHUNK; @@ -1287,7 +1383,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, out[i] = c ^ ctx->EKi.t[i]; ctx->Xi.t[i] ^= c; } - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); out += 16; in += 16; len -= 16; @@ -1304,15 +1400,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, # endif else ctx->Yi.d[3] = ctr; +# if defined(GHASH) + while (len--) { + out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; + ++n; + } +# else while (len--) { u8 c = in[n]; ctx->Xi.c[n] ^= c; out[n] = c ^ ctx->EKi.c[n]; ++n; } + mres = n; +# endif } - ctx->mres = n; + ctx->mres = mres; return 0; } while (0); } @@ -1331,15 +1435,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, else ctx->Yi.d[3] = ctr; } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n]; + n = (n + 1) % 16; + if (mres == sizeof(ctx->Xn)) { + GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); + mres = 0; + } +#else c = in[i]; out[i] = c ^ ctx->EKi.c[n]; ctx->Xi.c[n] ^= c; - n = (n + 1) % 16; + mres = n = (n + 1) % 16; if (n == 0) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif } - ctx->mres = n; + ctx->mres = mres; return 0; } @@ -1354,7 +1467,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, long one; char little; } is_endian = { 1 }; - unsigned int n, ctr; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; void *key = ctx->key; @@ -1371,9 +1484,23 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to encrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +#if defined(GHASH) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +#else + GCM_MUL(ctx); +#endif ctx->ares = 0; } @@ -1386,30 +1513,51 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; if (n) { +# if defined(GHASH) + while (n && len) { + ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } -# if defined(GHASH) && defined(GHASH_CHUNK) +# if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } +# if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); ctr += GHASH_CHUNK / 16; if (is_endian.little) -# ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -# else +# else PUTU32(ctx->Yi.c + 12, ctr); -# endif +# endif else ctx->Yi.d[3] = ctr; GHASH(ctx, out, GHASH_CHUNK); @@ -1417,6 +1565,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, in += GHASH_CHUNK; len -= GHASH_CHUNK; } +# endif # endif if ((i = (len & (size_t)-16))) { size_t j = i / 16; @@ -1440,7 +1589,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, while (j--) { for (i = 0; i < 16; ++i) ctx->Xi.c[i] ^= out[i]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); out += 16; } # endif @@ -1457,12 +1606,16 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, else ctx->Yi.d[3] = ctr; while (len--) { - ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n]; +# if defined(GHASH) + ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; +# else + ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n]; +# endif ++n; } } - ctx->mres = n; + ctx->mres = mres; return 0; #endif } @@ -1478,7 +1631,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, long one; char little; } is_endian = { 1 }; - unsigned int n, ctr; + unsigned int n, ctr, mres; size_t i; u64 mlen = ctx->len.u[1]; void *key = ctx->key; @@ -1495,9 +1648,23 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, return -1; ctx->len.u[1] = mlen; + mres = ctx->mres; + if (ctx->ares) { /* First call to decrypt finalizes GHASH(AAD) */ - GCM_MUL(ctx, Xi); +# if defined(GHASH) + if (len == 0) { + GCM_MUL(ctx); + ctx->ares = 0; + return 0; + } + memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + mres = sizeof(ctx->Xi); +# else + GCM_MUL(ctx); +# endif ctx->ares = 0; } @@ -1510,8 +1677,22 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, else ctr = ctx->Yi.d[3]; - n = ctx->mres; + n = mres % 16; if (n) { +# if defined(GHASH) + while (n && len) { + *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; + --len; + n = (n + 1) % 16; + } + if (n == 0) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } else { + ctx->mres = mres; + return 0; + } +# else while (n && len) { u8 c = *(in++); *(out++) = c ^ ctx->EKi.c[n]; @@ -1519,30 +1700,38 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, --len; n = (n + 1) % 16; } - if (n == 0) - GCM_MUL(ctx, Xi); - else { + if (n == 0) { + GCM_MUL(ctx); + mres = 0; + } else { ctx->mres = n; return 0; } +# endif } -# if defined(GHASH) && defined(GHASH_CHUNK) +# if defined(GHASH) + if (len >= 16 && mres) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } +# if defined(GHASH_CHUNK) while (len >= GHASH_CHUNK) { GHASH(ctx, in, GHASH_CHUNK); (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); ctr += GHASH_CHUNK / 16; if (is_endian.little) -# ifdef BSWAP4 +# ifdef BSWAP4 ctx->Yi.d[3] = BSWAP4(ctr); -# else +# else PUTU32(ctx->Yi.c + 12, ctr); -# endif +# endif else ctx->Yi.d[3] = ctr; out += GHASH_CHUNK; in += GHASH_CHUNK; len -= GHASH_CHUNK; } +# endif # endif if ((i = (len & (size_t)-16))) { size_t j = i / 16; @@ -1554,7 +1743,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, size_t k; for (k = 0; k < 16; ++k) ctx->Xi.c[k] ^= in[k]; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); in += 16; } j = i / 16; @@ -1586,14 +1775,18 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, else ctx->Yi.d[3] = ctr; while (len--) { +# if defined(GHASH) + out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; +# else u8 c = in[n]; - ctx->Xi.c[n] ^= c; + ctx->Xi.c[mres++] ^= c; out[n] = c ^ ctx->EKi.c[n]; +# endif ++n; } } - ctx->mres = n; + ctx->mres = mres; return 0; #endif } @@ -1609,10 +1802,32 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, u64 clen = ctx->len.u[1] << 3; #ifdef GCM_FUNCREF_4BIT void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; +# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], + const u8 *inp, size_t len) = ctx->ghash; +# endif #endif +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + u128 bitlen; + unsigned int mres = ctx->mres; + + if (mres) { + unsigned blocks = (mres + 15) & -16; + + memset(ctx->Xn + mres, 0, blocks - mres); + mres = blocks; + if (mres == sizeof(ctx->Xn)) { + GHASH(ctx, ctx->Xn, mres); + mres = 0; + } + } else if (ctx->ares) { + GCM_MUL(ctx); + } +#else if (ctx->mres || ctx->ares) - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif if (is_endian.little) { #ifdef BSWAP8 @@ -1629,9 +1844,17 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, #endif } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + bitlen.hi = alen; + bitlen.lo = clen; + memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen)); + mres += sizeof(bitlen); + GHASH(ctx, ctx->Xn, mres); +#else ctx->Xi.u[0] ^= alen; ctx->Xi.u[1] ^= clen; - GCM_MUL(ctx, Xi); + GCM_MUL(ctx); +#endif ctx->Xi.u[0] ^= ctx->EK0.u[0]; ctx->Xi.u[1] ^= ctx->EK0.u[1]; @@ -1663,639 +1886,3 @@ void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) { OPENSSL_clear_free(ctx, sizeof(*ctx)); } - -#if defined(SELFTEST) -# include <stdio.h> -# include <openssl/aes.h> - -/* Test Case 1 */ -static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL; -static const u8 T1[] = { - 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61, - 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a -}; - -/* Test Case 2 */ -# define K2 K1 -# define A2 A1 -# define IV2 IV1 -static const u8 P2[16]; -static const u8 C2[] = { - 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92, - 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78 -}; - -static const u8 T2[] = { - 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd, - 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf -}; - -/* Test Case 3 */ -# define A3 A2 -static const u8 K3[] = { - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08 -}; - -static const u8 P3[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 -}; - -static const u8 IV3[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, - 0xde, 0xca, 0xf8, 0x88 -}; - -static const u8 C3[] = { - 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, - 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c, - 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, - 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, - 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c, - 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, - 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, - 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85 -}; - -static const u8 T3[] = { - 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6, - 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4 -}; - -/* Test Case 4 */ -# define K4 K3 -# define IV4 IV3 -static const u8 P4[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39 -}; - -static const u8 A4[] = { - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xab, 0xad, 0xda, 0xd2 -}; - -static const u8 C4[] = { - 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, - 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c, - 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, - 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, - 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c, - 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, - 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, - 0x3d, 0x58, 0xe0, 0x91 -}; - -static const u8 T4[] = { - 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb, - 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47 -}; - -/* Test Case 5 */ -# define K5 K4 -# define P5 P4 -# define A5 A4 -static const u8 IV5[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad -}; - -static const u8 C5[] = { - 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a, - 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55, - 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8, - 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23, - 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2, - 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42, - 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07, - 0xc2, 0x3f, 0x45, 0x98 -}; - -static const u8 T5[] = { - 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85, - 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb -}; - -/* Test Case 6 */ -# define K6 K5 -# define P6 P5 -# define A6 A5 -static const u8 IV6[] = { - 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5, - 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa, - 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1, - 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28, - 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39, - 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54, - 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57, - 0xa6, 0x37, 0xb3, 0x9b -}; - -static const u8 C6[] = { - 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6, - 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94, - 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8, - 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7, - 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90, - 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f, - 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03, - 0x4c, 0x34, 0xae, 0xe5 -}; - -static const u8 T6[] = { - 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa, - 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50 -}; - -/* Test Case 7 */ -static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL; -static const u8 T7[] = { - 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b, - 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35 -}; - -/* Test Case 8 */ -# define K8 K7 -# define IV8 IV7 -# define A8 A7 -static const u8 P8[16]; -static const u8 C8[] = { - 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41, - 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00 -}; - -static const u8 T8[] = { - 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab, - 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb -}; - -/* Test Case 9 */ -# define A9 A8 -static const u8 K9[] = { - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08, - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c -}; - -static const u8 P9[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 -}; - -static const u8 IV9[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, - 0xde, 0xca, 0xf8, 0x88 -}; - -static const u8 C9[] = { - 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41, - 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57, - 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84, - 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c, - 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25, - 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47, - 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9, - 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56 -}; - -static const u8 T9[] = { - 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf, - 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14 -}; - -/* Test Case 10 */ -# define K10 K9 -# define IV10 IV9 -static const u8 P10[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39 -}; - -static const u8 A10[] = { - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xab, 0xad, 0xda, 0xd2 -}; - -static const u8 C10[] = { - 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41, - 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57, - 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84, - 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c, - 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25, - 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47, - 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9, - 0xcc, 0xda, 0x27, 0x10 -}; - -static const u8 T10[] = { - 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f, - 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c -}; - -/* Test Case 11 */ -# define K11 K10 -# define P11 P10 -# define A11 A10 -static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad }; - -static const u8 C11[] = { - 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54, - 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8, - 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f, - 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57, - 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75, - 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9, - 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f, - 0xa0, 0xf0, 0x62, 0xf7 -}; - -static const u8 T11[] = { - 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24, - 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8 -}; - -/* Test Case 12 */ -# define K12 K11 -# define P12 P11 -# define A12 A11 -static const u8 IV12[] = { - 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5, - 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa, - 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1, - 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28, - 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39, - 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54, - 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57, - 0xa6, 0x37, 0xb3, 0x9b -}; - -static const u8 C12[] = { - 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c, - 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff, - 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef, - 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45, - 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9, - 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3, - 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7, - 0xe9, 0xb7, 0x37, 0x3b -}; - -static const u8 T12[] = { - 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb, - 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9 -}; - -/* Test Case 13 */ -static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL; -static const u8 T13[] = { - 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9, - 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b -}; - -/* Test Case 14 */ -# define K14 K13 -# define A14 A13 -static const u8 P14[16], IV14[12]; -static const u8 C14[] = { - 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e, - 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18 -}; - -static const u8 T14[] = { - 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0, - 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19 -}; - -/* Test Case 15 */ -# define A15 A14 -static const u8 K15[] = { - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08, - 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, - 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08 -}; - -static const u8 P15[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 -}; - -static const u8 IV15[] = { - 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad, - 0xde, 0xca, 0xf8, 0x88 -}; - -static const u8 C15[] = { - 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, - 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, - 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, - 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, - 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, - 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, - 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, - 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad -}; - -static const u8 T15[] = { - 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd, - 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c -}; - -/* Test Case 16 */ -# define K16 K15 -# define IV16 IV15 -static const u8 P16[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39 -}; - -static const u8 A16[] = { - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef, - 0xab, 0xad, 0xda, 0xd2 -}; - -static const u8 C16[] = { - 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, - 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, - 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, - 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, - 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, - 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, - 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, - 0xbc, 0xc9, 0xf6, 0x62 -}; - -static const u8 T16[] = { - 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68, - 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b -}; - -/* Test Case 17 */ -# define K17 K16 -# define P17 P16 -# define A17 A16 -static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad }; - -static const u8 C17[] = { - 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32, - 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb, - 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa, - 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0, - 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0, - 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78, - 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99, - 0xf4, 0x7c, 0x9b, 0x1f -}; - -static const u8 T17[] = { - 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4, - 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2 -}; - -/* Test Case 18 */ -# define K18 K17 -# define P18 P17 -# define A18 A17 -static const u8 IV18[] = { - 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5, - 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa, - 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1, - 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28, - 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39, - 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54, - 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57, - 0xa6, 0x37, 0xb3, 0x9b -}; - -static const u8 C18[] = { - 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1, - 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20, - 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19, - 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4, - 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45, - 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde, - 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e, - 0x44, 0xae, 0x7e, 0x3f -}; - -static const u8 T18[] = { - 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0, - 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a -}; - -/* Test Case 19 */ -# define K19 K1 -# define P19 P1 -# define IV19 IV1 -# define C19 C1 -static const u8 A19[] = { - 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, - 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a, - 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, - 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, - 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53, - 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, - 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, - 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55, - 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, - 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d, - 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, - 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, - 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d, - 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, - 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, - 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad -}; - -static const u8 T19[] = { - 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d, - 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92 -}; - -/* Test Case 20 */ -# define K20 K1 -# define A20 A1 -/* this results in 0xff in counter LSB */ -static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff }; - -static const u8 P20[288]; -static const u8 C20[] = { - 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a, - 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14, - 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce, - 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f, - 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70, - 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18, - 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf, - 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49, - 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab, - 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c, - 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c, - 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29, - 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1, - 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76, - 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2, - 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce, - 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f, - 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86, - 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb, - 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18, - 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65, - 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42, - 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b, - 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06, - 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24, - 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c, - 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4, - 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64, - 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03, - 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6, - 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90, - 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74, - 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67, - 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46, - 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78, - 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c -}; - -static const u8 T20[] = { - 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a, - 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f -}; - -# define TEST_CASE(n) do { \ - u8 out[sizeof(P##n)]; \ - AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \ - CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \ - CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \ - memset(out,0,sizeof(out)); \ - if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \ - if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \ - if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \ - (C##n && memcmp(out,C##n,sizeof(out)))) \ - ret++, printf ("encrypt test#%d failed.\n",n); \ - CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \ - memset(out,0,sizeof(out)); \ - if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \ - if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \ - if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \ - (P##n && memcmp(out,P##n,sizeof(out)))) \ - ret++, printf ("decrypt test#%d failed.\n",n); \ - } while(0) - -int main() -{ - GCM128_CONTEXT ctx; - AES_KEY key; - int ret = 0; - - TEST_CASE(1); - TEST_CASE(2); - TEST_CASE(3); - TEST_CASE(4); - TEST_CASE(5); - TEST_CASE(6); - TEST_CASE(7); - TEST_CASE(8); - TEST_CASE(9); - TEST_CASE(10); - TEST_CASE(11); - TEST_CASE(12); - TEST_CASE(13); - TEST_CASE(14); - TEST_CASE(15); - TEST_CASE(16); - TEST_CASE(17); - TEST_CASE(18); - TEST_CASE(19); - TEST_CASE(20); - -# ifdef OPENSSL_CPUID_OBJ - { - size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc(); - union { - u64 u; - u8 c[1024]; - } buf; - int i; - - AES_set_encrypt_key(K1, sizeof(K1) * 8, &key); - CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt); - CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1)); - - CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf)); - start = OPENSSL_rdtsc(); - CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf)); - gcm_t = OPENSSL_rdtsc() - start; - - CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf), - &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres, - (block128_f) AES_encrypt); - start = OPENSSL_rdtsc(); - CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf), - &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres, - (block128_f) AES_encrypt); - ctr_t = OPENSSL_rdtsc() - start; - - printf("%.2f-%.2f=%.2f\n", - gcm_t / (double)sizeof(buf), - ctr_t / (double)sizeof(buf), - (gcm_t - ctr_t) / (double)sizeof(buf)); -# ifdef GHASH - { - void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], - const u8 *inp, size_t len) = ctx.ghash; - - GHASH((&ctx), buf.c, sizeof(buf)); - start = OPENSSL_rdtsc(); - for (i = 0; i < 100; ++i) - GHASH((&ctx), buf.c, sizeof(buf)); - gcm_t = OPENSSL_rdtsc() - start; - printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i); - } -# endif - } -# endif - - return ret; -} -#endif diff --git a/deps/openssl/openssl/crypto/modes/modes_lcl.h b/deps/openssl/openssl/crypto/modes/modes_lcl.h index 4fc32e190f..f2ae01d11a 100644 --- a/deps/openssl/openssl/crypto/modes/modes_lcl.h +++ b/deps/openssl/openssl/crypto/modes/modes_lcl.h @@ -73,6 +73,7 @@ typedef unsigned char u8; # endif # elif defined(_MSC_VER) # if _MSC_VER>=1300 +# include <stdlib.h> # pragma intrinsic(_byteswap_uint64,_byteswap_ulong) # define BSWAP8(x) _byteswap_uint64((u64)(x)) # define BSWAP4(x) _byteswap_ulong((u32)(x)) @@ -127,6 +128,9 @@ struct gcm128_context { unsigned int mres, ares; block128_f block; void *key; +#if !defined(OPENSSL_SMALL_FOOTPRINT) + unsigned char Xn[48]; +#endif }; struct xts128_context { diff --git a/deps/openssl/openssl/crypto/modes/ocb128.c b/deps/openssl/openssl/crypto/modes/ocb128.c index fc92b246bd..713b9aaf19 100644 --- a/deps/openssl/openssl/crypto/modes/ocb128.c +++ b/deps/openssl/openssl/crypto/modes/ocb128.c @@ -9,6 +9,7 @@ #include <string.h> #include <openssl/crypto.h> +#include <openssl/err.h> #include "modes_lcl.h" #ifndef OPENSSL_NO_OCB @@ -41,22 +42,13 @@ static u32 ocb_ntz(u64 n) static void ocb_block_lshift(const unsigned char *in, size_t shift, unsigned char *out) { - unsigned char shift_mask; int i; - unsigned char mask[15]; + unsigned char carry = 0, carry_next; - shift_mask = 0xff; - shift_mask <<= (8 - shift); for (i = 15; i >= 0; i--) { - if (i > 0) { - mask[i - 1] = in[i] & shift_mask; - mask[i - 1] >>= 8 - shift; - } - out[i] = in[i] << shift; - - if (i != 15) { - out[i] ^= mask[i]; - } + carry_next = in[i] >> (8 - shift); + out[i] = (in[i] << shift) | carry; + carry = carry_next; } } @@ -73,7 +65,7 @@ static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out) */ mask = in->c[0] & 0x80; mask >>= 7; - mask *= 135; + mask = (0 - mask) & 0x87; ocb_block_lshift(in->c, 1, out->c); @@ -118,8 +110,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx) * the index. */ ctx->max_l_index += (idx - ctx->max_l_index + 4) & ~3; - tmp_ptr = - OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK)); + tmp_ptr = OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK)); if (tmp_ptr == NULL) /* prevent ctx->l from being clobbered */ return NULL; ctx->l = tmp_ptr; @@ -164,9 +155,10 @@ int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec, memset(ctx, 0, sizeof(*ctx)); ctx->l_index = 0; ctx->max_l_index = 5; - ctx->l = OPENSSL_malloc(ctx->max_l_index * 16); - if (ctx->l == NULL) + if ((ctx->l = OPENSSL_malloc(ctx->max_l_index * 16)) == NULL) { + CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_INIT, ERR_R_MALLOC_FAILURE); return 0; + } /* * We set both the encryption and decryption key schedules - decryption @@ -210,9 +202,10 @@ int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src, if (keydec) dest->keydec = keydec; if (src->l) { - dest->l = OPENSSL_malloc(src->max_l_index * 16); - if (dest->l == NULL) + if ((dest->l = OPENSSL_malloc(src->max_l_index * 16)) == NULL) { + CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_COPY_CTX, ERR_R_MALLOC_FAILURE); return 0; + } memcpy(dest->l, src->l, (src->l_index + 1) * 16); } return 1; diff --git a/deps/openssl/openssl/crypto/modes/wrap128.c b/deps/openssl/openssl/crypto/modes/wrap128.c index 46809a0e74..d7e56cc260 100644 --- a/deps/openssl/openssl/crypto/modes/wrap128.c +++ b/deps/openssl/openssl/crypto/modes/wrap128.c @@ -1,5 +1,5 @@ /* - * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -237,7 +237,7 @@ size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv, * * @param[in] key Key value. * @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv. - * @param[out] out Plaintext. Minimal buffer length = inlen bytes. + * @param[out] out Plaintext. Minimal buffer length = (inlen - 8) bytes. * Input and output buffers can overlap if block function * supports that. * @param[in] in Ciphertext as n 64-bit blocks. @@ -267,7 +267,6 @@ size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv, if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX) return 0; - memmove(out, in, inlen); if (inlen == 16) { /* * Section 4.2 - special case in step 1: When n=1, the ciphertext @@ -275,14 +274,17 @@ size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv, * single AES block using AES in ECB mode: AIV | P[1] = DEC(K, C[0] | * C[1]) */ - block(out, out, key); - memcpy(aiv, out, 8); + unsigned char buff[16]; + + block(in, buff, key); + memcpy(aiv, buff, 8); /* Remove AIV */ - memmove(out, out + 8, 8); + memcpy(out, buff + 8, 8); padded_len = 8; + OPENSSL_cleanse(buff, inlen); } else { padded_len = inlen - 8; - ret = crypto_128_unwrap_raw(key, aiv, out, out, inlen, block); + ret = crypto_128_unwrap_raw(key, aiv, out, in, inlen, block); if (padded_len != ret) { OPENSSL_cleanse(out, inlen); return 0; |