15 files changed, 826 insertions, 1034 deletions
diff --git a/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl
index 5ad62b3979..b42016101e 100644
--- a/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -35,6 +35,8 @@
 # Applications using the EVP interface will observe a few percent
 # worse performance.]
 #
+# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
+#
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 # [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
 
@@ -116,23 +118,6 @@ _aesni_ctr32_ghash_6x:
 	  vpxor		$rndkey,$inout3,$inout3
 	  vmovups	0x10-0x80($key),$T2	# borrow $T2 for $rndkey
 	vpclmulqdq	\$0x01,$Hkey,$Z3,$Z2
-
-	# At this point, the current block of 96 (0x60) bytes has already been
-	# loaded into registers. Concurrently with processing it, we want to
-	# load the next 96 bytes of input for the next round. Obviously, we can
-	# only do this if there are at least 96 more bytes of input beyond the
-	# input we're currently processing, or else we'd read past the end of
-	# the input buffer. Here, we set |%r12| to 96 if there are at least 96
-	# bytes of input beyond the 96 bytes we're already processing, and we
-	# set |%r12| to 0 otherwise. In the case where we set |%r12| to 96,
-	# we'll read in the next block so that it is in registers for the next
-	# loop iteration. In the case where we set |%r12| to 0, we'll re-read
-	# the current block and then ignore what we re-read.
-	#
-	# At this point, |$in0| points to the current (already read into
-	# registers) block, and |$end0| points to 2*96 bytes before the end of
-	# the input. Thus, |$in0| > |$end0| means that we do not have the next
-	# 96-byte block to read in, and |$in0| <= |$end0| means we do.
 	xor		%r12,%r12
 	cmp		$in0,$end0
 
@@ -424,20 +409,25 @@ $code.=<<___;
 .type	aesni_gcm_decrypt,\@function,6
 .align	32
 aesni_gcm_decrypt:
+.cfi_startproc
 	xor	$ret,$ret
-
-	# We call |_aesni_ctr32_ghash_6x|, which requires at least 96 (0x60)
-	# bytes of input.
 	cmp	\$0x60,$len			# minimal accepted length
 	jb	.Lgcm_dec_abort
 
 	lea	(%rsp),%rax			# save stack pointer
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 ___
 $code.=<<___ if ($win64);
 	lea	-0xa8(%rsp),%rsp
@@ -482,15 +472,7 @@ $code.=<<___;
 	vmovdqu		0x50($inp),$Z3		# I[5]
 	lea		($inp),$in0
 	vmovdqu		0x40($inp),$Z0
-
-	# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
-	# bytes before the end of the input. Note, in particular, that this is
-	# correct even if |$len| is not an even multiple of 96 or 16. XXX: This
-	# seems to require that |$inp| + |$len| >= 2*96 (0xc0); i.e. |$inp| must
-	# not be near the very beginning of the address space when |$len| < 2*96
-	# (0xc0).
 	lea		-0xc0($inp,$len),$end0
-
 	vmovdqu		0x30($inp),$Z1
 	shr		\$4,$len
 	xor		$ret,$ret
@@ -537,15 +519,23 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lgcm_dec_abort:
 	mov	$ret,%rax		# return value
 	ret
+.cfi_endproc
 .size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
 ___
 
@@ -645,21 +635,25 @@ _aesni_ctr32_6x:
 .type	aesni_gcm_encrypt,\@function,6
 .align	32
 aesni_gcm_encrypt:
+.cfi_startproc
 	xor	$ret,$ret
-
-	# We call |_aesni_ctr32_6x| twice, each call consuming 96 bytes of
-	# input. Then we call |_aesni_ctr32_ghash_6x|, which requires at
-	# least 96 more bytes of input.
 	cmp	\$0x60*3,$len			# minimal accepted length
 	jb	.Lgcm_enc_abort
 
 	lea	(%rsp),%rax			# save stack pointer
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 ___
 $code.=<<___ if ($win64);
 	lea	-0xa8(%rsp),%rsp
@@ -699,16 +693,7 @@ $code.=<<___;
 .Lenc_no_key_aliasing:
 
 	lea		($out),$in0
-
-	# |_aesni_ctr32_ghash_6x| requires |$end0| to point to 2*96 (0xc0)
-	# bytes before the end of the input. Note, in particular, that this is
-	# correct even if |$len| is not an even multiple of 96 or 16. Unlike in
-	# the decryption case, there's no caveat that |$out| must not be near
-	# the very beginning of the address space, because we know that
-	# |$len| >= 3*96 from the check above, and so we know
-	# |$out| + |$len| >= 2*96 (0xc0).
 	lea		-0xc0($out,$len),$end0
-
 	shr		\$4,$len
 
 	call		_aesni_ctr32_6x
@@ -931,15 +916,23 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
+.cfi_restore	%r15
 	mov	-40(%rax),%r14
+.cfi_restore	%r14
 	mov	-32(%rax),%r13
+.cfi_restore	%r13
 	mov	-24(%rax),%r12
+.cfi_restore	%r12
 	mov	-16(%rax),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
+.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
+.cfi_def_cfa_register	%rsp
 .Lgcm_enc_abort:
 	mov	$ret,%rax		# return value
 	ret
+.cfi_endproc
 .size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
 ___
 
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl
index 1cf14a6c9f..dcc23f7d7d 100644
--- a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl
@@ -54,7 +54,7 @@
 #
 # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
 # Polynomial Multiplication on ARM Processors using the NEON Engine.
-# 
+#
 # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
 
 # ====================================================================
@@ -525,7 +525,7 @@ $code.=<<___;
 #ifdef __ARMEL__
 	vrev64.8	$Xl,$Xl
 #endif
-	sub		$Xi,#16	
+	sub		$Xi,#16
 	vst1.64		$Xl#hi,[$Xi]!		@ write out Xi
 	vst1.64		$Xl#lo,[$Xi]
 
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl
index 81e75f71a8..eb9ded91e5 100755
--- a/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghash-ia64.pl
@@ -156,7 +156,7 @@ $code.=<<___;
 ___
 
 ######################################################################
-# "528B" (well, "512B" actualy) streamed GHASH
+# "528B" (well, "512B" actually) streamed GHASH
 #
 $Xip="in0";
 $Htbl="in1";
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl
index 1d6254543b..a614c99c22 100644
--- a/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghash-parisc.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -705,7 +705,7 @@ my $depd = sub {
   my ($mod,$args) = @_;
   my $orig = "depd$mod\t$args";
 
-    # I only have ",z" completer, it's impicitly encoded...
+    # I only have ",z" completer, it's implicitly encoded...
     if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 16
     {	my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
     	my $cpos=63-$2;
@@ -724,6 +724,11 @@ sub assemble {
     ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
 }
 
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+	=~ /GNU assembler/) {
+    $gnuas = 1;
+}
+
 foreach (split("\n",$code)) {
 	s/\`([^\`]*)\`/eval $1/ge;
 	if ($SIZE_T==4) {
@@ -731,7 +736,12 @@ foreach (split("\n",$code)) {
 		s/cmpb,\*/comb,/;
 		s/,\*/,/;
 	}
-	s/\bbv\b/bve/	if ($SIZE_T==8);
+
+	s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
+	s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
+	s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);
+	s/\bbv\b/bve/			if ($SIZE_T==8);
+
 	print $_,"\n";
 }
 
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl
index 6e628d8823..17dc375053 100644
--- a/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl
@@ -80,6 +80,8 @@ $rem_4bit="%r14";
 $sp="%r15";
 
 $code.=<<___;
+#include "s390x_arch.h"
+
 .text
 
 .globl	gcm_gmult_4bit
@@ -89,12 +91,13 @@ ___
 $code.=<<___ if(!$softonly && 0);	# hardware is slow for single block...
 	larl	%r1,OPENSSL_s390xcap_P
 	lghi	%r0,0
-	lg	%r1,24(%r1)	# load second word of kimd capabilities vector
+	lg	%r1,S390X_KIMD+8(%r1)	# load second word of kimd capabilities
+					#  vector
 	tmhh	%r1,0x4000	# check for function 65
 	jz	.Lsoft_gmult
 	stg	%r0,16($sp)	# arrange 16 bytes of zero input
 	stg	%r0,24($sp)
-	lghi	%r0,65		# function 65
+	lghi	%r0,S390X_GHASH	# function 65
 	la	%r1,0($Xi)	# H lies right after Xi in gcm128_context
 	la	$inp,16($sp)
 	lghi	$len,16
@@ -123,10 +126,11 @@ gcm_ghash_4bit:
 ___
 $code.=<<___ if(!$softonly);
 	larl	%r1,OPENSSL_s390xcap_P
-	lg	%r0,24(%r1)	# load second word of kimd capabilities vector
+	lg	%r0,S390X_KIMD+8(%r1)	# load second word of kimd capabilities
+					#  vector
 	tmhh	%r0,0x4000	# check for function 65
 	jz	.Lsoft_ghash
-	lghi	%r0,65		# function 65
+	lghi	%r0,S390X_GHASH	# function 65
 	la	%r1,0($Xi)	# H lies right after Xi in gcm128_context
 	.long	0xb93e0004	# kimd %r0,$inp
 	brc	1,.-4		# pay attention to "partial completion"
@@ -149,7 +153,7 @@ $code.=<<___;
 	lg	$Zhi,0+1($Xi)
 	lghi	$tmp,0
 .Louter:
-	xg	$Zhi,0($inp)		# Xi ^= inp 
+	xg	$Zhi,0($inp)		# Xi ^= inp
 	xg	$Zlo,8($inp)
 	xgr	$Zhi,$tmp
 	stg	$Zlo,8+1($Xi)
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl
index cd8458256e..bcbe6e399d 100644
--- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl
@@ -103,14 +103,13 @@
 #
 # Does it make sense to increase Naggr? To start with it's virtually
 # impossible in 32-bit mode, because of limited register bank
-# capacity. Otherwise improvement has to be weighed agiainst slower
+# capacity. Otherwise improvement has to be weighed against slower
 # setup, as well as code size and complexity increase. As even
 # optimistic estimate doesn't promise 30% performance improvement,
 # there are currently no plans to increase Naggr.
 #
-# Special thanks to David Woodhouse <dwmw2@infradead.org> for
-# providing access to a Westmere-based system on behalf of Intel
-# Open Source Technology Centre.
+# Special thanks to David Woodhouse for providing access to a
+# Westmere-based system on behalf of Intel Open Source Technology Centre.
 
 # January 2010
 #
@@ -139,7 +138,7 @@ require "x86asm.pl";
 $output=pop;
 open STDOUT,">$output";
 
-&asm_init($ARGV[0],"ghash-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
 
 $sse2=0;
 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -811,7 +810,7 @@ sub mmx_loop() {
     &bswap	($dat);
     &pshufw	($Zhi,$Zhi,0b00011011);		# 76543210
     &bswap	("ebx");
-    
+
     &cmp	("ecx",&DWP(528+16+8,"esp"));	# are we done?
     &jne	(&label("outer"));
   }
@@ -915,7 +914,7 @@ my ($Xhi,$Xi) = @_;
 	&psllq		($Xi,57);		#
 	&movdqa		($T1,$Xi);		#
 	&pslldq		($Xi,8);
-	&psrldq		($T1,8);		#	
+	&psrldq		($T1,8);		#
 	&pxor		($Xi,$T2);
 	&pxor		($Xhi,$T1);		#
 
@@ -1085,7 +1084,7 @@ my ($Xhi,$Xi) = @_;
 	  &psllq	($Xi,57);		#
 	  &movdqa	($T1,$Xi);		#
 	  &pslldq	($Xi,8);
-	  &psrldq	($T1,8);		#	
+	  &psrldq	($T1,8);		#
 	  &pxor		($Xi,$T2);
 	  &pxor		($Xhi,$T1);		#
 	&pshufd		($T1,$Xhn,0b01001110);
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl
index 387e3f854e..afc30c3e72 100644
--- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl
@@ -44,9 +44,8 @@
 # See ghash-x86.pl for background information and details about coding
 # techniques.
 #
-# Special thanks to David Woodhouse <dwmw2@infradead.org> for
-# providing access to a Westmere-based system on behalf of Intel
-# Open Source Technology Centre.
+# Special thanks to David Woodhouse for providing access to a
+# Westmere-based system on behalf of Intel Open Source Technology Centre.
 
 # December 2012
 #
@@ -74,6 +73,7 @@
 # Skylake	0.44(+110%)(if system doesn't support AVX)
 # Bulldozer	1.49(+27%)
 # Silvermont	2.88(+13%)
+# Knights L	2.12(-)    (if system doesn't support AVX)
 # Goldmont	1.08(+24%)
 
 # March 2013
@@ -86,6 +86,8 @@
 # it performs in 0.41 cycles per byte on Haswell processor, in
 # 0.29 on Broadwell, and in 0.36 on Skylake.
 #
+# Knights Landing achieves 1.09 cpb.
+#
 # [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
 
 $flavour = shift;
@@ -236,9 +238,21 @@ $code=<<___;
 .type	gcm_gmult_4bit,\@function,2
 .align	16
 gcm_gmult_4bit:
+.cfi_startproc
 	push	%rbx
-	push	%rbp		# %rbp and %r12 are pushed exclusively in
+.cfi_push	%rbx
+	push	%rbp		# %rbp and others are pushed exclusively in
+.cfi_push	%rbp
 	push	%r12		# order to reuse Win64 exception handler...
+.cfi_push	%r12
+	push	%r13
+.cfi_push	%r13
+	push	%r14
+.cfi_push	%r14
+	push	%r15
+.cfi_push	%r15
+	sub	\$280,%rsp
+.cfi_adjust_cfa_offset	280
 .Lgmult_prologue:
 
 	movzb	15($Xi),$Zlo
@@ -249,10 +263,15 @@ $code.=<<___;
 	mov	$Zlo,8($Xi)
 	mov	$Zhi,($Xi)
 
-	mov	16(%rsp),%rbx
-	lea	24(%rsp),%rsp
+	lea	280+48(%rsp),%rsi
+.cfi_def_cfa	%rsi,8
+	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lgmult_epilogue:
 	ret
+.cfi_endproc
 .size	gcm_gmult_4bit,.-gcm_gmult_4bit
 ___
 
@@ -266,13 +285,21 @@ $code.=<<___;
 .type	gcm_ghash_4bit,\@function,4
 .align	16
 gcm_ghash_4bit:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	sub	\$280,%rsp
+.cfi_adjust_cfa_offset	280
 .Lghash_prologue:
 	mov	$inp,%r14		# reassign couple of args
 	mov	$len,%r15
@@ -400,16 +427,25 @@ $code.=<<___;
 	mov	$Zlo,8($Xi)
 	mov	$Zhi,($Xi)
 
-	lea	280(%rsp),%rsi
-	mov	0(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	lea	280+48(%rsp),%rsi
+.cfi_def_cfa	%rsi,8
+	mov	-48(%rsi),%r15
+.cfi_restore	%r15
+	mov	-40(%rsi),%r14
+.cfi_restore	%r14
+	mov	-32(%rsi),%r13
+.cfi_restore	%r13
+	mov	-24(%rsi),%r12
+.cfi_restore	%r12
+	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
+	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
+	lea	0(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lghash_epilogue:
 	ret
+.cfi_endproc
 .size	gcm_ghash_4bit,.-gcm_ghash_4bit
 ___
 
@@ -469,7 +505,7 @@ $code.=<<___;
 	psllq		\$57,$Xi		#
 	movdqa		$Xi,$T1			#
 	pslldq		\$8,$Xi
-	psrldq		\$8,$T1			#	
+	psrldq		\$8,$T1			#
 	pxor		$T2,$Xi
 	pxor		$T1,$Xhi		#
 
@@ -583,7 +619,7 @@ ___
 	&clmul64x64_T2	($Xhi,$Xi,$Hkey,$T2);
 $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
 	# experimental alternative. special thing about is that there
-	# no dependency between the two multiplications... 
+	# no dependency between the two multiplications...
 	mov		\$`0xE1<<1`,%eax
 	mov		\$0xA040608020C0E000,%r10	# ((7..0)·0xE0)&0xff
 	mov		\$0x07,%r11d
@@ -758,7 +794,7 @@ $code.=<<___;
 	movdqa		$T2,$T1			#
 	pslldq		\$8,$T2
 	 pclmulqdq	\$0x00,$Hkey2,$Xln
-	psrldq		\$8,$T1			#	
+	psrldq		\$8,$T1			#
 	pxor		$T2,$Xi
 	pxor		$T1,$Xhi		#
 	movdqu		0($inp),$T1
@@ -894,7 +930,7 @@ $code.=<<___;
 	  psllq		\$57,$Xi		#
 	  movdqa	$Xi,$T1			#
 	  pslldq	\$8,$Xi
-	  psrldq	\$8,$T1			#	
+	  psrldq	\$8,$T1			#
 	  pxor		$T2,$Xi
 	pshufd		\$0b01001110,$Xhn,$Xmn
 	  pxor		$T1,$Xhi		#
@@ -1648,14 +1684,20 @@ se_handler:
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lin_prologue
 
-	lea	24(%rax),%rax		# adjust "rsp"
+	lea	48+280(%rax),%rax	# adjust "rsp"
 
 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp
 	mov	-24(%rax),%r12
+	mov	-32(%rax),%r13
+	mov	-40(%rax),%r14
+	mov	-48(%rax),%r15
 	mov	%rbx,144($context)	# restore context->Rbx
 	mov	%rbp,160($context)	# restore context->Rbp
 	mov	%r12,216($context)	# restore context->R12
+	mov	%r13,224($context)	# restore context->R13
+	mov	%r14,232($context)	# restore context->R14
+	mov	%r15,240($context)	# restore context->R15
 
 .Lin_prologue:
 	mov	8(%rax),%rdi
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl b/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl
index f0598cb28c..6a2ac71295 100755
--- a/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl
@@ -1,5 +1,5 @@
 #! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -23,13 +23,14 @@
 # Relative comparison is therefore more informative. This initial
 # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
 # faster than "4-bit" integer-only compiler-generated 64-bit code.
-# "Initial version" means that there is room for futher improvement.
+# "Initial version" means that there is room for further improvement.
 
 # May 2016
 #
 # 2x aggregated reduction improves performance by 50% (resulting
 # performance on POWER8 is 1 cycle per processed byte), and 4x
 # aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+# POWER9 delivers 0.51 cpb.
 
 $flavour=shift;
 $output =shift;
diff --git a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl
index e13c709019..47e8820080 100644
--- a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl
+++ b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl
@@ -19,23 +19,29 @@
 # June 2014
 #
 # Initial version was developed in tight cooperation with Ard
-# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from
-# other assembly modules. Just like aesv8-armx.pl this module
-# supports both AArch32 and AArch64 execution modes.
+# Biesheuvel of Linaro from bits-n-pieces from other assembly modules.
+# Just like aesv8-armx.pl this module supports both AArch32 and
+# AArch64 execution modes.
 #
 # July 2014
 #
 # Implement 2x aggregated reduction [see ghash-x86.pl for background
 # information].
 #
+# November 2017
+#
+# AArch64 register bank to "accommodate" 4x aggregated reduction and
+# improve performance by 20-70% depending on processor.
+#
 # Current performance in cycles per processed byte:
 #
-#		PMULL[2]	32-bit NEON(*)
-# Apple A7	0.92		5.62
-# Cortex-A53	1.01		8.39
-# Cortex-A57	1.17		7.61
-# Denver	0.71		6.02
-# Mongoose	1.10		8.06
+#		64-bit PMULL	32-bit PMULL	32-bit NEON(*)
+# Apple A7	0.58		0.92		5.62
+# Cortex-A53	0.85		1.01		8.39
+# Cortex-A57	0.73		1.17		7.61
+# Denver	0.51		0.65		6.02
+# Mongoose	0.65		1.10		8.06
+# Kryo		0.76		1.16		8.00
 #
 # (*)	presented for reference/comparison purposes;
 
@@ -131,8 +137,56 @@ gcm_init_v8:
 	vext.8		$t1,$H2,$H2,#8		@ Karatsuba pre-processing
 	veor		$t1,$t1,$H2
 	vext.8		$Hhl,$t0,$t1,#8		@ pack Karatsuba pre-processed
-	vst1.64		{$Hhl-$H2},[x0]		@ store Htable[1..2]
+	vst1.64		{$Hhl-$H2},[x0],#32	@ store Htable[1..2]
+___
+if ($flavour =~ /64/) {
+my ($t3,$Yl,$Ym,$Yh) = map("q$_",(4..7));
 
+$code.=<<___;
+	@ calculate H^3 and H^4
+	vpmull.p64	$Xl,$H, $H2
+	 vpmull.p64	$Yl,$H2,$H2
+	vpmull2.p64	$Xh,$H, $H2
+	 vpmull2.p64	$Yh,$H2,$H2
+	vpmull.p64	$Xm,$t0,$t1
+	 vpmull.p64	$Ym,$t1,$t1
+
+	vext.8		$t0,$Xl,$Xh,#8		@ Karatsuba post-processing
+	 vext.8		$t1,$Yl,$Yh,#8
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t0
+	 veor		$t3,$Yl,$Yh
+	 veor		$Ym,$Ym,$t1
+	veor		$Xm,$Xm,$t2
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase
+	 veor		$Ym,$Ym,$t3
+	 vpmull.p64	$t3,$Yl,$xC2
+
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	 vmov		$Yh#lo,$Ym#hi
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	 vmov		$Ym#hi,$Yl#lo
+	veor		$Xl,$Xm,$t2
+	 veor		$Yl,$Ym,$t3
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase
+	 vext.8		$t3,$Yl,$Yl,#8
+	vpmull.p64	$Xl,$Xl,$xC2
+	 vpmull.p64	$Yl,$Yl,$xC2
+	veor		$t2,$t2,$Xh
+	 veor		$t3,$t3,$Yh
+	veor		$H, $Xl,$t2		@ H^3
+	 veor		$H2,$Yl,$t3		@ H^4
+
+	vext.8		$t0,$H, $H,#8		@ Karatsuba pre-processing
+	 vext.8		$t1,$H2,$H2,#8
+	veor		$t0,$t0,$H
+	 veor		$t1,$t1,$H2
+	vext.8		$Hhl,$t0,$t1,#8		@ pack Karatsuba pre-processed
+	vst1.64		{$H-$H2},[x0]		@ store Htable[3..5]
+___
+}
+$code.=<<___;
 	ret
 .size	gcm_init_v8,.-gcm_init_v8
 ___
@@ -201,6 +255,10 @@ $code.=<<___;
 .align	4
 gcm_ghash_v8:
 ___
+$code.=<<___	if ($flavour =~ /64/);
+	cmp		$len,#64
+	b.hs		.Lgcm_ghash_v8_4x
+___
 $code.=<<___		if ($flavour !~ /64/);
 	vstmdb		sp!,{d8-d15}		@ 32-bit ABI says so
 ___
@@ -210,13 +268,13 @@ $code.=<<___;
 						@ loaded value would have
 						@ to be rotated in order to
 						@ make it appear as in
-						@ alorithm specification
+						@ algorithm specification
 	subs		$len,$len,#32		@ see if $len is 32 or larger
 	mov		$inc,#16		@ $inc is used as post-
 						@ increment for input pointer;
 						@ as loop is modulo-scheduled
 						@ $inc is zeroed just in time
-						@ to preclude oversteping
+						@ to preclude overstepping
 						@ inp[len], which means that
 						@ last block[s] are actually
 						@ loaded twice, but last
@@ -348,7 +406,297 @@ $code.=<<___;
 	ret
 .size	gcm_ghash_v8,.-gcm_ghash_v8
 ___
+
+if ($flavour =~ /64/) {				# 4x subroutine
+my ($I0,$j1,$j2,$j3,
+    $I1,$I2,$I3,$H3,$H34,$H4,$Yl,$Ym,$Yh) = map("q$_",(4..7,15..23));
+
+$code.=<<___;
+.type	gcm_ghash_v8_4x,%function
+.align	4
+gcm_ghash_v8_4x:
+.Lgcm_ghash_v8_4x:
+	vld1.64		{$Xl},[$Xi]		@ load [rotated] Xi
+	vld1.64		{$H-$H2},[$Htbl],#48	@ load twisted H, ..., H^2
+	vmov.i8		$xC2,#0xe1
+	vld1.64		{$H3-$H4},[$Htbl]	@ load twisted H^3, ..., H^4
+	vshl.u64	$xC2,$xC2,#57		@ compose 0xc2.0 constant
+
+	vld1.64		{$I0-$j3},[$inp],#64
+#ifndef __ARMEB__
+	vrev64.8	$Xl,$Xl
+	vrev64.8	$j1,$j1
+	vrev64.8	$j2,$j2
+	vrev64.8	$j3,$j3
+	vrev64.8	$I0,$I0
+#endif
+	vext.8		$I3,$j3,$j3,#8
+	vext.8		$I2,$j2,$j2,#8
+	vext.8		$I1,$j1,$j1,#8
+
+	vpmull.p64	$Yl,$H,$I3		@ H·Ii+3
+	veor		$j3,$j3,$I3
+	vpmull2.p64	$Yh,$H,$I3
+	vpmull.p64	$Ym,$Hhl,$j3
+
+	vpmull.p64	$t0,$H2,$I2		@ H^2·Ii+2
+	veor		$j2,$j2,$I2
+	vpmull2.p64	$I2,$H2,$I2
+	vpmull2.p64	$j2,$Hhl,$j2
+
+	veor		$Yl,$Yl,$t0
+	veor		$Yh,$Yh,$I2
+	veor		$Ym,$Ym,$j2
+
+	vpmull.p64	$j3,$H3,$I1		@ H^3·Ii+1
+	veor		$j1,$j1,$I1
+	vpmull2.p64	$I1,$H3,$I1
+	vpmull.p64	$j1,$H34,$j1
+
+	veor		$Yl,$Yl,$j3
+	veor		$Yh,$Yh,$I1
+	veor		$Ym,$Ym,$j1
+
+	subs		$len,$len,#128
+	b.lo		.Ltail4x
+
+	b		.Loop4x
+
+.align	4
+.Loop4x:
+	veor		$t0,$I0,$Xl
+	 vld1.64	{$I0-$j3},[$inp],#64
+	vext.8		$IN,$t0,$t0,#8
+#ifndef __ARMEB__
+	 vrev64.8	$j1,$j1
+	 vrev64.8	$j2,$j2
+	 vrev64.8	$j3,$j3
+	 vrev64.8	$I0,$I0
+#endif
+
+	vpmull.p64	$Xl,$H4,$IN		@ H^4·(Xi+Ii)
+	veor		$t0,$t0,$IN
+	vpmull2.p64	$Xh,$H4,$IN
+	 vext.8		$I3,$j3,$j3,#8
+	vpmull2.p64	$Xm,$H34,$t0
+
+	veor		$Xl,$Xl,$Yl
+	veor		$Xh,$Xh,$Yh
+	 vext.8		$I2,$j2,$j2,#8
+	veor		$Xm,$Xm,$Ym
+	 vext.8		$I1,$j1,$j1,#8
+
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	 vpmull.p64	$Yl,$H,$I3		@ H·Ii+3
+	 veor		$j3,$j3,$I3
+	veor		$Xm,$Xm,$t1
+	 vpmull2.p64	$Yh,$H,$I3
+	veor		$Xm,$Xm,$t2
+	 vpmull.p64	$Ym,$Hhl,$j3
+
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase of reduction
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	 vpmull.p64	$t0,$H2,$I2		@ H^2·Ii+2
+	 veor		$j2,$j2,$I2
+	 vpmull2.p64	$I2,$H2,$I2
+	veor		$Xl,$Xm,$t2
+	 vpmull2.p64	$j2,$Hhl,$j2
+
+	 veor		$Yl,$Yl,$t0
+	 veor		$Yh,$Yh,$I2
+	 veor		$Ym,$Ym,$j2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase of reduction
+	vpmull.p64	$Xl,$Xl,$xC2
+	 vpmull.p64	$j3,$H3,$I1		@ H^3·Ii+1
+	 veor		$j1,$j1,$I1
+	veor		$t2,$t2,$Xh
+	 vpmull2.p64	$I1,$H3,$I1
+	 vpmull.p64	$j1,$H34,$j1
+
+	veor		$Xl,$Xl,$t2
+	 veor		$Yl,$Yl,$j3
+	 veor		$Yh,$Yh,$I1
+	vext.8		$Xl,$Xl,$Xl,#8
+	 veor		$Ym,$Ym,$j1
+
+	subs		$len,$len,#64
+	b.hs		.Loop4x
+
+.Ltail4x:
+	veor		$t0,$I0,$Xl
+	vext.8		$IN,$t0,$t0,#8
+
+	vpmull.p64	$Xl,$H4,$IN		@ H^4·(Xi+Ii)
+	veor		$t0,$t0,$IN
+	vpmull2.p64	$Xh,$H4,$IN
+	vpmull2.p64	$Xm,$H34,$t0
+
+	veor		$Xl,$Xl,$Yl
+	veor		$Xh,$Xh,$Yh
+	veor		$Xm,$Xm,$Ym
+
+	adds		$len,$len,#64
+	b.eq		.Ldone4x
+
+	cmp		$len,#32
+	b.lo		.Lone
+	b.eq		.Ltwo
+.Lthree:
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	 vld1.64	{$I0-$j2},[$inp]
+	veor		$Xm,$Xm,$t2
+#ifndef	__ARMEB__
+	 vrev64.8	$j1,$j1
+	 vrev64.8	$j2,$j2
+	 vrev64.8	$I0,$I0
+#endif
+
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase of reduction
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	 vext.8		$I2,$j2,$j2,#8
+	 vext.8		$I1,$j1,$j1,#8
+	veor		$Xl,$Xm,$t2
+
+	 vpmull.p64	$Yl,$H,$I2		@ H·Ii+2
+	 veor		$j2,$j2,$I2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase of reduction
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	 vpmull2.p64	$Yh,$H,$I2
+	 vpmull.p64	$Ym,$Hhl,$j2
+	veor		$Xl,$Xl,$t2
+	 vpmull.p64	$j3,$H2,$I1		@ H^2·Ii+1
+	 veor		$j1,$j1,$I1
+	vext.8		$Xl,$Xl,$Xl,#8
+
+	 vpmull2.p64	$I1,$H2,$I1
+	veor		$t0,$I0,$Xl
+	 vpmull2.p64	$j1,$Hhl,$j1
+	vext.8		$IN,$t0,$t0,#8
+
+	 veor		$Yl,$Yl,$j3
+	 veor		$Yh,$Yh,$I1
+	 veor		$Ym,$Ym,$j1
+
+	vpmull.p64	$Xl,$H3,$IN		@ H^3·(Xi+Ii)
+	veor		$t0,$t0,$IN
+	vpmull2.p64	$Xh,$H3,$IN
+	vpmull.p64	$Xm,$H34,$t0
+
+	veor		$Xl,$Xl,$Yl
+	veor		$Xh,$Xh,$Yh
+	veor		$Xm,$Xm,$Ym
+	b		.Ldone4x
+
+.align	4
+.Ltwo:
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	 vld1.64	{$I0-$j1},[$inp]
+	veor		$Xm,$Xm,$t2
+#ifndef	__ARMEB__
+	 vrev64.8	$j1,$j1
+	 vrev64.8	$I0,$I0
+#endif
+
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase of reduction
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	 vext.8		$I1,$j1,$j1,#8
+	veor		$Xl,$Xm,$t2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase of reduction
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	veor		$Xl,$Xl,$t2
+	vext.8		$Xl,$Xl,$Xl,#8
+
+	 vpmull.p64	$Yl,$H,$I1		@ H·Ii+1
+	 veor		$j1,$j1,$I1
+
+	veor		$t0,$I0,$Xl
+	vext.8		$IN,$t0,$t0,#8
+
+	 vpmull2.p64	$Yh,$H,$I1
+	 vpmull.p64	$Ym,$Hhl,$j1
+
+	vpmull.p64	$Xl,$H2,$IN		@ H^2·(Xi+Ii)
+	veor		$t0,$t0,$IN
+	vpmull2.p64	$Xh,$H2,$IN
+	vpmull2.p64	$Xm,$Hhl,$t0
+
+	veor		$Xl,$Xl,$Yl
+	veor		$Xh,$Xh,$Yh
+	veor		$Xm,$Xm,$Ym
+	b		.Ldone4x
+
+.align	4
+.Lone:
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	 vld1.64	{$I0},[$inp]
+	veor		$Xm,$Xm,$t2
+#ifndef	__ARMEB__
+	 vrev64.8	$I0,$I0
+#endif
+
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase of reduction
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	veor		$Xl,$Xm,$t2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase of reduction
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	veor		$Xl,$Xl,$t2
+	vext.8		$Xl,$Xl,$Xl,#8
+
+	veor		$t0,$I0,$Xl
+	vext.8		$IN,$t0,$t0,#8
+
+	vpmull.p64	$Xl,$H,$IN
+	veor		$t0,$t0,$IN
+	vpmull2.p64	$Xh,$H,$IN
+	vpmull.p64	$Xm,$Hhl,$t0
+
+.Ldone4x:
+	vext.8		$t1,$Xl,$Xh,#8		@ Karatsuba post-processing
+	veor		$t2,$Xl,$Xh
+	veor		$Xm,$Xm,$t1
+	veor		$Xm,$Xm,$t2
+
+	vpmull.p64	$t2,$Xl,$xC2		@ 1st phase of reduction
+	vmov		$Xh#lo,$Xm#hi		@ Xh|Xm - 256-bit result
+	vmov		$Xm#hi,$Xl#lo		@ Xm is rotated Xl
+	veor		$Xl,$Xm,$t2
+
+	vext.8		$t2,$Xl,$Xl,#8		@ 2nd phase of reduction
+	vpmull.p64	$Xl,$Xl,$xC2
+	veor		$t2,$t2,$Xh
+	veor		$Xl,$Xl,$t2
+	vext.8		$Xl,$Xl,$Xl,#8
+
+#ifndef __ARMEB__
+	vrev64.8	$Xl,$Xl
+#endif
+	vst1.64		{$Xl},[$Xi]		@ write out Xi
+
+	ret
+.size	gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
+___
+
 }
+}
+
 $code.=<<___;
 .asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align  2
@@ -360,7 +708,8 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	my $arg=shift;
 
 	$arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
-	sprintf	"ins	v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
+	sprintf	"ins	v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
+					     $3<8?$3:$3+8,($4 eq "lo")?0:1;
     }
     foreach(split("\n",$code)) {
 	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or
@@ -375,7 +724,7 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
 	s/@\s/\/\//o;				# old->new style commentary
 
-	# fix up remainig legacy suffixes
+	# fix up remaining legacy suffixes
 	s/\.[ui]?8(\s)/$1/o;
 	s/\.[uis]?32//o and s/\.16b/\.4s/go;
 	m/\.p64/o and s/\.16b/\.1q/o;		# 1st pmull argument
@@ -415,7 +764,7 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
 	s/\/\/\s?/@ /o;				# new->old style commentary
 
-	# fix up remainig new-style suffixes
+	# fix up remaining new-style suffixes
 	s/\],#[0-9]+/]!/o;
 
 	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o			or
diff --git a/deps/openssl/openssl/crypto/modes/build.info b/deps/openssl/openssl/crypto/modes/build.info
index b794c5041a..821340eb90 100644
--- a/deps/openssl/openssl/crypto/modes/build.info
+++ b/deps/openssl/openssl/crypto/modes/build.info
@@ -6,8 +6,9 @@ SOURCE[../../libcrypto]=\
 
 INCLUDE[gcm128.o]=..
 
-GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(CFLAGS) $(LIB_CFLAGS)
-GENERATE[ghash-x86.s]=asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(LIB_CFLAGS) $(PROCESSOR)
+GENERATE[ghash-ia64.s]=asm/ghash-ia64.pl $(LIB_CFLAGS) $(LIB_CPPFLAGS)
+GENERATE[ghash-x86.s]=asm/ghash-x86.pl \
+        $(PERLASM_SCHEME) $(LIB_CFLAGS) $(LIB_CPPFLAGS) $(PROCESSOR)
 GENERATE[ghash-x86_64.s]=asm/ghash-x86_64.pl $(PERLASM_SCHEME)
 GENERATE[aesni-gcm-x86_64.s]=asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME)
 GENERATE[ghash-sparcv9.S]=asm/ghash-sparcv9.pl $(PERLASM_SCHEME)
diff --git a/deps/openssl/openssl/crypto/modes/cts128.c b/deps/openssl/openssl/crypto/modes/cts128.c
index 77ec994b4f..93826a1e2f 100644
--- a/deps/openssl/openssl/crypto/modes/cts128.c
+++ b/deps/openssl/openssl/crypto/modes/cts128.c
@@ -328,196 +328,3 @@ size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
 #endif
     return 16 + len + residue;
 }
-
-#if defined(SELFTEST)
-# include <stdio.h>
-# include <openssl/aes.h>
-
-/* test vectors from RFC 3962 */
-static const unsigned char test_key[16] = "chicken teriyaki";
-static const unsigned char test_input[64] =
-    "I would like the" " General Gau's C"
-    "hicken, please, " "and wonton soup.";
-static const unsigned char test_iv[16] =
-    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-
-static const unsigned char vector_17[17] = {
-    0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4,
-    0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f,
-    0x97
-};
-
-static const unsigned char vector_31[31] = {
-    0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1,
-    0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22,
-    0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
-    0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5
-};
-
-static const unsigned char vector_32[32] = {
-    0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
-    0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
-    0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
-    0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84
-};
-
-static const unsigned char vector_47[47] = {
-    0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
-    0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
-    0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c,
-    0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e,
-    0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
-    0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5
-};
-
-static const unsigned char vector_48[48] = {
-    0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
-    0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
-    0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
-    0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8,
-    0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
-    0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8
-};
-
-static const unsigned char vector_64[64] = {
-    0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
-    0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
-    0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
-    0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
-    0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5,
-    0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40,
-    0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
-    0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8
-};
-
-static AES_KEY encks, decks;
-
-void test_vector(const unsigned char *vector, size_t len)
-{
-    unsigned char iv[sizeof(test_iv)];
-    unsigned char cleartext[64], ciphertext[64];
-    size_t tail;
-
-    printf("vector_%d\n", len);
-    fflush(stdout);
-
-    if ((tail = len % 16) == 0)
-        tail = 16;
-    tail += 16;
-
-    /* test block-based encryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_cts128_encrypt_block(test_input, ciphertext, len, &encks, iv,
-                                (block128_f) AES_encrypt);
-    if (memcmp(ciphertext, vector, len))
-        fprintf(stderr, "output_%d mismatch\n", len), exit(1);
-    if (memcmp(iv, vector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(1);
-
-    /* test block-based decryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_cts128_decrypt_block(ciphertext, cleartext, len, &decks, iv,
-                                (block128_f) AES_decrypt);
-    if (memcmp(cleartext, test_input, len))
-        fprintf(stderr, "input_%d mismatch\n", len), exit(2);
-    if (memcmp(iv, vector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(2);
-
-    /* test streamed encryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_cts128_encrypt(test_input, ciphertext, len, &encks, iv,
-                          (cbc128_f) AES_cbc_encrypt);
-    if (memcmp(ciphertext, vector, len))
-        fprintf(stderr, "output_%d mismatch\n", len), exit(3);
-    if (memcmp(iv, vector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(3);
-
-    /* test streamed decryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_cts128_decrypt(ciphertext, cleartext, len, &decks, iv,
-                          (cbc128_f) AES_cbc_encrypt);
-    if (memcmp(cleartext, test_input, len))
-        fprintf(stderr, "input_%d mismatch\n", len), exit(4);
-    if (memcmp(iv, vector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(4);
-}
-
-void test_nistvector(const unsigned char *vector, size_t len)
-{
-    unsigned char iv[sizeof(test_iv)];
-    unsigned char cleartext[64], ciphertext[64], nistvector[64];
-    size_t tail;
-
-    printf("nistvector_%d\n", len);
-    fflush(stdout);
-
-    if ((tail = len % 16) == 0)
-        tail = 16;
-
-    len -= 16 + tail;
-    memcpy(nistvector, vector, len);
-    /* flip two last blocks */
-    memcpy(nistvector + len, vector + len + 16, tail);
-    memcpy(nistvector + len + tail, vector + len, 16);
-    len += 16 + tail;
-    tail = 16;
-
-    /* test block-based encryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_nistcts128_encrypt_block(test_input, ciphertext, len, &encks, iv,
-                                    (block128_f) AES_encrypt);
-    if (memcmp(ciphertext, nistvector, len))
-        fprintf(stderr, "output_%d mismatch\n", len), exit(1);
-    if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(1);
-
-    /* test block-based decryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_nistcts128_decrypt_block(ciphertext, cleartext, len, &decks, iv,
-                                    (block128_f) AES_decrypt);
-    if (memcmp(cleartext, test_input, len))
-        fprintf(stderr, "input_%d mismatch\n", len), exit(2);
-    if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(2);
-
-    /* test streamed encryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_nistcts128_encrypt(test_input, ciphertext, len, &encks, iv,
-                              (cbc128_f) AES_cbc_encrypt);
-    if (memcmp(ciphertext, nistvector, len))
-        fprintf(stderr, "output_%d mismatch\n", len), exit(3);
-    if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(3);
-
-    /* test streamed decryption */
-    memcpy(iv, test_iv, sizeof(test_iv));
-    CRYPTO_nistcts128_decrypt(ciphertext, cleartext, len, &decks, iv,
-                              (cbc128_f) AES_cbc_encrypt);
-    if (memcmp(cleartext, test_input, len))
-        fprintf(stderr, "input_%d mismatch\n", len), exit(4);
-    if (memcmp(iv, nistvector + len - tail, sizeof(iv)))
-        fprintf(stderr, "iv_%d mismatch\n", len), exit(4);
-}
-
-int main()
-{
-    AES_set_encrypt_key(test_key, 128, &encks);
-    AES_set_decrypt_key(test_key, 128, &decks);
-
-    test_vector(vector_17, sizeof(vector_17));
-    test_vector(vector_31, sizeof(vector_31));
-    test_vector(vector_32, sizeof(vector_32));
-    test_vector(vector_47, sizeof(vector_47));
-    test_vector(vector_48, sizeof(vector_48));
-    test_vector(vector_64, sizeof(vector_64));
-
-    test_nistvector(vector_17, sizeof(vector_17));
-    test_nistvector(vector_31, sizeof(vector_31));
-    test_nistvector(vector_32, sizeof(vector_32));
-    test_nistvector(vector_47, sizeof(vector_47));
-    test_nistvector(vector_48, sizeof(vector_48));
-    test_nistvector(vector_64, sizeof(vector_64));
-
-    return 0;
-}
-#endif
diff --git a/deps/openssl/openssl/crypto/modes/gcm128.c b/deps/openssl/openssl/crypto/modes/gcm128.c
index a2b05c4d6c..15f76e3e86 100644
--- a/deps/openssl/openssl/crypto/modes/gcm128.c
+++ b/deps/openssl/openssl/crypto/modes/gcm128.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -209,7 +209,7 @@ static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
     }
 }
 
-# define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
+# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
 
 #elif   TABLE_BITS==4
 
@@ -550,7 +550,7 @@ void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);
 # endif
 
-# define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
+# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
 #  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
 /*
@@ -624,7 +624,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
     }
 }
 
-# define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
+# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
 
 #endif
 
@@ -703,7 +703,7 @@ void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
 
 #ifdef GCM_FUNCREF_4BIT
 # undef  GCM_MUL
-# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
+# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
 # ifdef GHASH
 #  undef  GHASH
 #  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
@@ -836,10 +836,6 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
 #endif
 
-    ctx->Yi.u[0] = 0;
-    ctx->Yi.u[1] = 0;
-    ctx->Xi.u[0] = 0;
-    ctx->Xi.u[1] = 0;
     ctx->len.u[0] = 0;          /* AAD length */
     ctx->len.u[1] = 0;          /* message length */
     ctx->ares = 0;
@@ -847,53 +843,68 @@ void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
 
     if (len == 12) {
         memcpy(ctx->Yi.c, iv, 12);
+        ctx->Yi.c[12] = 0;
+        ctx->Yi.c[13] = 0;
+        ctx->Yi.c[14] = 0;
         ctx->Yi.c[15] = 1;
         ctr = 1;
     } else {
         size_t i;
         u64 len0 = len;
 
+        /* Borrow ctx->Xi to calculate initial Yi */
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+
         while (len >= 16) {
             for (i = 0; i < 16; ++i)
-                ctx->Yi.c[i] ^= iv[i];
-            GCM_MUL(ctx, Yi);
+                ctx->Xi.c[i] ^= iv[i];
+            GCM_MUL(ctx);
             iv += 16;
             len -= 16;
         }
         if (len) {
             for (i = 0; i < len; ++i)
-                ctx->Yi.c[i] ^= iv[i];
-            GCM_MUL(ctx, Yi);
+                ctx->Xi.c[i] ^= iv[i];
+            GCM_MUL(ctx);
         }
         len0 <<= 3;
         if (is_endian.little) {
 #ifdef BSWAP8
-            ctx->Yi.u[1] ^= BSWAP8(len0);
+            ctx->Xi.u[1] ^= BSWAP8(len0);
 #else
-            ctx->Yi.c[8] ^= (u8)(len0 >> 56);
-            ctx->Yi.c[9] ^= (u8)(len0 >> 48);
-            ctx->Yi.c[10] ^= (u8)(len0 >> 40);
-            ctx->Yi.c[11] ^= (u8)(len0 >> 32);
-            ctx->Yi.c[12] ^= (u8)(len0 >> 24);
-            ctx->Yi.c[13] ^= (u8)(len0 >> 16);
-            ctx->Yi.c[14] ^= (u8)(len0 >> 8);
-            ctx->Yi.c[15] ^= (u8)(len0);
+            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
+            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
+            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
+            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
+            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
+            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
+            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
+            ctx->Xi.c[15] ^= (u8)(len0);
 #endif
-        } else
-            ctx->Yi.u[1] ^= len0;
+        } else {
+            ctx->Xi.u[1] ^= len0;
+        }
 
-        GCM_MUL(ctx, Yi);
+        GCM_MUL(ctx);
 
         if (is_endian.little)
 #ifdef BSWAP4
-            ctr = BSWAP4(ctx->Yi.d[3]);
+            ctr = BSWAP4(ctx->Xi.d[3]);
 #else
-            ctr = GETU32(ctx->Yi.c + 12);
+            ctr = GETU32(ctx->Xi.c + 12);
 #endif
         else
-            ctr = ctx->Yi.d[3];
+            ctr = ctx->Xi.d[3];
+
+        /* Copy borrowed Xi to Yi */
+        ctx->Yi.u[0] = ctx->Xi.u[0];
+        ctx->Yi.u[1] = ctx->Xi.u[1];
     }
 
+    ctx->Xi.u[0] = 0;
+    ctx->Xi.u[1] = 0;
+
     (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
     ++ctr;
     if (is_endian.little)
@@ -936,7 +947,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
             n = (n + 1) % 16;
         }
         if (n == 0)
-            GCM_MUL(ctx, Xi);
+            GCM_MUL(ctx);
         else {
             ctx->ares = n;
             return 0;
@@ -952,7 +963,7 @@ int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
     while (len >= 16) {
         for (i = 0; i < 16; ++i)
             ctx->Xi.c[i] ^= aad[i];
-        GCM_MUL(ctx, Xi);
+        GCM_MUL(ctx);
         aad += 16;
         len -= 16;
     }
@@ -975,7 +986,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     block128_f block = ctx->block;
@@ -993,9 +1004,23 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to encrypt finalizes GHASH(AAD) */
-        GCM_MUL(ctx, Xi);
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
+        GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1008,28 +1033,48 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
 #if !defined(OPENSSL_SMALL_FOOTPRINT)
     if (16 % sizeof(size_t) == 0) { /* always true actually */
         do {
             if (n) {
+# if defined(GHASH)
+                while (n && len) {
+                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+                    --len;
+                    n = (n + 1) % 16;
+                }
+                if (n == 0) {
+                    GHASH(ctx, ctx->Xn, mres);
+                    mres = 0;
+                } else {
+                    ctx->mres = mres;
+                    return 0;
+                }
+# else
                 while (n && len) {
                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                     --len;
                     n = (n + 1) % 16;
                 }
-                if (n == 0)
-                    GCM_MUL(ctx, Xi);
-                else {
+                if (n == 0) {
+                    GCM_MUL(ctx);
+                    mres = 0;
+                } else {
                     ctx->mres = n;
                     return 0;
                 }
+# endif
             }
 # if defined(STRICT_ALIGNMENT)
             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                 break;
 # endif
 # if defined(GHASH)
+            if (len >= 16 && mres) {
+                GHASH(ctx, ctx->Xn, mres);
+                mres = 0;
+            }
 #  if defined(GHASH_CHUNK)
             while (len >= GHASH_CHUNK) {
                 size_t j = GHASH_CHUNK;
@@ -1100,7 +1145,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                     ctx->Yi.d[3] = ctr;
                 for (i = 0; i < 16 / sizeof(size_t); ++i)
                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
-                GCM_MUL(ctx, Xi);
+                GCM_MUL(ctx);
                 out += 16;
                 in += 16;
                 len -= 16;
@@ -1117,13 +1162,21 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
 # endif
                 else
                     ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+                while (len--) {
+                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+                    ++n;
+                }
+# else
                 while (len--) {
                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                     ++n;
                 }
+                mres = n;
+# endif
             }
 
-            ctx->mres = n;
+            ctx->mres = mres;
             return 0;
         } while (0);
     }
@@ -1141,13 +1194,22 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
             else
                 ctx->Yi.d[3] = ctr;
         }
-        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
         n = (n + 1) % 16;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+            mres = 0;
+        }
+#else
+        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+        mres = n = (n + 1) % 16;
         if (n == 0)
-            GCM_MUL(ctx, Xi);
+            GCM_MUL(ctx);
+#endif
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 }
 
@@ -1159,7 +1221,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     block128_f block = ctx->block;
@@ -1177,9 +1239,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to decrypt finalizes GHASH(AAD) */
-        GCM_MUL(ctx, Xi);
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
+        GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1192,11 +1268,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
 #if !defined(OPENSSL_SMALL_FOOTPRINT)
     if (16 % sizeof(size_t) == 0) { /* always true actually */
         do {
             if (n) {
+# if defined(GHASH)
+                while (n && len) {
+                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+                    --len;
+                    n = (n + 1) % 16;
+                }
+                if (n == 0) {
+                    GHASH(ctx, ctx->Xn, mres);
+                    mres = 0;
+                } else {
+                    ctx->mres = mres;
+                    return 0;
+                }
+# else
                 while (n && len) {
                     u8 c = *(in++);
                     *(out++) = c ^ ctx->EKi.c[n];
@@ -1204,18 +1294,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                     --len;
                     n = (n + 1) % 16;
                 }
-                if (n == 0)
-                    GCM_MUL(ctx, Xi);
-                else {
+                if (n == 0) {
+                    GCM_MUL(ctx);
+                    mres = 0;
+                } else {
                     ctx->mres = n;
                     return 0;
                 }
+# endif
             }
 # if defined(STRICT_ALIGNMENT)
             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                 break;
 # endif
 # if defined(GHASH)
+            if (len >= 16 && mres) {
+                GHASH(ctx, ctx->Xn, mres);
+                mres = 0;
+            }
 #  if defined(GHASH_CHUNK)
             while (len >= GHASH_CHUNK) {
                 size_t j = GHASH_CHUNK;
@@ -1287,7 +1383,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                     out[i] = c ^ ctx->EKi.t[i];
                     ctx->Xi.t[i] ^= c;
                 }
-                GCM_MUL(ctx, Xi);
+                GCM_MUL(ctx);
                 out += 16;
                 in += 16;
                 len -= 16;
@@ -1304,15 +1400,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
 # endif
                 else
                     ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+                while (len--) {
+                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+                    ++n;
+                }
+# else
                 while (len--) {
                     u8 c = in[n];
                     ctx->Xi.c[n] ^= c;
                     out[n] = c ^ ctx->EKi.c[n];
                     ++n;
                 }
+                mres = n;
+# endif
             }
 
-            ctx->mres = n;
+            ctx->mres = mres;
             return 0;
         } while (0);
     }
@@ -1331,15 +1435,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
             else
                 ctx->Yi.d[3] = ctr;
         }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
+        n = (n + 1) % 16;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+            mres = 0;
+        }
+#else
         c = in[i];
         out[i] = c ^ ctx->EKi.c[n];
         ctx->Xi.c[n] ^= c;
-        n = (n + 1) % 16;
+        mres = n = (n + 1) % 16;
         if (n == 0)
-            GCM_MUL(ctx, Xi);
+            GCM_MUL(ctx);
+#endif
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 }
 
@@ -1354,7 +1467,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     void *key = ctx->key;
@@ -1371,9 +1484,23 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to encrypt finalizes GHASH(AAD) */
-        GCM_MUL(ctx, Xi);
+#if defined(GHASH)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
+        GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1386,30 +1513,51 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
     if (n) {
+# if defined(GHASH)
+        while (n && len) {
+            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+            --len;
+            n = (n + 1) % 16;
+        }
+        if (n == 0) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        } else {
+            ctx->mres = mres;
+            return 0;
+        }
+# else
         while (n && len) {
             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
             --len;
             n = (n + 1) % 16;
         }
-        if (n == 0)
-            GCM_MUL(ctx, Xi);
-        else {
+        if (n == 0) {
+            GCM_MUL(ctx);
+            mres = 0;
+        } else {
             ctx->mres = n;
             return 0;
         }
+# endif
     }
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+        if (len >= 16 && mres) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        }
+#  if defined(GHASH_CHUNK)
     while (len >= GHASH_CHUNK) {
         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
         ctr += GHASH_CHUNK / 16;
         if (is_endian.little)
-#  ifdef BSWAP4
+#   ifdef BSWAP4
             ctx->Yi.d[3] = BSWAP4(ctr);
-#  else
+#   else
             PUTU32(ctx->Yi.c + 12, ctr);
-#  endif
+#   endif
         else
             ctx->Yi.d[3] = ctr;
         GHASH(ctx, out, GHASH_CHUNK);
@@ -1417,6 +1565,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         in += GHASH_CHUNK;
         len -= GHASH_CHUNK;
     }
+#  endif
 # endif
     if ((i = (len & (size_t)-16))) {
         size_t j = i / 16;
@@ -1440,7 +1589,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         while (j--) {
             for (i = 0; i < 16; ++i)
                 ctx->Xi.c[i] ^= out[i];
-            GCM_MUL(ctx, Xi);
+            GCM_MUL(ctx);
             out += 16;
         }
 # endif
@@ -1457,12 +1606,16 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         else
             ctx->Yi.d[3] = ctr;
         while (len--) {
-            ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# if defined(GHASH)
+            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+# else
+            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# endif
             ++n;
         }
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 #endif
 }
@@ -1478,7 +1631,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     void *key = ctx->key;
@@ -1495,9 +1648,23 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to decrypt finalizes GHASH(AAD) */
-        GCM_MUL(ctx, Xi);
+# if defined(GHASH)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+# else
+        GCM_MUL(ctx);
+# endif
         ctx->ares = 0;
     }
 
@@ -1510,8 +1677,22 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
     if (n) {
+# if defined(GHASH)
+        while (n && len) {
+            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+            --len;
+            n = (n + 1) % 16;
+        }
+        if (n == 0) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        } else {
+            ctx->mres = mres;
+            return 0;
+        }
+# else
         while (n && len) {
             u8 c = *(in++);
             *(out++) = c ^ ctx->EKi.c[n];
@@ -1519,30 +1700,38 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
             --len;
             n = (n + 1) % 16;
         }
-        if (n == 0)
-            GCM_MUL(ctx, Xi);
-        else {
+        if (n == 0) {
+            GCM_MUL(ctx);
+            mres = 0;
+        } else {
             ctx->mres = n;
             return 0;
         }
+# endif
     }
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+    if (len >= 16 && mres) {
+        GHASH(ctx, ctx->Xn, mres);
+        mres = 0;
+    }
+#  if defined(GHASH_CHUNK)
     while (len >= GHASH_CHUNK) {
         GHASH(ctx, in, GHASH_CHUNK);
         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
         ctr += GHASH_CHUNK / 16;
         if (is_endian.little)
-#  ifdef BSWAP4
+#   ifdef BSWAP4
             ctx->Yi.d[3] = BSWAP4(ctr);
-#  else
+#   else
             PUTU32(ctx->Yi.c + 12, ctr);
-#  endif
+#   endif
         else
             ctx->Yi.d[3] = ctr;
         out += GHASH_CHUNK;
         in += GHASH_CHUNK;
         len -= GHASH_CHUNK;
     }
+#  endif
 # endif
     if ((i = (len & (size_t)-16))) {
         size_t j = i / 16;
@@ -1554,7 +1743,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
             size_t k;
             for (k = 0; k < 16; ++k)
                 ctx->Xi.c[k] ^= in[k];
-            GCM_MUL(ctx, Xi);
+            GCM_MUL(ctx);
             in += 16;
         }
         j = i / 16;
@@ -1586,14 +1775,18 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         else
             ctx->Yi.d[3] = ctr;
         while (len--) {
+# if defined(GHASH)
+            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+# else
             u8 c = in[n];
-            ctx->Xi.c[n] ^= c;
+            ctx->Xi.c[mres++] ^= c;
             out[n] = c ^ ctx->EKi.c[n];
+# endif
             ++n;
         }
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 #endif
 }
@@ -1609,10 +1802,32 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
     u64 clen = ctx->len.u[1] << 3;
 #ifdef GCM_FUNCREF_4BIT
     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
+                         const u8 *inp, size_t len) = ctx->ghash;
+# endif
 #endif
 
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    u128 bitlen;
+    unsigned int mres = ctx->mres;
+
+    if (mres) {
+        unsigned blocks = (mres + 15) & -16;
+
+        memset(ctx->Xn + mres, 0, blocks - mres);
+        mres = blocks;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        }
+    } else if (ctx->ares) {
+        GCM_MUL(ctx);
+    }
+#else
     if (ctx->mres || ctx->ares)
-        GCM_MUL(ctx, Xi);
+        GCM_MUL(ctx);
+#endif
 
     if (is_endian.little) {
 #ifdef BSWAP8
@@ -1629,9 +1844,17 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
 #endif
     }
 
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    bitlen.hi = alen;
+    bitlen.lo = clen;
+    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
+    mres += sizeof(bitlen);
+    GHASH(ctx, ctx->Xn, mres);
+#else
     ctx->Xi.u[0] ^= alen;
     ctx->Xi.u[1] ^= clen;
-    GCM_MUL(ctx, Xi);
+    GCM_MUL(ctx);
+#endif
 
     ctx->Xi.u[0] ^= ctx->EK0.u[0];
     ctx->Xi.u[1] ^= ctx->EK0.u[1];
@@ -1663,639 +1886,3 @@ void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
 {
     OPENSSL_clear_free(ctx, sizeof(*ctx));
 }
-
-#if defined(SELFTEST)
-# include <stdio.h>
-# include <openssl/aes.h>
-
-/* Test Case 1 */
-static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
-static const u8 T1[] = {
-    0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
-    0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
-};
-
-/* Test Case 2 */
-# define K2 K1
-# define A2 A1
-# define IV2 IV1
-static const u8 P2[16];
-static const u8 C2[] = {
-    0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
-    0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
-};
-
-static const u8 T2[] = {
-    0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
-    0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
-};
-
-/* Test Case 3 */
-# define A3 A2
-static const u8 K3[] = {
-    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
-    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
-};
-
-static const u8 P3[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
-};
-
-static const u8 IV3[] = {
-    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
-    0xde, 0xca, 0xf8, 0x88
-};
-
-static const u8 C3[] = {
-    0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
-    0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
-    0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
-    0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
-    0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
-    0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
-    0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
-    0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
-};
-
-static const u8 T3[] = {
-    0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
-    0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
-};
-
-/* Test Case 4 */
-# define K4 K3
-# define IV4 IV3
-static const u8 P4[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39
-};
-
-static const u8 A4[] = {
-    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
-    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
-    0xab, 0xad, 0xda, 0xd2
-};
-
-static const u8 C4[] = {
-    0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
-    0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
-    0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
-    0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
-    0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
-    0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
-    0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
-    0x3d, 0x58, 0xe0, 0x91
-};
-
-static const u8 T4[] = {
-    0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
-    0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
-};
-
-/* Test Case 5 */
-# define K5 K4
-# define P5 P4
-# define A5 A4
-static const u8 IV5[] = {
-    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
-};
-
-static const u8 C5[] = {
-    0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
-    0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
-    0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
-    0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
-    0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
-    0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
-    0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
-    0xc2, 0x3f, 0x45, 0x98
-};
-
-static const u8 T5[] = {
-    0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
-    0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
-};
-
-/* Test Case 6 */
-# define K6 K5
-# define P6 P5
-# define A6 A5
-static const u8 IV6[] = {
-    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
-    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
-    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
-    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
-    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
-    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
-    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
-    0xa6, 0x37, 0xb3, 0x9b
-};
-
-static const u8 C6[] = {
-    0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
-    0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
-    0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
-    0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
-    0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
-    0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
-    0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
-    0x4c, 0x34, 0xae, 0xe5
-};
-
-static const u8 T6[] = {
-    0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
-    0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
-};
-
-/* Test Case 7 */
-static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
-static const u8 T7[] = {
-    0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
-    0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
-};
-
-/* Test Case 8 */
-# define K8 K7
-# define IV8 IV7
-# define A8 A7
-static const u8 P8[16];
-static const u8 C8[] = {
-    0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
-    0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
-};
-
-static const u8 T8[] = {
-    0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
-    0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
-};
-
-/* Test Case 9 */
-# define A9 A8
-static const u8 K9[] = {
-    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
-    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
-    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
-};
-
-static const u8 P9[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
-};
-
-static const u8 IV9[] = {
-    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
-    0xde, 0xca, 0xf8, 0x88
-};
-
-static const u8 C9[] = {
-    0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
-    0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
-    0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
-    0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
-    0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
-    0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
-    0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
-    0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
-};
-
-static const u8 T9[] = {
-    0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
-    0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
-};
-
-/* Test Case 10 */
-# define K10 K9
-# define IV10 IV9
-static const u8 P10[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39
-};
-
-static const u8 A10[] = {
-    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
-    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
-    0xab, 0xad, 0xda, 0xd2
-};
-
-static const u8 C10[] = {
-    0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
-    0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
-    0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
-    0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
-    0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
-    0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
-    0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
-    0xcc, 0xda, 0x27, 0x10
-};
-
-static const u8 T10[] = {
-    0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
-    0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
-};
-
-/* Test Case 11 */
-# define K11 K10
-# define P11 P10
-# define A11 A10
-static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
-
-static const u8 C11[] = {
-    0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
-    0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
-    0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
-    0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
-    0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
-    0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
-    0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
-    0xa0, 0xf0, 0x62, 0xf7
-};
-
-static const u8 T11[] = {
-    0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
-    0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
-};
-
-/* Test Case 12 */
-# define K12 K11
-# define P12 P11
-# define A12 A11
-static const u8 IV12[] = {
-    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
-    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
-    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
-    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
-    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
-    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
-    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
-    0xa6, 0x37, 0xb3, 0x9b
-};
-
-static const u8 C12[] = {
-    0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
-    0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
-    0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
-    0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
-    0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
-    0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
-    0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
-    0xe9, 0xb7, 0x37, 0x3b
-};
-
-static const u8 T12[] = {
-    0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
-    0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
-};
-
-/* Test Case 13 */
-static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
-static const u8 T13[] = {
-    0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
-    0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
-};
-
-/* Test Case 14 */
-# define K14 K13
-# define A14 A13
-static const u8 P14[16], IV14[12];
-static const u8 C14[] = {
-    0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
-    0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
-};
-
-static const u8 T14[] = {
-    0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
-    0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
-};
-
-/* Test Case 15 */
-# define A15 A14
-static const u8 K15[] = {
-    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
-    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
-    0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
-    0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
-};
-
-static const u8 P15[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
-};
-
-static const u8 IV15[] = {
-    0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
-    0xde, 0xca, 0xf8, 0x88
-};
-
-static const u8 C15[] = {
-    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
-    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
-    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
-    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
-    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
-    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
-    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
-    0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
-};
-
-static const u8 T15[] = {
-    0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
-    0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
-};
-
-/* Test Case 16 */
-# define K16 K15
-# define IV16 IV15
-static const u8 P16[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39
-};
-
-static const u8 A16[] = {
-    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
-    0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
-    0xab, 0xad, 0xda, 0xd2
-};
-
-static const u8 C16[] = {
-    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
-    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
-    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
-    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
-    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
-    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
-    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
-    0xbc, 0xc9, 0xf6, 0x62
-};
-
-static const u8 T16[] = {
-    0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
-    0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
-};
-
-/* Test Case 17 */
-# define K17 K16
-# define P17 P16
-# define A17 A16
-static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
-
-static const u8 C17[] = {
-    0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
-    0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
-    0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
-    0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
-    0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
-    0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
-    0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
-    0xf4, 0x7c, 0x9b, 0x1f
-};
-
-static const u8 T17[] = {
-    0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
-    0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
-};
-
-/* Test Case 18 */
-# define K18 K17
-# define P18 P17
-# define A18 A17
-static const u8 IV18[] = {
-    0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
-    0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
-    0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
-    0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
-    0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
-    0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
-    0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
-    0xa6, 0x37, 0xb3, 0x9b
-};
-
-static const u8 C18[] = {
-    0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
-    0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
-    0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
-    0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
-    0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
-    0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
-    0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
-    0x44, 0xae, 0x7e, 0x3f
-};
-
-static const u8 T18[] = {
-    0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
-    0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
-};
-
-/* Test Case 19 */
-# define K19 K1
-# define P19 P1
-# define IV19 IV1
-# define C19 C1
-static const u8 A19[] = {
-    0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
-    0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
-    0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
-    0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
-    0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
-    0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
-    0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
-    0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
-    0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
-    0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
-    0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
-    0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
-    0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
-    0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
-    0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
-    0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
-};
-
-static const u8 T19[] = {
-    0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
-    0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
-};
-
-/* Test Case 20 */
-# define K20 K1
-# define A20 A1
-/* this results in 0xff in counter LSB */
-static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
-
-static const u8 P20[288];
-static const u8 C20[] = {
-    0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
-    0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
-    0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
-    0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
-    0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
-    0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
-    0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
-    0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
-    0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
-    0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
-    0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
-    0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
-    0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
-    0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
-    0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
-    0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
-    0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
-    0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
-    0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
-    0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
-    0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
-    0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
-    0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
-    0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
-    0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
-    0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
-    0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
-    0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
-    0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
-    0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
-    0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
-    0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
-    0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
-    0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
-    0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
-    0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
-};
-
-static const u8 T20[] = {
-    0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
-    0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
-};
-
-# define TEST_CASE(n)    do {                                    \
-        u8 out[sizeof(P##n)];                                   \
-        AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);          \
-        CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);  \
-        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
-        memset(out,0,sizeof(out));                              \
-        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
-        if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));     \
-        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
-            (C##n && memcmp(out,C##n,sizeof(out))))             \
-                ret++, printf ("encrypt test#%d failed.\n",n);  \
-        CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));          \
-        memset(out,0,sizeof(out));                              \
-        if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));    \
-        if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));     \
-        if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||               \
-            (P##n && memcmp(out,P##n,sizeof(out))))             \
-                ret++, printf ("decrypt test#%d failed.\n",n);  \
-        } while(0)
-
-int main()
-{
-    GCM128_CONTEXT ctx;
-    AES_KEY key;
-    int ret = 0;
-
-    TEST_CASE(1);
-    TEST_CASE(2);
-    TEST_CASE(3);
-    TEST_CASE(4);
-    TEST_CASE(5);
-    TEST_CASE(6);
-    TEST_CASE(7);
-    TEST_CASE(8);
-    TEST_CASE(9);
-    TEST_CASE(10);
-    TEST_CASE(11);
-    TEST_CASE(12);
-    TEST_CASE(13);
-    TEST_CASE(14);
-    TEST_CASE(15);
-    TEST_CASE(16);
-    TEST_CASE(17);
-    TEST_CASE(18);
-    TEST_CASE(19);
-    TEST_CASE(20);
-
-# ifdef OPENSSL_CPUID_OBJ
-    {
-        size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
-        union {
-            u64 u;
-            u8 c[1024];
-        } buf;
-        int i;
-
-        AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
-        CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
-        CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
-
-        CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
-        start = OPENSSL_rdtsc();
-        CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
-        gcm_t = OPENSSL_rdtsc() - start;
-
-        CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
-                              &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
-                              (block128_f) AES_encrypt);
-        start = OPENSSL_rdtsc();
-        CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
-                              &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
-                              (block128_f) AES_encrypt);
-        ctr_t = OPENSSL_rdtsc() - start;
-
-        printf("%.2f-%.2f=%.2f\n",
-               gcm_t / (double)sizeof(buf),
-               ctr_t / (double)sizeof(buf),
-               (gcm_t - ctr_t) / (double)sizeof(buf));
-#  ifdef GHASH
-        {
-            void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
-                                 const u8 *inp, size_t len) = ctx.ghash;
-
-            GHASH((&ctx), buf.c, sizeof(buf));
-            start = OPENSSL_rdtsc();
-            for (i = 0; i < 100; ++i)
-                GHASH((&ctx), buf.c, sizeof(buf));
-            gcm_t = OPENSSL_rdtsc() - start;
-            printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
-        }
-#  endif
-    }
-# endif
-
-    return ret;
-}
-#endif
diff --git a/deps/openssl/openssl/crypto/modes/modes_lcl.h b/deps/openssl/openssl/crypto/modes/modes_lcl.h
index 4fc32e190f..f2ae01d11a 100644
--- a/deps/openssl/openssl/crypto/modes/modes_lcl.h
+++ b/deps/openssl/openssl/crypto/modes/modes_lcl.h
@@ -73,6 +73,7 @@ typedef unsigned char u8;
 #  endif
 # elif defined(_MSC_VER)
 #  if _MSC_VER>=1300
+#   include <stdlib.h>
 #   pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
 #   define BSWAP8(x)    _byteswap_uint64((u64)(x))
 #   define BSWAP4(x)    _byteswap_ulong((u32)(x))
@@ -127,6 +128,9 @@ struct gcm128_context {
     unsigned int mres, ares;
     block128_f block;
     void *key;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+    unsigned char Xn[48];
+#endif
 };
 
 struct xts128_context {
diff --git a/deps/openssl/openssl/crypto/modes/ocb128.c b/deps/openssl/openssl/crypto/modes/ocb128.c
index fc92b246bd..713b9aaf19 100644
--- a/deps/openssl/openssl/crypto/modes/ocb128.c
+++ b/deps/openssl/openssl/crypto/modes/ocb128.c
@@ -9,6 +9,7 @@
 
 #include <string.h>
 #include <openssl/crypto.h>
+#include <openssl/err.h>
 #include "modes_lcl.h"
 
 #ifndef OPENSSL_NO_OCB
@@ -41,22 +42,13 @@ static u32 ocb_ntz(u64 n)
 static void ocb_block_lshift(const unsigned char *in, size_t shift,
                              unsigned char *out)
 {
-    unsigned char shift_mask;
     int i;
-    unsigned char mask[15];
+    unsigned char carry = 0, carry_next;
 
-    shift_mask = 0xff;
-    shift_mask <<= (8 - shift);
     for (i = 15; i >= 0; i--) {
-        if (i > 0) {
-            mask[i - 1] = in[i] & shift_mask;
-            mask[i - 1] >>= 8 - shift;
-        }
-        out[i] = in[i] << shift;
-
-        if (i != 15) {
-            out[i] ^= mask[i];
-        }
+        carry_next = in[i] >> (8 - shift);
+        out[i] = (in[i] << shift) | carry;
+        carry = carry_next;
     }
 }
 
@@ -73,7 +65,7 @@ static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
      */
     mask = in->c[0] & 0x80;
     mask >>= 7;
-    mask *= 135;
+    mask = (0 - mask) & 0x87;
 
     ocb_block_lshift(in->c, 1, out->c);
 
@@ -118,8 +110,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
          * the index.
          */
         ctx->max_l_index += (idx - ctx->max_l_index + 4) & ~3;
-        tmp_ptr =
-            OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK));
+        tmp_ptr = OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK));
         if (tmp_ptr == NULL) /* prevent ctx->l from being clobbered */
             return NULL;
         ctx->l = tmp_ptr;
@@ -164,9 +155,10 @@ int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec,
     memset(ctx, 0, sizeof(*ctx));
     ctx->l_index = 0;
     ctx->max_l_index = 5;
-    ctx->l = OPENSSL_malloc(ctx->max_l_index * 16);
-    if (ctx->l == NULL)
+    if ((ctx->l = OPENSSL_malloc(ctx->max_l_index * 16)) == NULL) {
+        CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_INIT, ERR_R_MALLOC_FAILURE);
         return 0;
+    }
 
     /*
      * We set both the encryption and decryption key schedules - decryption
@@ -210,9 +202,10 @@ int CRYPTO_ocb128_copy_ctx(OCB128_CONTEXT *dest, OCB128_CONTEXT *src,
     if (keydec)
         dest->keydec = keydec;
     if (src->l) {
-        dest->l = OPENSSL_malloc(src->max_l_index * 16);
-        if (dest->l == NULL)
+        if ((dest->l = OPENSSL_malloc(src->max_l_index * 16)) == NULL) {
+            CRYPTOerr(CRYPTO_F_CRYPTO_OCB128_COPY_CTX, ERR_R_MALLOC_FAILURE);
             return 0;
+        }
         memcpy(dest->l, src->l, (src->l_index + 1) * 16);
     }
     return 1;
diff --git a/deps/openssl/openssl/crypto/modes/wrap128.c b/deps/openssl/openssl/crypto/modes/wrap128.c
index 46809a0e74..d7e56cc260 100644
--- a/deps/openssl/openssl/crypto/modes/wrap128.c
+++ b/deps/openssl/openssl/crypto/modes/wrap128.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2013-2018 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -237,7 +237,7 @@ size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
  *
  *  @param[in]  key    Key value.
  *  @param[in]  icv    (Non-standard) IV, 4 bytes. NULL = use default_aiv.
- *  @param[out] out    Plaintext. Minimal buffer length = inlen bytes.
+ *  @param[out] out    Plaintext. Minimal buffer length = (inlen - 8) bytes.
  *                     Input and output buffers can overlap if block function
  *                     supports that.
  *  @param[in]  in     Ciphertext as n 64-bit blocks.
@@ -267,7 +267,6 @@ size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
     if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX)
         return 0;
 
-    memmove(out, in, inlen);
     if (inlen == 16) {
         /*
          * Section 4.2 - special case in step 1: When n=1, the ciphertext
@@ -275,14 +274,17 @@ size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
          * single AES block using AES in ECB mode: AIV | P[1] = DEC(K, C[0] |
          * C[1])
          */
-        block(out, out, key);
-        memcpy(aiv, out, 8);
+        unsigned char buff[16];
+
+        block(in, buff, key);
+        memcpy(aiv, buff, 8);
         /* Remove AIV */
-        memmove(out, out + 8, 8);
+        memcpy(out, buff + 8, 8);
         padded_len = 8;
+        OPENSSL_cleanse(buff, inlen);
     } else {
         padded_len = inlen - 8;
-        ret = crypto_128_unwrap_raw(key, aiv, out, out, inlen, block);
+        ret = crypto_128_unwrap_raw(key, aiv, out, in, inlen, block);
         if (padded_len != ret) {
             OPENSSL_cleanse(out, inlen);
             return 0;