zap whitespace and tab
This commit is contained in:
parent
5b49f88fed
commit
4de47ea988
681 changed files with 35748 additions and 35743 deletions
|
@ -96,7 +96,7 @@ $code.=<<___;
|
|||
#endif
|
||||
.set noat
|
||||
___
|
||||
|
||||
|
||||
{{{
|
||||
my $FRAMESIZE=16*$SZREG;
|
||||
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
|
||||
|
@ -437,7 +437,7 @@ $code.=<<___;
|
|||
$PTR_ADD $sp,$FRAMESIZE
|
||||
.end AES_encrypt
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.ent _mips_AES_decrypt
|
||||
|
@ -774,7 +774,7 @@ $code.=<<___;
|
|||
.end AES_decrypt
|
||||
___
|
||||
}}}
|
||||
|
||||
|
||||
{{{
|
||||
my $FRAMESIZE=8*$SZREG;
|
||||
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
|
||||
|
@ -1087,7 +1087,7 @@ $code.=<<___;
|
|||
$PTR_ADD $sp,$FRAMESIZE
|
||||
.end AES_set_encrypt_key
|
||||
___
|
||||
|
||||
|
||||
my ($head,$tail)=($inp,$bits);
|
||||
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
|
||||
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
|
||||
|
@ -1570,7 +1570,7 @@ AES_Td:
|
|||
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
|
||||
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
|
||||
___
|
||||
|
||||
|
||||
foreach (split("\n",$code)) {
|
||||
s/\`([^\`]*)\`/eval $1/ge;
|
||||
|
||||
|
|
|
@ -90,7 +90,7 @@ sub aesenc { aescommon(0xdc,@_); }
|
|||
sub aesenclast { aescommon(0xdd,@_); }
|
||||
sub aesdec { aescommon(0xde,@_); }
|
||||
sub aesdeclast { aescommon(0xdf,@_); }
|
||||
|
||||
|
||||
# Inline version of internal aesni_[en|de]crypt1
|
||||
{ my $sn;
|
||||
sub aesni_inline_generate1
|
||||
|
@ -157,7 +157,7 @@ sub aesni_generate1 # fully unrolled loop
|
|||
&ret();
|
||||
&function_end_B("_aesni_${p}rypt1");
|
||||
}
|
||||
|
||||
|
||||
# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
|
||||
&aesni_generate1("enc") if (!$inline);
|
||||
&function_begin_B("${PREFIX}_encrypt");
|
||||
|
@ -349,7 +349,7 @@ sub aesni_generate6
|
|||
&aesni_generate4("dec");
|
||||
&aesni_generate6("enc") if ($PREFIX eq "aesni");
|
||||
&aesni_generate6("dec");
|
||||
|
||||
|
||||
if ($PREFIX eq "aesni") {
|
||||
######################################################################
|
||||
# void aesni_ecb_encrypt (const void *in, void *out,
|
||||
|
@ -569,7 +569,7 @@ if ($PREFIX eq "aesni") {
|
|||
|
||||
&set_label("ecb_ret");
|
||||
&function_end("aesni_ecb_encrypt");
|
||||
|
||||
|
||||
######################################################################
|
||||
# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
|
||||
# size_t blocks, const AES_KEY *key,
|
||||
|
@ -756,7 +756,7 @@ if ($PREFIX eq "aesni") {
|
|||
&movups (&QWP(0,$out),$cmac);
|
||||
&function_end("aesni_ccm64_decrypt_blocks");
|
||||
}
|
||||
|
||||
|
||||
######################################################################
|
||||
# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
|
||||
# size_t blocks, const AES_KEY *key,
|
||||
|
@ -1010,7 +1010,7 @@ if ($PREFIX eq "aesni") {
|
|||
&set_label("ctr32_ret");
|
||||
&mov ("esp",&DWP(80,"esp"));
|
||||
&function_end("aesni_ctr32_encrypt_blocks");
|
||||
|
||||
|
||||
######################################################################
|
||||
# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len,
|
||||
# const AES_KEY *key1, const AES_KEY *key2
|
||||
|
@ -1716,7 +1716,7 @@ if ($PREFIX eq "aesni") {
|
|||
&function_end("aesni_xts_decrypt");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
######################################################################
|
||||
# void $PREFIX_cbc_encrypt (const void *inp, void *out,
|
||||
# size_t length, const AES_KEY *key,
|
||||
|
@ -1943,7 +1943,7 @@ if ($PREFIX eq "aesni") {
|
|||
&movups (&QWP(0,$key_),$ivec); # output IV
|
||||
&set_label("cbc_abort");
|
||||
&function_end("${PREFIX}_cbc_encrypt");
|
||||
|
||||
|
||||
######################################################################
|
||||
# Mechanical port from aesni-x86_64.pl.
|
||||
#
|
||||
|
|
|
@ -256,7 +256,7 @@ bn_mul_mont:
|
|||
cmplt $i,$num,$tj # borrow $tj
|
||||
stq $hi1,16($tp)
|
||||
bne $tj,.Louter
|
||||
|
||||
|
||||
s8addq $num,sp,$tj # &tp[num]
|
||||
mov $rp,$bp # put rp aside
|
||||
mov sp,$tp
|
||||
|
|
|
@ -110,7 +110,7 @@ bn_mul_mont:
|
|||
adc $nhi,$nhi,#0
|
||||
str $nlo,[$num] @ tp[num-1]=
|
||||
str $nhi,[$num,#4] @ tp[num]=
|
||||
|
||||
|
||||
.Louter:
|
||||
sub $tj,$num,sp @ "original" $num-1 value
|
||||
sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
|
||||
|
@ -158,7 +158,7 @@ bn_mul_mont:
|
|||
|
||||
cmp $tp,$tj
|
||||
bne .Louter
|
||||
|
||||
|
||||
ldr $rp,[$_rp] @ pull rp
|
||||
add $num,$num,#4 @ $num to point at &tp[num]
|
||||
sub $aj,$num,sp @ "original" num value
|
||||
|
|
|
@ -350,7 +350,7 @@ $code.=<<___;
|
|||
addu $i,$BNSZ
|
||||
sltu $t0,$i,$num
|
||||
bnez $t0,.Louter
|
||||
|
||||
|
||||
.set noreorder
|
||||
$PTR_ADD $tj,$sp,$num # &tp[num]
|
||||
move $tp,$sp
|
||||
|
|
|
@ -59,7 +59,7 @@
|
|||
# Special thanks to polarhome.com for providing HP-UX account on
|
||||
# PA-RISC 1.1 machine, and to correspondent who chose to remain
|
||||
# anonymous for testing the code on PA-RISC 2.0 machine.
|
||||
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
|
||||
$flavour = shift;
|
||||
|
@ -228,7 +228,7 @@ $code.=<<___; # PA-RISC 2.0 code-path
|
|||
ldo 8($idx),$idx ; j++++
|
||||
addl $ab0,$nm0,$nm0 ; low part is discarded
|
||||
extrd,u $nm0,31,32,$hi1
|
||||
|
||||
|
||||
L\$1st
|
||||
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
|
||||
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
|
||||
|
@ -349,7 +349,7 @@ L\$outer
|
|||
extrd,u $nm0,31,32,$hi1
|
||||
fstds ${fab0},-16($xfer)
|
||||
fstds ${fnm0},-8($xfer)
|
||||
|
||||
|
||||
L\$inner
|
||||
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
|
||||
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
|
||||
|
@ -464,7 +464,7 @@ $code.=<<___;
|
|||
|
||||
b L\$outer
|
||||
ldo `$LOCALS+32+4`($fp),$tp
|
||||
|
||||
|
||||
L\$outerdone
|
||||
addl $hi0,$ab1,$ab1
|
||||
addl $ti1,$ab1,$ab1
|
||||
|
@ -562,7 +562,7 @@ L\$parisc11
|
|||
ldw 4($xfer),$ablo
|
||||
ldw 0($xfer),$abhi
|
||||
nop
|
||||
|
||||
|
||||
L\$1st_pa11
|
||||
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
|
||||
flddx $idx($ap),${fai} ; ap[j,j+1]
|
||||
|
@ -687,7 +687,7 @@ L\$outer_pa11
|
|||
fstds ${fnm0},-8($xfer)
|
||||
ldw 4($xfer),$ablo
|
||||
ldw 0($xfer),$abhi
|
||||
|
||||
|
||||
L\$inner_pa11
|
||||
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
|
||||
flddx $idx($ap),${fai} ; ap[j,j+1]
|
||||
|
@ -806,7 +806,7 @@ L\$inner_pa11
|
|||
|
||||
b L\$outer_pa11
|
||||
ldo `$LOCALS+32+4`($fp),$tp
|
||||
|
||||
|
||||
L\$outerdone_pa11
|
||||
add $hi0,$ablo,$ablo
|
||||
addc %r0,$abhi,$abhi
|
||||
|
@ -858,7 +858,7 @@ L\$copy_pa11
|
|||
L\$done
|
||||
___
|
||||
}
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
ldi 1,%r28 ; signal "handled"
|
||||
ldo $FRAME($fp),%sp ; destroy tp[num+1]
|
||||
|
@ -877,7 +877,7 @@ L\$abort
|
|||
$POPMB -$FRAME(%sp),%r3
|
||||
.PROCEND
|
||||
___
|
||||
|
||||
|
||||
# Explicitly encode PA-RISC 2.0 instructions used in this module, so
|
||||
# that it can be compiled with .LEVEL 1.0. It should be noted that I
|
||||
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
|
||||
|
|
|
@ -147,7 +147,7 @@ $code.=<<___;
|
|||
|
||||
$LD $n0,0($n0) ; pull n0[0] value
|
||||
addi $num,$num,-2 ; adjust $num for counter register
|
||||
|
||||
|
||||
$LD $m0,0($bp) ; m0=bp[0]
|
||||
$LD $aj,0($ap) ; ap[0]
|
||||
addi $tp,$sp,$LOCALS
|
||||
|
@ -206,7 +206,7 @@ L1st:
|
|||
addc $hi1,$hi1,$hi0
|
||||
addze $ovf,$ovf ; upmost overflow bit
|
||||
$ST $hi1,$BNSZ($tp)
|
||||
|
||||
|
||||
li $i,$BNSZ
|
||||
.align 4
|
||||
Louter:
|
||||
|
@ -230,7 +230,7 @@ Louter:
|
|||
$UMULL $nlo,$nj,$m1 ; np[1]*m1
|
||||
addze $hi1,$hi1
|
||||
$UMULH $nhi,$nj,$m1
|
||||
|
||||
|
||||
mtctr $num
|
||||
li $j,`2*$BNSZ`
|
||||
.align 4
|
||||
|
@ -277,7 +277,7 @@ Linner:
|
|||
$UCMP $i,$tj
|
||||
addi $i,$i,$BNSZ
|
||||
ble- Louter
|
||||
|
||||
|
||||
addi $num,$num,2 ; restore $num
|
||||
subfc $j,$j,$j ; j=0 and "clear" XER[CA]
|
||||
addi $tp,$sp,$LOCALS
|
||||
|
|
|
@ -154,7 +154,7 @@ $T0a="f24"; $T0b="f25";
|
|||
$T1a="f26"; $T1b="f27";
|
||||
$T2a="f28"; $T2b="f29";
|
||||
$T3a="f30"; $T3b="f31";
|
||||
|
||||
|
||||
# sp----------->+-------------------------------+
|
||||
# | saved sp |
|
||||
# +-------------------------------+
|
||||
|
@ -189,7 +189,7 @@ $T3a="f30"; $T3b="f31";
|
|||
# . .
|
||||
# . .
|
||||
# +-------------------------------+
|
||||
|
||||
|
||||
$code=<<___;
|
||||
.machine "any"
|
||||
.text
|
||||
|
@ -260,7 +260,7 @@ $code.=<<___;
|
|||
li $i,-64
|
||||
add $nap_d,$tp,$num
|
||||
and $nap_d,$nap_d,$i ; align to 64 bytes
|
||||
|
||||
|
||||
mulld $t7,$a0,$t3 ; ap[0]*bp[0]
|
||||
; nap_d is off by 1, because it's used with stfdu/lfdu
|
||||
addi $nap_d,$nap_d,-8
|
||||
|
@ -416,7 +416,7 @@ $code.=<<___;
|
|||
stfd $T2b,`$FRAME+40`($sp)
|
||||
stfd $T3a,`$FRAME+48`($sp)
|
||||
stfd $T3b,`$FRAME+56`($sp)
|
||||
|
||||
|
||||
.align 5
|
||||
L1st:
|
||||
___
|
||||
|
@ -562,7 +562,7 @@ $code.=<<___;
|
|||
std $t0,8($tp) ; tp[j-1]
|
||||
stdu $t4,16($tp) ; tp[j]
|
||||
bdnz- L1st
|
||||
|
||||
|
||||
fctid $dota,$dota
|
||||
fctid $dotb,$dotb
|
||||
|
||||
|
@ -614,7 +614,7 @@ $code.=<<___;
|
|||
|
||||
slwi $t7,$num,2
|
||||
subf $nap_d,$t7,$nap_d ; rewind pointer
|
||||
|
||||
|
||||
li $i,8 ; i=1
|
||||
.align 5
|
||||
Louter:
|
||||
|
@ -741,7 +741,7 @@ $code.=<<___;
|
|||
stfd $T2b,`$FRAME+40`($sp)
|
||||
stfd $T3a,`$FRAME+48`($sp)
|
||||
stfd $T3b,`$FRAME+56`($sp)
|
||||
|
||||
|
||||
.align 5
|
||||
Linner:
|
||||
fmul $T1a,$A1,$ba
|
||||
|
@ -857,7 +857,7 @@ $code.=<<___;
|
|||
std $t3,-16($tp) ; tp[j-1]
|
||||
std $t5,-8($tp) ; tp[j]
|
||||
bdnz- Linner
|
||||
|
||||
|
||||
fctid $dota,$dota
|
||||
fctid $dotb,$dotb
|
||||
ld $t0,`$FRAME+0`($sp)
|
||||
|
@ -933,7 +933,7 @@ $code.=<<___;
|
|||
cmpw $i,$num
|
||||
blt- Louter
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___ if ($SIZE_T==8);
|
||||
subf $np,$num,$np ; rewind np
|
||||
addi $j,$j,1 ; restore counter
|
||||
|
@ -1048,7 +1048,7 @@ Lcopy: ; copy or in-place refresh
|
|||
stdu $i,16($tp)
|
||||
bdnz- Lcopy
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
$POP $i,0($sp)
|
||||
li r3,1 ; signal "handled"
|
||||
|
|
|
@ -102,7 +102,7 @@ $frame=32; # size of above frame rounded up to 16n
|
|||
&lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
|
||||
#&mov ($_num,$num); # redundant as $num is not reused
|
||||
&mov ($_sp,"ebp"); # saved stack pointer!
|
||||
|
||||
|
||||
if($sse2) {
|
||||
$acc0="mm0"; # mmx register bank layout
|
||||
$acc1="mm1";
|
||||
|
@ -183,7 +183,7 @@ $mask="mm7";
|
|||
|
||||
&paddq ($car1,$car0);
|
||||
&movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
|
||||
|
||||
|
||||
&inc ($i); # i++
|
||||
&set_label("outer");
|
||||
&xor ($j,$j); # j=0
|
||||
|
@ -262,7 +262,7 @@ $mask="mm7";
|
|||
|
||||
&set_label("non_sse2",16);
|
||||
}
|
||||
|
||||
|
||||
if (0) {
|
||||
&mov ("esp",$_sp);
|
||||
&xor ("eax","eax"); # signal "not fast enough [yet]"
|
||||
|
@ -325,7 +325,7 @@ $carry="ebp";
|
|||
&inc ($j);
|
||||
|
||||
&jmp (&label("2ndmadd"));
|
||||
|
||||
|
||||
&set_label("1stmadd",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # ap[j]*bp[i]
|
||||
|
@ -362,7 +362,7 @@ $carry="ebp";
|
|||
&mov ("eax",&DWP(4,$inp)); # np[1]
|
||||
&adc ("edx",0);
|
||||
&mov ($j,1);
|
||||
|
||||
|
||||
&set_label("2ndmadd",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j]*m
|
||||
|
@ -401,7 +401,7 @@ $carry="ebp";
|
|||
&xor ("edx","edx");
|
||||
&mov ("eax",&DWP(0,$inp));
|
||||
&jmp (&label("1stmadd"));
|
||||
|
||||
|
||||
&set_label("bn_sqr_mont",16);
|
||||
$sbit=$num;
|
||||
&mov ($_num,$num);
|
||||
|
@ -452,7 +452,7 @@ $sbit=$num;
|
|||
&adc ("edx",0);
|
||||
&mov ("eax",&DWP(4,$inp)); # np[1]
|
||||
&mov ($j,1);
|
||||
|
||||
|
||||
&set_label("3rdmadd",16);
|
||||
&mov ($carry,"edx");
|
||||
&mul ($word); # np[j]*m
|
||||
|
@ -492,7 +492,7 @@ $sbit=$num;
|
|||
&cmp ($j,$num);
|
||||
&mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
|
||||
&je (&label("common_tail"));
|
||||
|
||||
|
||||
&mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
|
||||
&lea ($j,&DWP(1,$j));
|
||||
&mov ("eax",$word);
|
||||
|
@ -552,7 +552,7 @@ $sbit=$num;
|
|||
|
||||
&jmp (&label("3rdmadd"));
|
||||
}
|
||||
|
||||
|
||||
&set_label("common_tail",16);
|
||||
&mov ($np,$_np); # load modulus pointer
|
||||
&mov ($rp,$_rp); # load result pointer
|
||||
|
|
|
@ -686,7 +686,8 @@ $code.=<<___;
|
|||
.size bn_mul4x_mont,.-bn_mul4x_mont
|
||||
___
|
||||
}}}
|
||||
{{{
|
||||
|
||||
{{{
|
||||
######################################################################
|
||||
# void bn_sqr4x_mont(
|
||||
my $rptr="%rdi"; # const BN_ULONG *rptr,
|
||||
|
@ -1191,7 +1192,8 @@ $code.=<<___;
|
|||
mov $S[2],-16($tptr)
|
||||
mov $S[3],-8($tptr)
|
||||
___
|
||||
}
|
||||
}
|
||||
|
||||
##############################################################
|
||||
# Montgomery reduction part, "word-by-word" algorithm.
|
||||
#
|
||||
|
@ -1398,7 +1400,8 @@ $code.=<<___;
|
|||
mov 0(%rsp),$num # restore $num
|
||||
mov $topbit,($tptr) # save $topbit
|
||||
___
|
||||
}
|
||||
}
|
||||
|
||||
##############################################################
|
||||
# Post-condition, 4x unrolled copy from bn_mul_mont
|
||||
#
|
||||
|
|
|
@ -121,7 +121,7 @@ $code.=<<___ if ($SIZE_T==4);
|
|||
b L\$parisc1_gmult
|
||||
nop
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
ldb 15($Xi),$nlo
|
||||
ldo 8($Htbl),$Hll
|
||||
|
@ -207,7 +207,7 @@ L\$oop_gmult_pa2
|
|||
std $Zll,8($Xi)
|
||||
std $Zhh,0($Xi)
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
b L\$done_gmult
|
||||
nop
|
||||
|
@ -378,7 +378,7 @@ $code.=<<___ if ($SIZE_T==4);
|
|||
b L\$parisc1_ghash
|
||||
nop
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___;
|
||||
ldb 15($Xi),$nlo
|
||||
ldo 8($Htbl),$Hll
|
||||
|
@ -475,7 +475,7 @@ L\$oop_ghash_pa2
|
|||
cmpb,*<> $inp,$len,L\$outer_ghash_pa2
|
||||
copy $Zll,$nlo
|
||||
___
|
||||
|
||||
|
||||
$code.=<<___ if ($SIZE_T==4);
|
||||
b L\$done_ghash
|
||||
nop
|
||||
|
|
|
@ -125,7 +125,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||
($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx");
|
||||
$inp = "edi";
|
||||
$Htbl = "esi";
|
||||
|
||||
|
||||
$unroll = 0; # Affects x86 loop. Folded loop performs ~7% worse
|
||||
# than unrolled, which has to be weighted against
|
||||
# 2.5x x86-specific code size reduction.
|
||||
|
@ -243,7 +243,7 @@ sub deposit_rem_4bit {
|
|||
&mov (&DWP($bias+56,"esp"),0xA9C0<<16);
|
||||
&mov (&DWP($bias+60,"esp"),0xB5E0<<16);
|
||||
}
|
||||
|
||||
|
||||
$suffix = $x86only ? "" : "_x86";
|
||||
|
||||
&function_begin("gcm_gmult_4bit".$suffix);
|
||||
|
@ -326,7 +326,7 @@ $suffix = $x86only ? "" : "_x86";
|
|||
&mov (&DWP(0,$inp),$Zhh);
|
||||
&stack_pop(16+4+1);
|
||||
&function_end("gcm_ghash_4bit".$suffix);
|
||||
|
||||
|
||||
if (!$x86only) {{{
|
||||
|
||||
&static_label("rem_4bit");
|
||||
|
@ -425,7 +425,7 @@ $S=12; # shift factor for rem_4bit
|
|||
&mov (&DWP(8,$inp),$Zlh);
|
||||
&mov (&DWP(0,$inp),$Zhh);
|
||||
&function_end("gcm_gmult_4bit_mmx");
|
||||
|
||||
|
||||
# Streamed version performs 20% better on P4, 7% on Opteron,
|
||||
# 10% on Core2 and PIII...
|
||||
&function_begin("gcm_ghash_4bit_mmx");
|
||||
|
@ -477,7 +477,7 @@ $S=12; # shift factor for rem_4bit
|
|||
|
||||
&stack_pop(4+1);
|
||||
&function_end("gcm_ghash_4bit_mmx");
|
||||
|
||||
|
||||
}} else {{ # "June" MMX version...
|
||||
# ... has slower "April" gcm_gmult_4bit_mmx with folded
|
||||
# loop. This is done to conserve code size...
|
||||
|
@ -593,7 +593,7 @@ sub mmx_loop() {
|
|||
&mov (&DWP(8,$inp),$Zlh);
|
||||
&mov (&DWP(0,$inp),$Zhh);
|
||||
&function_end("gcm_gmult_4bit_mmx");
|
||||
|
||||
|
||||
######################################################################
|
||||
# Below subroutine is "528B" variant of "4-bit" GCM GHASH function
|
||||
# (see gcm128.c for details). It provides further 20-40% performance
|
||||
|
@ -797,7 +797,7 @@ sub mmx_loop() {
|
|||
}
|
||||
&function_end("gcm_ghash_4bit_mmx");
|
||||
}}
|
||||
|
||||
|
||||
if ($sse2) {{
|
||||
######################################################################
|
||||
# PCLMULQDQ version.
|
||||
|
@ -862,7 +862,7 @@ my ($Xhi,$Xi,$Hkey)=@_;
|
|||
&pxor ($Xhi,$T2);
|
||||
&pxor ($Xi,$T3); #
|
||||
}
|
||||
|
||||
|
||||
if (1) { # Algorithm 9 with <<1 twist.
|
||||
# Reduction is shorter and uses only two
|
||||
# temporary registers, which makes it better
|
||||
|
@ -1073,7 +1073,7 @@ my ($Xhi,$Xi) = @_;
|
|||
&pshufb ($Xi,$T3);
|
||||
&movdqu (&QWP(0,$Xip),$Xi);
|
||||
&function_end("gcm_ghash_clmul");
|
||||
|
||||
|
||||
} else { # Algorithm 5. Kept for reference purposes.
|
||||
|
||||
sub reduction_alg5 { # 19/16 times faster than Intel version
|
||||
|
@ -1250,7 +1250,7 @@ my ($Xhi,$Xi)=@_;
|
|||
&function_end("gcm_ghash_clmul");
|
||||
|
||||
}
|
||||
|
||||
|
||||
&rodataseg();
|
||||
&set_label("bswap",64);
|
||||
&data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
|
||||
|
|
|
@ -79,7 +79,7 @@ sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
|
|||
$arg = "\$$arg" if ($arg*1 eq $arg);
|
||||
$code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
|
||||
}
|
||||
|
||||
|
||||
{ my $N;
|
||||
sub loop() {
|
||||
my $inp = shift;
|
||||
|
@ -184,7 +184,7 @@ $code.=<<___;
|
|||
ret
|
||||
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
||||
___
|
||||
|
||||
|
||||
# per-function register layout
|
||||
$inp="%rdx";
|
||||
$len="%rcx";
|
||||
|
@ -341,7 +341,7 @@ $code.=<<___;
|
|||
ret
|
||||
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
||||
___
|
||||
|
||||
|
||||
######################################################################
|
||||
# PCLMULQDQ version.
|
||||
|
||||
|
@ -404,7 +404,7 @@ $code.=<<___;
|
|||
pxor $T2,$Xi #
|
||||
___
|
||||
}
|
||||
|
||||
|
||||
{ my ($Htbl,$Xip)=@_4args;
|
||||
|
||||
$code.=<<___;
|
||||
|
@ -465,7 +465,7 @@ $code.=<<___;
|
|||
.size gcm_gmult_clmul,.-gcm_gmult_clmul
|
||||
___
|
||||
}
|
||||
|
||||
|
||||
{ my ($Xip,$Htbl,$inp,$len)=@_4args;
|
||||
my $Xn="%xmm6";
|
||||
my $Xhn="%xmm7";
|
||||
|
@ -675,7 +675,7 @@ $code.=<<___;
|
|||
.align 64
|
||||
.text
|
||||
___
|
||||
|
||||
|
||||
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
|
||||
# CONTEXT *context,DISPATCHER_CONTEXT *disp)
|
||||
if ($win64) {
|
||||
|
@ -802,7 +802,7 @@ se_handler:
|
|||
.byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58
|
||||
___
|
||||
}
|
||||
|
||||
|
||||
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
|
||||
|
||||
print $code;
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
# 9. .init segment is allowed to contain calls to functions only.
|
||||
# a. If function accepts more than 4 arguments *and* >4th argument
|
||||
# is declared as non 64-bit value, do clear its upper part.
|
||||
|
||||
|
||||
my $flavour = shift;
|
||||
my $output = shift;
|
||||
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
|
||||
|
@ -857,7 +857,8 @@ print "END\n" if ($masm);
|
|||
|
||||
close STDOUT;
|
||||
|
||||
#################################################
|
||||
|
||||
#################################################
|
||||
# Cross-reference x86_64 ABI "card"
|
||||
#
|
||||
# Unix Win64
|
||||
|
@ -922,7 +923,8 @@ close STDOUT;
|
|||
# endif
|
||||
# ret
|
||||
#
|
||||
#################################################
|
||||
|
||||
#################################################
|
||||
# Win64 SEH, Structured Exception Handling.
|
||||
#
|
||||
# Unlike on Unix systems(*) lack of Win64 stack unwinding information
|
||||
|
|
|
@ -12844,7 +12844,7 @@ static const u_int8_t cycladesz_firmware[] = {
|
|||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x48,
|
||||
0x65, 0x61, 0x64, 0x65, 0x72, 0x3a, 0x20, 0x73, 0x65, 0x74,
|
||||
0x6a, 0x6d, 0x70, 0x2e, 0x68, 0x2c, 0x76, 0x20, 0x31, 0x2e,
|
||||
0x31, 0x20, 0x38, 0x37, 0x2f, 0x30, 0x38, 0x2f, 0x31, 0x38,
|
||||
0x31, 0x20, 0x38, 0x37, 0x2f, 0x30, 0x38, 0x2f, 0x31, 0x38
|
||||
0x20, 0x31, 0x36, 0x3a, 0x33, 0x34, 0x3a, 0x31, 0x31, 0x20,
|
||||
0x6d, 0x64, 0x6f, 0x76, 0x65, 0x20, 0x45, 0x78, 0x70, 0x20,
|
||||
0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
|
@ -4440,7 +4440,7 @@ struct gop_lib1_content {
|
|||
/*
|
||||
***************************************************************************
|
||||
Scratch Register definitions
|
||||
Each number below indicates which scratch regiser request, Active and
|
||||
Each number below indicates which scratch register request, Active and
|
||||
Connect all share the same definitions as display_device_tag defines
|
||||
***************************************************************************
|
||||
*/
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue