zap whitespace and tab

This commit is contained in:
purplerain 2023-09-26 19:52:17 +00:00
parent 5b49f88fed
commit 4de47ea988
Signed by: purplerain
GPG key ID: F42C07F07E2E35B7
681 changed files with 35748 additions and 35743 deletions

View file

@ -96,7 +96,7 @@ $code.=<<___;
#endif #endif
.set noat .set noat
___ ___
{{{ {{{
my $FRAMESIZE=16*$SZREG; my $FRAMESIZE=16*$SZREG;
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000; my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
@ -437,7 +437,7 @@ $code.=<<___;
$PTR_ADD $sp,$FRAMESIZE $PTR_ADD $sp,$FRAMESIZE
.end AES_encrypt .end AES_encrypt
___ ___
$code.=<<___; $code.=<<___;
.align 5 .align 5
.ent _mips_AES_decrypt .ent _mips_AES_decrypt
@ -774,7 +774,7 @@ $code.=<<___;
.end AES_decrypt .end AES_decrypt
___ ___
}}} }}}
{{{ {{{
my $FRAMESIZE=8*$SZREG; my $FRAMESIZE=8*$SZREG;
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000; my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
@ -1087,7 +1087,7 @@ $code.=<<___;
$PTR_ADD $sp,$FRAMESIZE $PTR_ADD $sp,$FRAMESIZE
.end AES_set_encrypt_key .end AES_set_encrypt_key
___ ___
my ($head,$tail)=($inp,$bits); my ($head,$tail)=($inp,$bits);
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
@ -1570,7 +1570,7 @@ AES_Td:
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
___ ___
foreach (split("\n",$code)) { foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge; s/\`([^\`]*)\`/eval $1/ge;

View file

@ -90,7 +90,7 @@ sub aesenc { aescommon(0xdc,@_); }
sub aesenclast { aescommon(0xdd,@_); } sub aesenclast { aescommon(0xdd,@_); }
sub aesdec { aescommon(0xde,@_); } sub aesdec { aescommon(0xde,@_); }
sub aesdeclast { aescommon(0xdf,@_); } sub aesdeclast { aescommon(0xdf,@_); }
# Inline version of internal aesni_[en|de]crypt1 # Inline version of internal aesni_[en|de]crypt1
{ my $sn; { my $sn;
sub aesni_inline_generate1 sub aesni_inline_generate1
@ -157,7 +157,7 @@ sub aesni_generate1 # fully unrolled loop
&ret(); &ret();
&function_end_B("_aesni_${p}rypt1"); &function_end_B("_aesni_${p}rypt1");
} }
# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key); # void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
&aesni_generate1("enc") if (!$inline); &aesni_generate1("enc") if (!$inline);
&function_begin_B("${PREFIX}_encrypt"); &function_begin_B("${PREFIX}_encrypt");
@ -349,7 +349,7 @@ sub aesni_generate6
&aesni_generate4("dec"); &aesni_generate4("dec");
&aesni_generate6("enc") if ($PREFIX eq "aesni"); &aesni_generate6("enc") if ($PREFIX eq "aesni");
&aesni_generate6("dec"); &aesni_generate6("dec");
if ($PREFIX eq "aesni") { if ($PREFIX eq "aesni") {
###################################################################### ######################################################################
# void aesni_ecb_encrypt (const void *in, void *out, # void aesni_ecb_encrypt (const void *in, void *out,
@ -569,7 +569,7 @@ if ($PREFIX eq "aesni") {
&set_label("ecb_ret"); &set_label("ecb_ret");
&function_end("aesni_ecb_encrypt"); &function_end("aesni_ecb_encrypt");
###################################################################### ######################################################################
# void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out, # void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
# size_t blocks, const AES_KEY *key, # size_t blocks, const AES_KEY *key,
@ -756,7 +756,7 @@ if ($PREFIX eq "aesni") {
&movups (&QWP(0,$out),$cmac); &movups (&QWP(0,$out),$cmac);
&function_end("aesni_ccm64_decrypt_blocks"); &function_end("aesni_ccm64_decrypt_blocks");
} }
###################################################################### ######################################################################
# void aesni_ctr32_encrypt_blocks (const void *in, void *out, # void aesni_ctr32_encrypt_blocks (const void *in, void *out,
# size_t blocks, const AES_KEY *key, # size_t blocks, const AES_KEY *key,
@ -1010,7 +1010,7 @@ if ($PREFIX eq "aesni") {
&set_label("ctr32_ret"); &set_label("ctr32_ret");
&mov ("esp",&DWP(80,"esp")); &mov ("esp",&DWP(80,"esp"));
&function_end("aesni_ctr32_encrypt_blocks"); &function_end("aesni_ctr32_encrypt_blocks");
###################################################################### ######################################################################
# void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len, # void aesni_xts_[en|de]crypt(const char *inp,char *out,size_t len,
# const AES_KEY *key1, const AES_KEY *key2 # const AES_KEY *key1, const AES_KEY *key2
@ -1716,7 +1716,7 @@ if ($PREFIX eq "aesni") {
&function_end("aesni_xts_decrypt"); &function_end("aesni_xts_decrypt");
} }
} }
###################################################################### ######################################################################
# void $PREFIX_cbc_encrypt (const void *inp, void *out, # void $PREFIX_cbc_encrypt (const void *inp, void *out,
# size_t length, const AES_KEY *key, # size_t length, const AES_KEY *key,
@ -1943,7 +1943,7 @@ if ($PREFIX eq "aesni") {
&movups (&QWP(0,$key_),$ivec); # output IV &movups (&QWP(0,$key_),$ivec); # output IV
&set_label("cbc_abort"); &set_label("cbc_abort");
&function_end("${PREFIX}_cbc_encrypt"); &function_end("${PREFIX}_cbc_encrypt");
###################################################################### ######################################################################
# Mechanical port from aesni-x86_64.pl. # Mechanical port from aesni-x86_64.pl.
# #

View file

@ -256,7 +256,7 @@ bn_mul_mont:
cmplt $i,$num,$tj # borrow $tj cmplt $i,$num,$tj # borrow $tj
stq $hi1,16($tp) stq $hi1,16($tp)
bne $tj,.Louter bne $tj,.Louter
s8addq $num,sp,$tj # &tp[num] s8addq $num,sp,$tj # &tp[num]
mov $rp,$bp # put rp aside mov $rp,$bp # put rp aside
mov sp,$tp mov sp,$tp

View file

@ -110,7 +110,7 @@ bn_mul_mont:
adc $nhi,$nhi,#0 adc $nhi,$nhi,#0
str $nlo,[$num] @ tp[num-1]= str $nlo,[$num] @ tp[num-1]=
str $nhi,[$num,#4] @ tp[num]= str $nhi,[$num,#4] @ tp[num]=
.Louter: .Louter:
sub $tj,$num,sp @ "original" $num-1 value sub $tj,$num,sp @ "original" $num-1 value
sub $ap,$ap,$tj @ "rewind" ap to &ap[1] sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
@ -158,7 +158,7 @@ bn_mul_mont:
cmp $tp,$tj cmp $tp,$tj
bne .Louter bne .Louter
ldr $rp,[$_rp] @ pull rp ldr $rp,[$_rp] @ pull rp
add $num,$num,#4 @ $num to point at &tp[num] add $num,$num,#4 @ $num to point at &tp[num]
sub $aj,$num,sp @ "original" num value sub $aj,$num,sp @ "original" num value

View file

@ -350,7 +350,7 @@ $code.=<<___;
addu $i,$BNSZ addu $i,$BNSZ
sltu $t0,$i,$num sltu $t0,$i,$num
bnez $t0,.Louter bnez $t0,.Louter
.set noreorder .set noreorder
$PTR_ADD $tj,$sp,$num # &tp[num] $PTR_ADD $tj,$sp,$num # &tp[num]
move $tp,$sp move $tp,$sp

View file

@ -59,7 +59,7 @@
# Special thanks to polarhome.com for providing HP-UX account on # Special thanks to polarhome.com for providing HP-UX account on
# PA-RISC 1.1 machine, and to correspondent who chose to remain # PA-RISC 1.1 machine, and to correspondent who chose to remain
# anonymous for testing the code on PA-RISC 2.0 machine. # anonymous for testing the code on PA-RISC 2.0 machine.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
$flavour = shift; $flavour = shift;
@ -228,7 +228,7 @@ $code.=<<___; # PA-RISC 2.0 code-path
ldo 8($idx),$idx ; j++++ ldo 8($idx),$idx ; j++++
addl $ab0,$nm0,$nm0 ; low part is discarded addl $ab0,$nm0,$nm0 ; low part is discarded
extrd,u $nm0,31,32,$hi1 extrd,u $nm0,31,32,$hi1
L\$1st L\$1st
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0] xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
@ -349,7 +349,7 @@ L\$outer
extrd,u $nm0,31,32,$hi1 extrd,u $nm0,31,32,$hi1
fstds ${fab0},-16($xfer) fstds ${fab0},-16($xfer)
fstds ${fnm0},-8($xfer) fstds ${fnm0},-8($xfer)
L\$inner L\$inner
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i] xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
@ -464,7 +464,7 @@ $code.=<<___;
b L\$outer b L\$outer
ldo `$LOCALS+32+4`($fp),$tp ldo `$LOCALS+32+4`($fp),$tp
L\$outerdone L\$outerdone
addl $hi0,$ab1,$ab1 addl $hi0,$ab1,$ab1
addl $ti1,$ab1,$ab1 addl $ti1,$ab1,$ab1
@ -562,7 +562,7 @@ L\$parisc11
ldw 4($xfer),$ablo ldw 4($xfer),$ablo
ldw 0($xfer),$abhi ldw 0($xfer),$abhi
nop nop
L\$1st_pa11 L\$1st_pa11
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0] xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
flddx $idx($ap),${fai} ; ap[j,j+1] flddx $idx($ap),${fai} ; ap[j,j+1]
@ -687,7 +687,7 @@ L\$outer_pa11
fstds ${fnm0},-8($xfer) fstds ${fnm0},-8($xfer)
ldw 4($xfer),$ablo ldw 4($xfer),$ablo
ldw 0($xfer),$abhi ldw 0($xfer),$abhi
L\$inner_pa11 L\$inner_pa11
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i] xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
flddx $idx($ap),${fai} ; ap[j,j+1] flddx $idx($ap),${fai} ; ap[j,j+1]
@ -806,7 +806,7 @@ L\$inner_pa11
b L\$outer_pa11 b L\$outer_pa11
ldo `$LOCALS+32+4`($fp),$tp ldo `$LOCALS+32+4`($fp),$tp
L\$outerdone_pa11 L\$outerdone_pa11
add $hi0,$ablo,$ablo add $hi0,$ablo,$ablo
addc %r0,$abhi,$abhi addc %r0,$abhi,$abhi
@ -858,7 +858,7 @@ L\$copy_pa11
L\$done L\$done
___ ___
} }
$code.=<<___; $code.=<<___;
ldi 1,%r28 ; signal "handled" ldi 1,%r28 ; signal "handled"
ldo $FRAME($fp),%sp ; destroy tp[num+1] ldo $FRAME($fp),%sp ; destroy tp[num+1]
@ -877,7 +877,7 @@ L\$abort
$POPMB -$FRAME(%sp),%r3 $POPMB -$FRAME(%sp),%r3
.PROCEND .PROCEND
___ ___
# Explicitly encode PA-RISC 2.0 instructions used in this module, so # Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I # that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0

View file

@ -147,7 +147,7 @@ $code.=<<___;
$LD $n0,0($n0) ; pull n0[0] value $LD $n0,0($n0) ; pull n0[0] value
addi $num,$num,-2 ; adjust $num for counter register addi $num,$num,-2 ; adjust $num for counter register
$LD $m0,0($bp) ; m0=bp[0] $LD $m0,0($bp) ; m0=bp[0]
$LD $aj,0($ap) ; ap[0] $LD $aj,0($ap) ; ap[0]
addi $tp,$sp,$LOCALS addi $tp,$sp,$LOCALS
@ -206,7 +206,7 @@ L1st:
addc $hi1,$hi1,$hi0 addc $hi1,$hi1,$hi0
addze $ovf,$ovf ; upmost overflow bit addze $ovf,$ovf ; upmost overflow bit
$ST $hi1,$BNSZ($tp) $ST $hi1,$BNSZ($tp)
li $i,$BNSZ li $i,$BNSZ
.align 4 .align 4
Louter: Louter:
@ -230,7 +230,7 @@ Louter:
$UMULL $nlo,$nj,$m1 ; np[1]*m1 $UMULL $nlo,$nj,$m1 ; np[1]*m1
addze $hi1,$hi1 addze $hi1,$hi1
$UMULH $nhi,$nj,$m1 $UMULH $nhi,$nj,$m1
mtctr $num mtctr $num
li $j,`2*$BNSZ` li $j,`2*$BNSZ`
.align 4 .align 4
@ -277,7 +277,7 @@ Linner:
$UCMP $i,$tj $UCMP $i,$tj
addi $i,$i,$BNSZ addi $i,$i,$BNSZ
ble- Louter ble- Louter
addi $num,$num,2 ; restore $num addi $num,$num,2 ; restore $num
subfc $j,$j,$j ; j=0 and "clear" XER[CA] subfc $j,$j,$j ; j=0 and "clear" XER[CA]
addi $tp,$sp,$LOCALS addi $tp,$sp,$LOCALS

View file

@ -154,7 +154,7 @@ $T0a="f24"; $T0b="f25";
$T1a="f26"; $T1b="f27"; $T1a="f26"; $T1b="f27";
$T2a="f28"; $T2b="f29"; $T2a="f28"; $T2b="f29";
$T3a="f30"; $T3b="f31"; $T3a="f30"; $T3b="f31";
# sp----------->+-------------------------------+ # sp----------->+-------------------------------+
# | saved sp | # | saved sp |
# +-------------------------------+ # +-------------------------------+
@ -189,7 +189,7 @@ $T3a="f30"; $T3b="f31";
# . . # . .
# . . # . .
# +-------------------------------+ # +-------------------------------+
$code=<<___; $code=<<___;
.machine "any" .machine "any"
.text .text
@ -260,7 +260,7 @@ $code.=<<___;
li $i,-64 li $i,-64
add $nap_d,$tp,$num add $nap_d,$tp,$num
and $nap_d,$nap_d,$i ; align to 64 bytes and $nap_d,$nap_d,$i ; align to 64 bytes
mulld $t7,$a0,$t3 ; ap[0]*bp[0] mulld $t7,$a0,$t3 ; ap[0]*bp[0]
; nap_d is off by 1, because it's used with stfdu/lfdu ; nap_d is off by 1, because it's used with stfdu/lfdu
addi $nap_d,$nap_d,-8 addi $nap_d,$nap_d,-8
@ -416,7 +416,7 @@ $code.=<<___;
stfd $T2b,`$FRAME+40`($sp) stfd $T2b,`$FRAME+40`($sp)
stfd $T3a,`$FRAME+48`($sp) stfd $T3a,`$FRAME+48`($sp)
stfd $T3b,`$FRAME+56`($sp) stfd $T3b,`$FRAME+56`($sp)
.align 5 .align 5
L1st: L1st:
___ ___
@ -562,7 +562,7 @@ $code.=<<___;
std $t0,8($tp) ; tp[j-1] std $t0,8($tp) ; tp[j-1]
stdu $t4,16($tp) ; tp[j] stdu $t4,16($tp) ; tp[j]
bdnz- L1st bdnz- L1st
fctid $dota,$dota fctid $dota,$dota
fctid $dotb,$dotb fctid $dotb,$dotb
@ -614,7 +614,7 @@ $code.=<<___;
slwi $t7,$num,2 slwi $t7,$num,2
subf $nap_d,$t7,$nap_d ; rewind pointer subf $nap_d,$t7,$nap_d ; rewind pointer
li $i,8 ; i=1 li $i,8 ; i=1
.align 5 .align 5
Louter: Louter:
@ -741,7 +741,7 @@ $code.=<<___;
stfd $T2b,`$FRAME+40`($sp) stfd $T2b,`$FRAME+40`($sp)
stfd $T3a,`$FRAME+48`($sp) stfd $T3a,`$FRAME+48`($sp)
stfd $T3b,`$FRAME+56`($sp) stfd $T3b,`$FRAME+56`($sp)
.align 5 .align 5
Linner: Linner:
fmul $T1a,$A1,$ba fmul $T1a,$A1,$ba
@ -857,7 +857,7 @@ $code.=<<___;
std $t3,-16($tp) ; tp[j-1] std $t3,-16($tp) ; tp[j-1]
std $t5,-8($tp) ; tp[j] std $t5,-8($tp) ; tp[j]
bdnz- Linner bdnz- Linner
fctid $dota,$dota fctid $dota,$dota
fctid $dotb,$dotb fctid $dotb,$dotb
ld $t0,`$FRAME+0`($sp) ld $t0,`$FRAME+0`($sp)
@ -933,7 +933,7 @@ $code.=<<___;
cmpw $i,$num cmpw $i,$num
blt- Louter blt- Louter
___ ___
$code.=<<___ if ($SIZE_T==8); $code.=<<___ if ($SIZE_T==8);
subf $np,$num,$np ; rewind np subf $np,$num,$np ; rewind np
addi $j,$j,1 ; restore counter addi $j,$j,1 ; restore counter
@ -1048,7 +1048,7 @@ Lcopy: ; copy or in-place refresh
stdu $i,16($tp) stdu $i,16($tp)
bdnz- Lcopy bdnz- Lcopy
___ ___
$code.=<<___; $code.=<<___;
$POP $i,0($sp) $POP $i,0($sp)
li r3,1 ; signal "handled" li r3,1 ; signal "handled"

View file

@ -102,7 +102,7 @@ $frame=32; # size of above frame rounded up to 16n
&lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
#&mov ($_num,$num); # redundant as $num is not reused #&mov ($_num,$num); # redundant as $num is not reused
&mov ($_sp,"ebp"); # saved stack pointer! &mov ($_sp,"ebp"); # saved stack pointer!
if($sse2) { if($sse2) {
$acc0="mm0"; # mmx register bank layout $acc0="mm0"; # mmx register bank layout
$acc1="mm1"; $acc1="mm1";
@ -183,7 +183,7 @@ $mask="mm7";
&paddq ($car1,$car0); &paddq ($car1,$car0);
&movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1] &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
&inc ($i); # i++ &inc ($i); # i++
&set_label("outer"); &set_label("outer");
&xor ($j,$j); # j=0 &xor ($j,$j); # j=0
@ -262,7 +262,7 @@ $mask="mm7";
&set_label("non_sse2",16); &set_label("non_sse2",16);
} }
if (0) { if (0) {
&mov ("esp",$_sp); &mov ("esp",$_sp);
&xor ("eax","eax"); # signal "not fast enough [yet]" &xor ("eax","eax"); # signal "not fast enough [yet]"
@ -325,7 +325,7 @@ $carry="ebp";
&inc ($j); &inc ($j);
&jmp (&label("2ndmadd")); &jmp (&label("2ndmadd"));
&set_label("1stmadd",16); &set_label("1stmadd",16);
&mov ($carry,"edx"); &mov ($carry,"edx");
&mul ($word); # ap[j]*bp[i] &mul ($word); # ap[j]*bp[i]
@ -362,7 +362,7 @@ $carry="ebp";
&mov ("eax",&DWP(4,$inp)); # np[1] &mov ("eax",&DWP(4,$inp)); # np[1]
&adc ("edx",0); &adc ("edx",0);
&mov ($j,1); &mov ($j,1);
&set_label("2ndmadd",16); &set_label("2ndmadd",16);
&mov ($carry,"edx"); &mov ($carry,"edx");
&mul ($word); # np[j]*m &mul ($word); # np[j]*m
@ -401,7 +401,7 @@ $carry="ebp";
&xor ("edx","edx"); &xor ("edx","edx");
&mov ("eax",&DWP(0,$inp)); &mov ("eax",&DWP(0,$inp));
&jmp (&label("1stmadd")); &jmp (&label("1stmadd"));
&set_label("bn_sqr_mont",16); &set_label("bn_sqr_mont",16);
$sbit=$num; $sbit=$num;
&mov ($_num,$num); &mov ($_num,$num);
@ -452,7 +452,7 @@ $sbit=$num;
&adc ("edx",0); &adc ("edx",0);
&mov ("eax",&DWP(4,$inp)); # np[1] &mov ("eax",&DWP(4,$inp)); # np[1]
&mov ($j,1); &mov ($j,1);
&set_label("3rdmadd",16); &set_label("3rdmadd",16);
&mov ($carry,"edx"); &mov ($carry,"edx");
&mul ($word); # np[j]*m &mul ($word); # np[j]*m
@ -492,7 +492,7 @@ $sbit=$num;
&cmp ($j,$num); &cmp ($j,$num);
&mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]= &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
&je (&label("common_tail")); &je (&label("common_tail"));
&mov ($word,&DWP(4,$inp,$j,4)); # ap[i] &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
&lea ($j,&DWP(1,$j)); &lea ($j,&DWP(1,$j));
&mov ("eax",$word); &mov ("eax",$word);
@ -552,7 +552,7 @@ $sbit=$num;
&jmp (&label("3rdmadd")); &jmp (&label("3rdmadd"));
} }
&set_label("common_tail",16); &set_label("common_tail",16);
&mov ($np,$_np); # load modulus pointer &mov ($np,$_np); # load modulus pointer
&mov ($rp,$_rp); # load result pointer &mov ($rp,$_rp); # load result pointer

View file

@ -686,7 +686,8 @@ $code.=<<___;
.size bn_mul4x_mont,.-bn_mul4x_mont .size bn_mul4x_mont,.-bn_mul4x_mont
___ ___
}}} }}}
{{{
{{{
###################################################################### ######################################################################
# void bn_sqr4x_mont( # void bn_sqr4x_mont(
my $rptr="%rdi"; # const BN_ULONG *rptr, my $rptr="%rdi"; # const BN_ULONG *rptr,
@ -1191,7 +1192,8 @@ $code.=<<___;
mov $S[2],-16($tptr) mov $S[2],-16($tptr)
mov $S[3],-8($tptr) mov $S[3],-8($tptr)
___ ___
} }
############################################################## ##############################################################
# Montgomery reduction part, "word-by-word" algorithm. # Montgomery reduction part, "word-by-word" algorithm.
# #
@ -1398,7 +1400,8 @@ $code.=<<___;
mov 0(%rsp),$num # restore $num mov 0(%rsp),$num # restore $num
mov $topbit,($tptr) # save $topbit mov $topbit,($tptr) # save $topbit
___ ___
} }
############################################################## ##############################################################
# Post-condition, 4x unrolled copy from bn_mul_mont # Post-condition, 4x unrolled copy from bn_mul_mont
# #

View file

@ -121,7 +121,7 @@ $code.=<<___ if ($SIZE_T==4);
b L\$parisc1_gmult b L\$parisc1_gmult
nop nop
___ ___
$code.=<<___; $code.=<<___;
ldb 15($Xi),$nlo ldb 15($Xi),$nlo
ldo 8($Htbl),$Hll ldo 8($Htbl),$Hll
@ -207,7 +207,7 @@ L\$oop_gmult_pa2
std $Zll,8($Xi) std $Zll,8($Xi)
std $Zhh,0($Xi) std $Zhh,0($Xi)
___ ___
$code.=<<___ if ($SIZE_T==4); $code.=<<___ if ($SIZE_T==4);
b L\$done_gmult b L\$done_gmult
nop nop
@ -378,7 +378,7 @@ $code.=<<___ if ($SIZE_T==4);
b L\$parisc1_ghash b L\$parisc1_ghash
nop nop
___ ___
$code.=<<___; $code.=<<___;
ldb 15($Xi),$nlo ldb 15($Xi),$nlo
ldo 8($Htbl),$Hll ldo 8($Htbl),$Hll
@ -475,7 +475,7 @@ L\$oop_ghash_pa2
cmpb,*<> $inp,$len,L\$outer_ghash_pa2 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
copy $Zll,$nlo copy $Zll,$nlo
___ ___
$code.=<<___ if ($SIZE_T==4); $code.=<<___ if ($SIZE_T==4);
b L\$done_ghash b L\$done_ghash
nop nop

View file

@ -125,7 +125,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx"); ($Zhh,$Zhl,$Zlh,$Zll) = ("ebp","edx","ecx","ebx");
$inp = "edi"; $inp = "edi";
$Htbl = "esi"; $Htbl = "esi";
$unroll = 0; # Affects x86 loop. Folded loop performs ~7% worse $unroll = 0; # Affects x86 loop. Folded loop performs ~7% worse
# than unrolled, which has to be weighted against # than unrolled, which has to be weighted against
# 2.5x x86-specific code size reduction. # 2.5x x86-specific code size reduction.
@ -243,7 +243,7 @@ sub deposit_rem_4bit {
&mov (&DWP($bias+56,"esp"),0xA9C0<<16); &mov (&DWP($bias+56,"esp"),0xA9C0<<16);
&mov (&DWP($bias+60,"esp"),0xB5E0<<16); &mov (&DWP($bias+60,"esp"),0xB5E0<<16);
} }
$suffix = $x86only ? "" : "_x86"; $suffix = $x86only ? "" : "_x86";
&function_begin("gcm_gmult_4bit".$suffix); &function_begin("gcm_gmult_4bit".$suffix);
@ -326,7 +326,7 @@ $suffix = $x86only ? "" : "_x86";
&mov (&DWP(0,$inp),$Zhh); &mov (&DWP(0,$inp),$Zhh);
&stack_pop(16+4+1); &stack_pop(16+4+1);
&function_end("gcm_ghash_4bit".$suffix); &function_end("gcm_ghash_4bit".$suffix);
if (!$x86only) {{{ if (!$x86only) {{{
&static_label("rem_4bit"); &static_label("rem_4bit");
@ -425,7 +425,7 @@ $S=12; # shift factor for rem_4bit
&mov (&DWP(8,$inp),$Zlh); &mov (&DWP(8,$inp),$Zlh);
&mov (&DWP(0,$inp),$Zhh); &mov (&DWP(0,$inp),$Zhh);
&function_end("gcm_gmult_4bit_mmx"); &function_end("gcm_gmult_4bit_mmx");
# Streamed version performs 20% better on P4, 7% on Opteron, # Streamed version performs 20% better on P4, 7% on Opteron,
# 10% on Core2 and PIII... # 10% on Core2 and PIII...
&function_begin("gcm_ghash_4bit_mmx"); &function_begin("gcm_ghash_4bit_mmx");
@ -477,7 +477,7 @@ $S=12; # shift factor for rem_4bit
&stack_pop(4+1); &stack_pop(4+1);
&function_end("gcm_ghash_4bit_mmx"); &function_end("gcm_ghash_4bit_mmx");
}} else {{ # "June" MMX version... }} else {{ # "June" MMX version...
# ... has slower "April" gcm_gmult_4bit_mmx with folded # ... has slower "April" gcm_gmult_4bit_mmx with folded
# loop. This is done to conserve code size... # loop. This is done to conserve code size...
@ -593,7 +593,7 @@ sub mmx_loop() {
&mov (&DWP(8,$inp),$Zlh); &mov (&DWP(8,$inp),$Zlh);
&mov (&DWP(0,$inp),$Zhh); &mov (&DWP(0,$inp),$Zhh);
&function_end("gcm_gmult_4bit_mmx"); &function_end("gcm_gmult_4bit_mmx");
###################################################################### ######################################################################
# Below subroutine is "528B" variant of "4-bit" GCM GHASH function # Below subroutine is "528B" variant of "4-bit" GCM GHASH function
# (see gcm128.c for details). It provides further 20-40% performance # (see gcm128.c for details). It provides further 20-40% performance
@ -797,7 +797,7 @@ sub mmx_loop() {
} }
&function_end("gcm_ghash_4bit_mmx"); &function_end("gcm_ghash_4bit_mmx");
}} }}
if ($sse2) {{ if ($sse2) {{
###################################################################### ######################################################################
# PCLMULQDQ version. # PCLMULQDQ version.
@ -862,7 +862,7 @@ my ($Xhi,$Xi,$Hkey)=@_;
&pxor ($Xhi,$T2); &pxor ($Xhi,$T2);
&pxor ($Xi,$T3); # &pxor ($Xi,$T3); #
} }
if (1) { # Algorithm 9 with <<1 twist. if (1) { # Algorithm 9 with <<1 twist.
# Reduction is shorter and uses only two # Reduction is shorter and uses only two
# temporary registers, which makes it better # temporary registers, which makes it better
@ -1073,7 +1073,7 @@ my ($Xhi,$Xi) = @_;
&pshufb ($Xi,$T3); &pshufb ($Xi,$T3);
&movdqu (&QWP(0,$Xip),$Xi); &movdqu (&QWP(0,$Xip),$Xi);
&function_end("gcm_ghash_clmul"); &function_end("gcm_ghash_clmul");
} else { # Algorithm 5. Kept for reference purposes. } else { # Algorithm 5. Kept for reference purposes.
sub reduction_alg5 { # 19/16 times faster than Intel version sub reduction_alg5 { # 19/16 times faster than Intel version
@ -1250,7 +1250,7 @@ my ($Xhi,$Xi)=@_;
&function_end("gcm_ghash_clmul"); &function_end("gcm_ghash_clmul");
} }
&rodataseg(); &rodataseg();
&set_label("bswap",64); &set_label("bswap",64);
&data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);

View file

@ -79,7 +79,7 @@ sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
$arg = "\$$arg" if ($arg*1 eq $arg); $arg = "\$$arg" if ($arg*1 eq $arg);
$code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";
} }
{ my $N; { my $N;
sub loop() { sub loop() {
my $inp = shift; my $inp = shift;
@ -184,7 +184,7 @@ $code.=<<___;
ret ret
.size gcm_gmult_4bit,.-gcm_gmult_4bit .size gcm_gmult_4bit,.-gcm_gmult_4bit
___ ___
# per-function register layout # per-function register layout
$inp="%rdx"; $inp="%rdx";
$len="%rcx"; $len="%rcx";
@ -341,7 +341,7 @@ $code.=<<___;
ret ret
.size gcm_ghash_4bit,.-gcm_ghash_4bit .size gcm_ghash_4bit,.-gcm_ghash_4bit
___ ___
###################################################################### ######################################################################
# PCLMULQDQ version. # PCLMULQDQ version.
@ -404,7 +404,7 @@ $code.=<<___;
pxor $T2,$Xi # pxor $T2,$Xi #
___ ___
} }
{ my ($Htbl,$Xip)=@_4args; { my ($Htbl,$Xip)=@_4args;
$code.=<<___; $code.=<<___;
@ -465,7 +465,7 @@ $code.=<<___;
.size gcm_gmult_clmul,.-gcm_gmult_clmul .size gcm_gmult_clmul,.-gcm_gmult_clmul
___ ___
} }
{ my ($Xip,$Htbl,$inp,$len)=@_4args; { my ($Xip,$Htbl,$inp,$len)=@_4args;
my $Xn="%xmm6"; my $Xn="%xmm6";
my $Xhn="%xmm7"; my $Xhn="%xmm7";
@ -675,7 +675,7 @@ $code.=<<___;
.align 64 .align 64
.text .text
___ ___
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
# CONTEXT *context,DISPATCHER_CONTEXT *disp) # CONTEXT *context,DISPATCHER_CONTEXT *disp)
if ($win64) { if ($win64) {
@ -802,7 +802,7 @@ se_handler:
.byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58 .byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58
___ ___
} }
$code =~ s/\`([^\`]*)\`/eval($1)/gem; $code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code; print $code;

View file

@ -57,7 +57,7 @@
# 9. .init segment is allowed to contain calls to functions only. # 9. .init segment is allowed to contain calls to functions only.
# a. If function accepts more than 4 arguments *and* >4th argument # a. If function accepts more than 4 arguments *and* >4th argument
# is declared as non 64-bit value, do clear its upper part. # is declared as non 64-bit value, do clear its upper part.
my $flavour = shift; my $flavour = shift;
my $output = shift; my $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@ -857,7 +857,8 @@ print "END\n" if ($masm);
close STDOUT; close STDOUT;
#################################################
#################################################
# Cross-reference x86_64 ABI "card" # Cross-reference x86_64 ABI "card"
# #
# Unix Win64 # Unix Win64
@ -922,7 +923,8 @@ close STDOUT;
# endif # endif
# ret # ret
# #
#################################################
#################################################
# Win64 SEH, Structured Exception Handling. # Win64 SEH, Structured Exception Handling.
# #
# Unlike on Unix systems(*) lack of Win64 stack unwinding information # Unlike on Unix systems(*) lack of Win64 stack unwinding information

View file

@ -12844,7 +12844,7 @@ static const u_int8_t cycladesz_firmware[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x48,
0x65, 0x61, 0x64, 0x65, 0x72, 0x3a, 0x20, 0x73, 0x65, 0x74, 0x65, 0x61, 0x64, 0x65, 0x72, 0x3a, 0x20, 0x73, 0x65, 0x74,
0x6a, 0x6d, 0x70, 0x2e, 0x68, 0x2c, 0x76, 0x20, 0x31, 0x2e, 0x6a, 0x6d, 0x70, 0x2e, 0x68, 0x2c, 0x76, 0x20, 0x31, 0x2e,
0x31, 0x20, 0x38, 0x37, 0x2f, 0x30, 0x38, 0x2f, 0x31, 0x38, 0x31, 0x20, 0x38, 0x37, 0x2f, 0x30, 0x38, 0x2f, 0x31, 0x38
0x20, 0x31, 0x36, 0x3a, 0x33, 0x34, 0x3a, 0x31, 0x31, 0x20, 0x20, 0x31, 0x36, 0x3a, 0x33, 0x34, 0x3a, 0x31, 0x31, 0x20,
0x6d, 0x64, 0x6f, 0x76, 0x65, 0x20, 0x45, 0x78, 0x70, 0x20, 0x6d, 0x64, 0x6f, 0x76, 0x65, 0x20, 0x45, 0x78, 0x70, 0x20,
0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

View file

@ -4440,7 +4440,7 @@ struct gop_lib1_content {
/* /*
*************************************************************************** ***************************************************************************
Scratch Register definitions Scratch Register definitions
Each number below indicates which scratch regiser request, Active and Each number below indicates which scratch register request, Active and
Connect all share the same definitions as display_device_tag defines Connect all share the same definitions as display_device_tag defines
*************************************************************************** ***************************************************************************
*/ */