diff options
author | Kevin Hilman <khilman@linaro.org> | 2015-10-20 11:26:33 -0700 |
---|---|---|
committer | Kevin Hilman <khilman@linaro.org> | 2015-10-20 11:26:33 -0700 |
commit | 09147a74376428080eddfed0cbce2cab205c2508 (patch) | |
tree | 7c903b82831eb03498dfc89c615e2acd5e1d9d6f /arch | |
parent | e4bec9707eb7446e0cd1961f91e6948819327933 (diff) | |
parent | c2fab09b2ec32f252029757ba3353ea3d308c811 (diff) |
Merge branch 'test/linux-linaro-lsk-v3.18-android' of git://android.git.linaro.org/kernel/linaro-android into linux-linaro-lsk-v3.18-androidlsk-v3.18-15.10-android
Diffstat (limited to 'arch')
46 files changed, 5597 insertions, 691 deletions
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b48fa341648d..2cee53b27238 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -6,12 +6,15 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o +sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o +sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o quiet_cmd_perl = PERL $@ @@ -20,4 +23,7 @@ quiet_cmd_perl = PERL $@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S +$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl new file mode 100644 index 000000000000..fac0533ea633 --- /dev/null +++ b/arch/arm/crypto/sha256-armv4.pl @@ -0,0 +1,716 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. +# ==================================================================== + +# SHA256 block procedure for ARMv4. May 2007. + +# Performance is ~2x better than gcc 3.4 generated code and in "abso- +# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +# byte [on single-issue Xscale PXA250 core]. + +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 22% improvement on +# Cortex A8 core and ~20 cycles per processed byte. + +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 16% +# improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +# September 2013. +# +# Add NEON implementation. On Cortex A8 it was measured to process one +# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +# code (meaning that latter performs sub-optimally, nothing was done +# about it). + +# May 2014. +# +# Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$ctx="r0"; $t0="r0"; +$inp="r1"; $t4="r1"; +$len="r2"; $t1="r2"; +$T1="r3"; $t3="r3"; +$A="r4"; +$B="r5"; +$C="r6"; +$D="r7"; +$E="r8"; +$F="r9"; +$G="r10"; +$H="r11"; +@V=($A,$B,$C,$D,$E,$F,$G,$H); +$t2="r12"; +$Ktbl="r14"; + +@Sigma0=( 2,13,22); +@Sigma1=( 6,11,25); +@sigma0=( 7,18, 3); +@sigma1=(17,19,10); + +sub BODY_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___ if ($i<16); +#if __ARM_ARCH__>=7 + @ ldr $t1,[$inp],#4 @ $i +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +# ifndef __ARMEB__ + rev $t1,$t1 +# endif +#else + @ ldrb $t1,[$inp,#3] @ $i + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + ldrb $t2,[$inp,#2] + ldrb $t0,[$inp,#1] + orr $t1,$t1,$t2,lsl#8 + ldrb $t2,[$inp],#4 + orr $t1,$t1,$t0,lsl#16 +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + orr $t1,$t1,$t2,lsl#24 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +#endif +___ +$code.=<<___; + ldr $t2,[$Ktbl],#4 @ *K256++ + add $h,$h,$t1 @ h+=X[i] + str $t1,[sp,#`$i%16`*4] + eor $t1,$f,$g + add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) + and $t1,$t1,$e + add $h,$h,$t2 @ h+=K256[i] + eor $t1,$t1,$g @ Ch(e,f,g) + eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` + add $h,$h,$t1 @ h+=Ch(e,f,g) +#if $i==31 + and $t2,$t2,#0xff + cmp $t2,#0xf2 @ done? +#endif +#if $i<15 +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 @ prefetch +# else + ldrb $t1,[$inp,#3] +# endif + eor $t2,$a,$b @ a^b, b^c in next round +#else + ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx + eor $t2,$a,$b @ a^b, b^c in next round + ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx +#endif + eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) + and $t3,$t3,$t2 @ (b^c)&=(a^b) + add $d,$d,$h @ d+=h + eor $t3,$t3,$b @ Maj(a,b,c) + add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) + @ add $h,$h,$t3 @ h+=Maj(a,b,c) +___ + ($t2,$t3)=($t3,$t2); +} + +sub BODY_16_XX { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___; + @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t4,[sp,#`($i+14)%16`*4] + mov $t0,$t1,ror#$sigma0[0] + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + mov $t2,$t4,ror#$sigma1[0] + eor $t0,$t0,$t1,ror#$sigma0[1] + eor $t2,$t2,$t4,ror#$sigma1[1] + eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) + ldr $t1,[sp,#`($i+0)%16`*4] + eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) + ldr $t4,[sp,#`($i+9)%16`*4] + + add $t2,$t2,$t0 + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 + add $t1,$t1,$t2 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + add $t1,$t1,$t4 @ X[i] +___ + &BODY_00_15(@_); +} + +$code=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} + ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} + sub $Ktbl,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 +# else + ldrb $t1,[$inp,#3] +# endif + eor $t3,$B,$C @ magic + eor $t2,$t2,$t2 +___ +for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=".Lrounds_16_xx:\n"; +for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq $t3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t0,[$t3,#0] + ldr $t1,[$t3,#4] + ldr $t2,[$t3,#8] + add $A,$A,$t0 + ldr $t0,[$t3,#12] + add $B,$B,$t1 + ldr $t1,[$t3,#16] + add $C,$C,$t2 + ldr $t2,[$t3,#20] + add $D,$D,$t0 + ldr $t0,[$t3,#24] + add $E,$E,$t1 + ldr $t1,[$t3,#28] + add $F,$F,$t2 + ldr $inp,[sp,#17*4] @ pull inp + ldr $t2,[sp,#18*4] @ pull inp+len + add $G,$G,$t0 + add $H,$H,$t1 + stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} + cmp $inp,$t2 + sub $Ktbl,$Ktbl,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#`16+3`*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +___ +###################################################################### +# NEON stuff +# +{{{ +my @X=map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); +my $Xfer=$t4; +my $j=0; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + while($#insns>=2) { eval(shift(@insns)); } + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&eor ($t1,$f,$g)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&and ($t1,$t1,$e)', + '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&eor ($t1,$t1,$g)', # Ch(e,f,g) + '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&ldr ($t1,"[sp,#64]") if ($j==31)', + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&add ($d,$d,$h)', # d+=h + '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub $H,sp,#16*4+16 + adrl $Ktbl,K256 + bic $H,$H,#15 @ align for 128-bit stores + mov $t2,sp + mov sp,$H @ alloca + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + + vld1.8 {@X[0]},[$inp]! + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + vld1.32 {$T0},[$Ktbl,:128]! + vld1.32 {$T1},[$Ktbl,:128]! + vld1.32 {$T2},[$Ktbl,:128]! + vld1.32 {$T3},[$Ktbl,:128]! + vrev32.8 @X[0],@X[0] @ yes, even on + str $ctx,[sp,#64] + vrev32.8 @X[1],@X[1] @ big-endian + str $inp,[sp,#68] + mov $Xfer,sp + vrev32.8 @X[2],@X[2] + str $len,[sp,#72] + vrev32.8 @X[3],@X[3] + str $t2,[sp,#76] @ save original sp + vadd.i32 $T0,$T0,@X[0] + vadd.i32 $T1,$T1,@X[1] + vst1.32 {$T0},[$Xfer,:128]! + vadd.i32 $T2,$T2,@X[2] + vst1.32 {$T1},[$Xfer,:128]! + vadd.i32 $T3,$T3,@X[3] + vst1.32 {$T2},[$Xfer,:128]! + vst1.32 {$T3},[$Xfer,:128]! + + ldmia $ctx,{$A-$H} + sub $Xfer,$Xfer,#64 + ldr $t1,[sp,#0] + eor $t2,$t2,$t2 + eor $t3,$B,$C + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + teq $t1,#0 @ check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + ldr $inp,[sp,#68] + ldr $t0,[sp,#72] + sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl + teq $inp,$t0 + it eq + subeq $inp,$inp,#64 @ avoid SEGV + vld1.8 {@X[0]},[$inp]! @ load next input block + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + it ne + strne $inp,[sp,#68] + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + ldr $t0,[$t1,#0] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t2,[$t1,#4] + ldr $t3,[$t1,#8] + ldr $t4,[$t1,#12] + add $A,$A,$t0 @ accumulate + ldr $t0,[$t1,#16] + add $B,$B,$t2 + ldr $t2,[$t1,#20] + add $C,$C,$t3 + ldr $t3,[$t1,#24] + add $D,$D,$t4 + ldr $t4,[$t1,#28] + add $E,$E,$t0 + str $A,[$t1],#4 + add $F,$F,$t2 + str $B,[$t1],#4 + add $G,$G,$t3 + str $C,[$t1],#4 + add $H,$H,$t4 + str $D,[$t1],#4 + stmia $t1,{$E-$H} + + ittte ne + movne $Xfer,sp + ldrne $t1,[sp,#0] + eorne $t2,$t2,$t2 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne $t3,$B,$C + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +___ +}}} +###################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); +my @MSG=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); +my $Ktbl="r3"; + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {$ABCD,$EFGH},[$ctx] +# ifdef __thumb2__ + adr $Ktbl,.LARMv8 + sub $Ktbl,$Ktbl,#.LARMv8-K256 +# else + adrl $Ktbl,K256 +# endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vld1.32 {$W0},[$Ktbl]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + vrev32.8 @MSG[2],@MSG[2] + vrev32.8 @MSG[3],@MSG[3] + vmov $ABCD_SAVE,$ABCD @ offload + vmov $EFGH_SAVE,$EFGH + teq $inp,$len +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vld1.32 {$W0},[$Ktbl]! + vadd.i32 $W1,$W1,@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vld1.32 {$W1},[$Ktbl] + vadd.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#256-16 @ rewind + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vadd.i32 $W1,$W1,@MSG[3] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + it ne + bne .Loop_v8 + + vst1.32 {$ABCD,$EFGH},[$ctx] + + ret @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +{ my %opcode = ( + "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, + "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; + + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped new file mode 100644 index 000000000000..555a1a8eec90 --- /dev/null +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -0,0 +1,2808 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub r11,sp,#16*4+16 + adrl r14,K256 + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] +# ifdef __thumb2__ + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 000000000000..ff90ced528a1 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c @@ -0,0 +1,246 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using optimized ARM assembler and NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha256_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order(u32 *digest, const void *data, + unsigned int num_blks); + + +int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, + unsigned int partial) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_block_data_order(sctx->state, sctx->buf, 1); + } + + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_block_data_order(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + return __sha256_update(desc, data, len, partial); +} + +/* Add padding and return the message digest. */ +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); + + /* We need to fill a whole block for __sha256_update */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_update(desc, padding, padlen, index); + } + __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_final(desc, D); + + memcpy(out, D, SHA224_DIGEST_SIZE); + memzero_explicit(D, SHA256_DIGEST_SIZE); + + return 0; +} + +int sha256_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +int sha256_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = sha256_update, + .final = sha256_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = sha256_update, + .final = sha224_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha256_mod_init(void) +{ + int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); + + if (res < 0) + return res; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { + res = crypto_register_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); + + if (res < 0) + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + } + + return res; +} + +static void __exit sha256_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) + crypto_unregister_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); +} + +module_init(sha256_mod_init); +module_exit(sha256_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); + +MODULE_ALIAS("sha256"); diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h new file mode 100644 index 000000000000..0312f4ffe8cc --- /dev/null +++ b/arch/arm/crypto/sha256_glue.h @@ -0,0 +1,23 @@ +#ifndef _CRYPTO_SHA256_GLUE_H +#define _CRYPTO_SHA256_GLUE_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +extern struct shash_alg sha256_neon_algs[2]; + +extern int sha256_init(struct shash_desc *desc); + +extern int sha224_init(struct shash_desc *desc); + +extern int __sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial); + +extern int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int sha256_export(struct shash_desc *desc, void *out); + +extern int sha256_import(struct shash_desc *desc, const void *in); + +#endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c new file mode 100644 index 000000000000..c4da10090eee --- /dev/null +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -0,0 +1,172 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha512_neon_glue.c: + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, + unsigned int num_blks); + + +static int __sha256_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_block_data_order_neon(sctx->state, sctx->buf, 1); + } + + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_block_data_order_neon(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +static int sha256_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + if (!may_use_simd()) { + res = __sha256_update(desc, data, len, partial); + } else { + kernel_neon_begin(); + res = __sha256_neon_update(desc, data, len, partial); + kernel_neon_end(); + } + + return res; +} + +/* Add padding and return the message digest. */ +static int sha256_neon_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); + + if (!may_use_simd()) { + sha256_update(desc, padding, padlen); + sha256_update(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_neon_begin(); + /* We need to fill a whole block for __sha256_neon_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_neon_update(desc, padding, padlen, index); + } + __sha256_neon_update(desc, (const u8 *)&bits, + sizeof(bits), 56); + kernel_neon_end(); + } + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memzero_explicit(sctx, sizeof(*sctx)); + + return 0; +} + +static int sha224_neon_final(struct shash_desc *desc, u8 *out) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_neon_final(desc, D); + + memcpy(out, D, SHA224_DIGEST_SIZE); + memzero_explicit(D, SHA256_DIGEST_SIZE); + + return 0; +} + +struct shash_alg sha256_neon_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = sha256_neon_update, + .final = sha256_neon_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = sha256_neon_update, + .final = sha224_neon_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 10e78d00a0bb..2d46862e7bef 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -487,6 +487,16 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +void set_kernel_text_rw(void); +void set_kernel_text_ro(void); +#else +static inline void set_kernel_text_rw(void) { } +static inline void set_kernel_text_ro(void) { } +#endif + void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); + #endif diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 74124b0d0d79..0415eae1df27 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -2,27 +2,24 @@ #define _ASM_FIXMAP_H #define FIXADDR_START 0xffc00000UL -#define FIXADDR_TOP 0xffe00000UL -#define FIXADDR_SIZE (FIXADDR_TOP - FIXADDR_START) +#define FIXADDR_END 0xfff00000UL +#define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) -#define FIX_KMAP_NR_PTES (FIXADDR_SIZE >> PAGE_SHIFT) +#include <asm/kmap_types.h> -#define __fix_to_virt(x) (FIXADDR_START + ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) (((x) - FIXADDR_START) >> PAGE_SHIFT) +enum fixed_addresses { + FIX_KMAP_BEGIN, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, -extern void __this_fixmap_does_not_exist(void); + /* Support writing RO kernel text via kprobes, jump labels, etc. */ + FIX_TEXT_POKE0, + FIX_TEXT_POKE1, -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - if (idx >= FIX_KMAP_NR_PTES) - __this_fixmap_does_not_exist(); - return __fix_to_virt(idx); -} + __end_of_fixed_addresses +}; -static inline unsigned int virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); + +#include <asm-generic/fixmap.h> #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 2ecc7d15bc09..e03714662cb6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -67,7 +67,7 @@ test-kprobes-objs += kprobes-test-arm.o endif obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o -obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KGDB) += kgdb.o patch.o obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_OF) += devtree.o diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index af9a8a927a4e..b8c75e45a950 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -15,6 +15,7 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/module.h> +#include <linux/stop_machine.h> #include <asm/cacheflush.h> #include <asm/opcodes.h> @@ -35,6 +36,22 @@ #define OLD_NOP 0xe1a00000 /* mov r0, r0 */ +static int __ftrace_modify_code(void *data) +{ + int *command = data; + + set_kernel_text_rw(); + ftrace_modify_all_code(*command); + set_kernel_text_ro(); + + return 0; +} + +void arch_ftrace_update_code(int command) +{ + stop_machine(__ftrace_modify_code, &command, NULL); +} + static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) { return rec->arch.old_mcount ? OLD_NOP : NOP; @@ -73,6 +90,8 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { set_all_modules_text_ro(); + /* Make sure any TLB misses during machine stop are cleared. */ + flush_tlb_all(); return 0; } diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index 4ce4f789446d..afeeb9ea6f43 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -19,7 +19,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry, insn = arm_gen_nop(); if (is_static) - __patch_text(addr, insn); + __patch_text_early(addr, insn); else patch_text(addr, insn); } diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 241255835abb..07f5059dac5e 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -12,8 +12,12 @@ #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/uaccess.h> + #include <asm/traps.h> +#include "patch.h" + struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "r0", 4, offsetof(struct pt_regs, ARM_r0)}, @@ -248,6 +252,33 @@ void kgdb_arch_exit(void) unregister_die_notifier(&kgdb_notifier); } +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + + /* patch_text() only supports int-sized breakpoints */ + BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); + + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); + + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); + + return 0; +} + /* * Register our undef instruction hooks with ARM undef core. * We regsiter a hook specifically looking for the KGB break inst diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 8cf0996aa1a8..4423a565ef6f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -29,6 +29,7 @@ extern unsigned long kexec_boot_atags; static atomic_t waiting_for_crash_ipi; +static unsigned long dt_mem; /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -64,7 +65,7 @@ int machine_kexec_prepare(struct kimage *image) return err; if (be32_to_cpu(header) == OF_DT_HEADER) - kexec_boot_atags = current_segment->mem; + dt_mem = current_segment->mem; } return 0; } @@ -163,12 +164,12 @@ void machine_kexec(struct kimage *image) reboot_code_buffer = page_address(image->control_code_page); /* Prepare parameters for reboot_code_buffer*/ + set_kernel_text_rw(); kexec_start_address = image->start; kexec_indirection_page = page_list; kexec_mach_type = machine_arch_type; - if (!kexec_boot_atags) - kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET; - + kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET + + KEXEC_ARM_ATAGS_OFFSET; /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index 07314af47733..5038960e3c55 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -1,8 +1,11 @@ #include <linux/kernel.h> +#include <linux/spinlock.h> #include <linux/kprobes.h> +#include <linux/mm.h> #include <linux/stop_machine.h> #include <asm/cacheflush.h> +#include <asm/fixmap.h> #include <asm/smp_plat.h> #include <asm/opcodes.h> @@ -13,21 +16,77 @@ struct patch { unsigned int insn; }; -void __kprobes __patch_text(void *addr, unsigned int insn) +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) + __acquires(&patch_lock) +{ + unsigned int uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + page = virt_to_page(addr); + else + return addr; + + if (flags) + spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); + + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap, unsigned long *flags) + __releases(&patch_lock) +{ + clear_fixmap(fixmap); + + if (flags) + spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); +} + +void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap) { bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL); + unsigned int uintaddr = (uintptr_t) addr; + bool twopage = false; + unsigned long flags; + void *waddr = addr; int size; + if (remap) + waddr = patch_map(addr, FIX_TEXT_POKE0, &flags); + else + __acquire(&patch_lock); + if (thumb2 && __opcode_is_thumb16(insn)) { - *(u16 *)addr = __opcode_to_mem_thumb16(insn); + *(u16 *)waddr = __opcode_to_mem_thumb16(insn); size = sizeof(u16); - } else if (thumb2 && ((uintptr_t)addr & 2)) { + } else if (thumb2 && (uintaddr & 2)) { u16 first = __opcode_thumb32_first(insn); u16 second = __opcode_thumb32_second(insn); - u16 *addrh = addr; + u16 *addrh0 = waddr; + u16 *addrh1 = waddr + 2; + + twopage = (uintaddr & ~PAGE_MASK) == PAGE_SIZE - 2; + if (twopage && remap) + addrh1 = patch_map(addr + 2, FIX_TEXT_POKE1, NULL); + + *addrh0 = __opcode_to_mem_thumb16(first); + *addrh1 = __opcode_to_mem_thumb16(second); - addrh[0] = __opcode_to_mem_thumb16(first); - addrh[1] = __opcode_to_mem_thumb16(second); + if (twopage && addrh1 != addr + 2) { + flush_kernel_vmap_range(addrh1, 2); + patch_unmap(FIX_TEXT_POKE1, NULL); + } size = sizeof(u32); } else { @@ -36,10 +95,16 @@ void __kprobes __patch_text(void *addr, unsigned int insn) else insn = __opcode_to_mem_arm(insn); - *(u32 *)addr = insn; + *(u32 *)waddr = insn; size = sizeof(u32); } + if (waddr != addr) { + flush_kernel_vmap_range(waddr, twopage ? size / 2 : size); + patch_unmap(FIX_TEXT_POKE0, &flags); + } else + __release(&patch_lock); + flush_icache_range((uintptr_t)(addr), (uintptr_t)(addr) + size); } @@ -60,16 +125,5 @@ void __kprobes patch_text(void *addr, unsigned int insn) .insn = insn, }; - if (cache_ops_need_broadcast()) { - stop_machine(patch_text_stop_machine, &patch, cpu_online_mask); - } else { - bool straddles_word = IS_ENABLED(CONFIG_THUMB2_KERNEL) - && __opcode_is_thumb32(insn) - && ((uintptr_t)addr & 2); - - if (straddles_word) - stop_machine(patch_text_stop_machine, &patch, NULL); - else - __patch_text(addr, insn); - } + stop_machine(patch_text_stop_machine, &patch, NULL); } diff --git a/arch/arm/kernel/patch.h b/arch/arm/kernel/patch.h index b4731f2dac38..77e054c2f6cd 100644 --- a/arch/arm/kernel/patch.h +++ b/arch/arm/kernel/patch.h @@ -2,6 +2,16 @@ #define _ARM_KERNEL_PATCH_H void patch_text(void *addr, unsigned int insn); -void __patch_text(void *addr, unsigned int insn); +void __patch_text_real(void *addr, unsigned int insn, bool remap); + +static inline void __patch_text(void *addr, unsigned int insn) +{ + __patch_text_real(addr, insn, true); +} + +static inline void __patch_text_early(void *addr, unsigned int insn) +{ + __patch_text_real(addr, insn, false); +} #endif diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8e95aa47457a..3afcb6c2cf06 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -8,6 +8,9 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#ifdef CONFIG_ARM_KERNMEM_PERMS +#include <asm/pgtable.h> +#endif #define PROC_INFO \ . = ALIGN(4); \ @@ -90,6 +93,11 @@ SECTIONS _text = .; HEAD_TEXT } + +#ifdef CONFIG_ARM_KERNMEM_PERMS + . = ALIGN(1<<SECTION_SHIFT); +#endif + .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -112,6 +120,9 @@ SECTIONS ARM_CPU_KEEP(PROC_INFO) } +#ifdef CONFIG_DEBUG_RODATA + . = ALIGN(1<<SECTION_SHIFT); +#endif RO_DATA(PAGE_SIZE) . = ALIGN(4); @@ -145,7 +156,11 @@ SECTIONS _etext = .; /* End of text and rodata section */ #ifndef CONFIG_XIP_KERNEL +# ifdef CONFIG_ARM_KERNMEM_PERMS + . = ALIGN(1<<SECTION_SHIFT); +# else . = ALIGN(PAGE_SIZE); +# endif __init_begin = .; #endif /* @@ -219,8 +234,12 @@ SECTIONS __data_loc = ALIGN(4); /* location in binary */ . = PAGE_OFFSET + TEXT_OFFSET; #else - . = ALIGN(THREAD_SIZE); __init_end = .; +#ifdef CONFIG_ARM_KERNMEM_PERMS + . = ALIGN(1<<SECTION_SHIFT); +#else + . = ALIGN(THREAD_SIZE); +#endif __data_loc = .; #endif diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 7eb94e6fc376..bc219b303bc7 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1009,3 +1009,24 @@ config ARCH_SUPPORTS_BIG_ENDIAN help This option specifies the architecture can support big endian operation. + +config ARM_KERNMEM_PERMS + bool "Restrict kernel memory permissions" + help + If this is set, kernel memory other than kernel text (and rodata) + will be made non-executable. The tradeoff is that each region is + padded to section-size (1MiB) boundaries (because their permissions + are different and splitting the 1M pages into 4K ones causes TLB + performance problems), wasting memory. + +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + depends on ARM_KERNMEM_PERMS + default y + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. This creates + another section-size padded region, so it can waste more memory + space while gaining the read-only protections. diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index e17ed00828d7..b98895d9fe57 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -18,19 +18,20 @@ #include <asm/tlbflush.h> #include "mm.h" -pte_t *fixmap_page_table; - static inline void set_fixmap_pte(int idx, pte_t pte) { unsigned long vaddr = __fix_to_virt(idx); - set_pte_ext(fixmap_page_table + idx, pte, 0); + pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + + set_pte_ext(ptep, pte, 0); local_flush_tlb_kernel_page(vaddr); } static inline pte_t get_fixmap_pte(unsigned long vaddr) { - unsigned long idx = __virt_to_fix(vaddr); - return *(fixmap_page_table + idx); + pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + + return *ptep; } void *kmap(struct page *page) @@ -84,7 +85,7 @@ void *kmap_atomic(struct page *page) * With debugging enabled, kunmap_atomic forces that entry to 0. * Make sure it was indeed properly unmapped. */ - BUG_ON(!pte_none(*(fixmap_page_table + idx))); + BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif /* * When debugging is off, kunmap_atomic leaves the previous mapping @@ -137,7 +138,7 @@ void *kmap_atomic_pfn(unsigned long pfn) idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(fixmap_page_table + idx))); + BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9481f85c56e6..4fe8ecf6a520 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -29,6 +29,7 @@ #include <asm/prom.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/system_info.h> #include <asm/tlb.h> #include <asm/fixmap.h> @@ -570,7 +571,7 @@ void __init mem_init(void) MLK(DTCM_OFFSET, (unsigned long) dtcm_end), MLK(ITCM_OFFSET, (unsigned long) itcm_end), #endif - MLK(FIXADDR_START, FIXADDR_TOP), + MLK(FIXADDR_START, FIXADDR_END), MLM(VMALLOC_START, VMALLOC_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), #ifdef CONFIG_HIGHMEM @@ -615,7 +616,145 @@ void __init mem_init(void) } } -void free_initmem(void) +#ifdef CONFIG_ARM_KERNMEM_PERMS +struct section_perm { + unsigned long start; + unsigned long end; + pmdval_t mask; + pmdval_t prot; + pmdval_t clear; +}; + +static struct section_perm nx_perms[] = { + /* Make pages tables, etc before _stext RW (set NX). */ + { + .start = PAGE_OFFSET, + .end = (unsigned long)_stext, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, + /* Make init RW (set NX). */ + { + .start = (unsigned long)__init_begin, + .end = (unsigned long)_sdata, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, +#ifdef CONFIG_DEBUG_RODATA + /* Make rodata NX (set RO in ro_perms below). */ + { + .start = (unsigned long)__start_rodata, + .end = (unsigned long)__init_begin, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, +#endif +}; + +#ifdef CONFIG_DEBUG_RODATA +static struct section_perm ro_perms[] = { + /* Make kernel code and rodata RX (set RO). */ + { + .start = (unsigned long)_stext, + .end = (unsigned long)__init_begin, +#ifdef CONFIG_ARM_LPAE + .mask = ~PMD_SECT_RDONLY, + .prot = PMD_SECT_RDONLY, +#else + .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), + .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, + .clear = PMD_SECT_AP_WRITE, +#endif + }, +}; +#endif + +/* + * Updates section permissions only for the current mm (sections are + * copied into each mm). During startup, this is the init_mm. Is only + * safe to be called with preemption disabled, as under stop_machine(). + */ +static inline void section_update(unsigned long addr, pmdval_t mask, + pmdval_t prot) +{ + struct mm_struct *mm; + pmd_t *pmd; + + mm = current->active_mm; + pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); + +#ifdef CONFIG_ARM_LPAE + pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot); +#else + if (addr & SECTION_SIZE) + pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot); + else + pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot); +#endif + flush_pmd_entry(pmd); + local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE); +} + +/* Make sure extended page tables are in use. */ +static inline bool arch_has_strict_perms(void) +{ + if (cpu_architecture() < CPU_ARCH_ARMv6) + return false; + + return !!(get_cr() & CR_XP); +} + +#define set_section_perms(perms, field) { \ + size_t i; \ + unsigned long addr; \ + \ + if (!arch_has_strict_perms()) \ + return; \ + \ + for (i = 0; i < ARRAY_SIZE(perms); i++) { \ + if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || \ + !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { \ + pr_err("BUG: section %lx-%lx not aligned to %lx\n", \ + perms[i].start, perms[i].end, \ + SECTION_SIZE); \ + continue; \ + } \ + \ + for (addr = perms[i].start; \ + addr < perms[i].end; \ + addr += SECTION_SIZE) \ + section_update(addr, perms[i].mask, \ + perms[i].field); \ + } \ +} + +static inline void fix_kernmem_perms(void) +{ + set_section_perms(nx_perms, prot); +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + set_section_perms(ro_perms, prot); +} + +void set_kernel_text_rw(void) +{ + set_section_perms(ro_perms, clear); +} + +void set_kernel_text_ro(void) +{ + set_section_perms(ro_perms, prot); +} +#endif /* CONFIG_DEBUG_RODATA */ + +#else +static inline void fix_kernmem_perms(void) { } +#endif /* CONFIG_ARM_KERNMEM_PERMS */ + +void free_tcmmem(void) { #ifdef CONFIG_HAVE_TCM extern char __tcm_start, __tcm_end; @@ -623,6 +762,12 @@ void free_initmem(void) poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); #endif +} + +void free_initmem(void) +{ + fix_kernmem_perms(); + free_tcmmem(); poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fb9d305c874b..d7c0e974695c 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -22,6 +22,7 @@ #include <asm/cputype.h> #include <asm/sections.h> #include <asm/cachetype.h> +#include <asm/fixmap.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -393,6 +394,29 @@ SET_MEMORY_FN(x, pte_set_x) SET_MEMORY_FN(nx, pte_set_nx) /* + * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). + * As a result, this can only be called with preemption disabled, as under + * stop_machine(). + */ +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) +{ + unsigned long vaddr = __fix_to_virt(idx); + pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + + /* Make sure fixmap region does not exceed available allocation. */ + BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > + FIXADDR_END); + BUG_ON(idx >= __end_of_fixed_addresses); + + if (pgprot_val(prot)) + set_pte_at(NULL, vaddr, pte, + pfn_pte(phys >> PAGE_SHIFT, prot)); + else + pte_clear(NULL, vaddr, pte); + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); +} + +/* * Adjust the PMD section entries according to the CPU in use. */ static void __init build_mem_type_table(void) @@ -1326,10 +1350,10 @@ static void __init kmap_init(void) #ifdef CONFIG_HIGHMEM pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE), PKMAP_BASE, _PAGE_KERNEL_TABLE); - - fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START), - FIXADDR_START, _PAGE_KERNEL_TABLE); #endif + + early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START, + _PAGE_KERNEL_TABLE); } static void __init map_lowmem(void) @@ -1349,13 +1373,20 @@ static void __init map_lowmem(void) if (start >= end) break; - if (end < kernel_x_start || start >= kernel_x_end) { + if (end < kernel_x_start) { map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); map.length = end - start; map.type = MT_MEMORY_RWX; create_mapping(&map); + } else if (start >= kernel_x_end) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_RW; + + create_mapping(&map); } else { /* This better cover the entire kernel */ if (start < kernel_x_start) { diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 43fe1434f34e..7b03c24c85d8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -649,6 +649,17 @@ config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES Space separated list of names of dtbs to append when building a concatenated Image.gz-dtb. +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 8dd3a551c170..d6285ef9b5f9 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,6 +6,18 @@ config FRAME_POINTER bool default y +config ARM64_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS + help + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU @@ -54,6 +66,29 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. + + If in doubt, say Y + +config DEBUG_ALIGN_RODATA + depends on DEBUG_RODATA && !ARM64_64K_PAGES + bool "Align linker sections up to SECTION_SIZE" + help + If this option is enabled, sections that may potentially be marked as + read only or non-executable will be aligned up to the section size of + the kernel. This prevents sections from being split into pages and + avoids a potential TLB penalty. The downside is an increase in + alignment and potentially wasted space. Turn on this option if + performance is more important than memory pressure. + + If in doubt, say N + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 689b6379188c..81a5e4df8fec 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -152,4 +152,9 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + #endif diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h new file mode 100644 index 000000000000..69d37d87b159 --- /dev/null +++ b/arch/arm64/include/asm/dmi.h @@ -0,0 +1,31 @@ +/* + * arch/arm64/include/asm/dmi.h + * + * Copyright (C) 2013 Linaro Limited. + * Written by: Yi Li (yi.li@linaro.org) + * + * based on arch/ia64/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include <linux/io.h> +#include <linux/slab.h> + +/* + * According to section 2.3.6 of the UEFI spec, the firmware should not + * request a virtual mapping for configuration tables such as SMBIOS. + * This means we have to map them before use. + */ +#define dmi_early_remap(x, l) ioremap_cache(x, l) +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap(x, l) ioremap_cache(x, l) +#define dmi_unmap(x) iounmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index a34fd3b12e2b..7baf2cc04e1e 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -6,29 +6,35 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_idmap_init(void); +extern void efi_virtmap_init(void); #else #define efi_init() -#define efi_idmap_init() +#define efi_virtmap_init() #endif #define efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ efi_status_t __s; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ __s; \ }) #define __efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ }) @@ -44,4 +50,28 @@ extern void efi_idmap_init(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define EFI_ALLOC_ALIGN SZ_64K + +/* + * On ARM systems, virtually remapped UEFI runtime services are set up in three + * distinct stages: + * - The stub retrieves the final version of the memory map from UEFI, populates + * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime + * service to communicate the new mapping to the firmware (Note that the new + * mapping is not live at this time) + * - During early boot, the page tables are allocated and populated based on the + * virt_addr fields in the memory map, but only if all descriptors with the + * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this + * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings + * have been installed successfully. + * - During an early initcall(), the UEFI Runtime Services are enabled and the + * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a + * non-early mapping of the UEFI system table, and we need to have the virtmap + * installed. + */ +#define EFI_VIRTMAP EFI_ARCH_1 + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 5f7bfe6df723..defa0ff98250 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -31,6 +31,7 @@ * */ enum fixed_addresses { + FIX_HOLE, FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, @@ -48,6 +49,7 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + FIX_TEXT_POKE0, __end_of_fixed_addresses }; @@ -56,10 +58,11 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) -extern void __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); +void __init early_fixmap_init(void); -#define __set_fixmap __early_set_fixmap +#define __early_set_fixmap __set_fixmap + +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); #include <asm-generic/fixmap.h> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index e1f7ecdde11f..1eebf5bb0b58 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -3,7 +3,6 @@ #include <asm-generic/irq.h> -extern void (*handle_arch_irq)(struct pt_regs *); extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index c2f006c48bdb..3d311761e3c2 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -31,7 +31,8 @@ extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); -/* create an identity mapping for memory (or io if map_io is true) */ -extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); +extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot); #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 41a43bf26492..1abe4d08725b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -264,6 +264,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pgprot_t mk_sect_prot(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); +} + /* * THP definitions. */ diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index d18a44940968..8ce9b0577442 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -61,7 +61,8 @@ ENTRY(efi_stub_entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - mov x21, x0 + ldr x21, =stext_offset + add x21, x0, x21 /* * Calculate size of the kernel Image (same for original and copy). diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 1d85a7c5a850..a98415b5979c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,24 +11,31 @@ * */ +#include <linux/atomic.h> +#include <linux/dmi.h> #include <linux/efi.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/mm_types.h> #include <linux/bootmem.h> #include <linux/of.h> #include <linux/of_fdt.h> +#include <linux/preempt.h> +#include <linux/rbtree.h> +#include <linux/rwsem.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <asm/cacheflush.h> #include <asm/efi.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> struct efi_memory_map memmap; -static efi_runtime_services_t *runtime; - static u64 efi_system_table; static int uefi_debug __initdata; @@ -47,30 +54,33 @@ static int __init is_normal_ram(efi_memory_desc_t *md) return 0; } -static void __init efi_setup_idmap(void) +/* + * Translate a EFI virtual address into a physical address: this is necessary, + * as some data members of the EFI system table are virtually remapped after + * SetVirtualAddressMap() has been called. + */ +static phys_addr_t efi_to_phys(unsigned long addr) { - struct memblock_region *r; efi_memory_desc_t *md; - u64 paddr, npages, size; - - for_each_memblock(memory, r) - create_id_mapping(r->base, r->size, 0); - /* map runtime io spaces */ for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - create_id_mapping(paddr, size, 1); + if (md->virt_addr == 0) + /* no virtual mapping has been installed by the stub */ + break; + if (md->virt_addr <= addr && + (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) + return md->phys_addr + addr - md->virt_addr; } + return addr; } static int __init uefi_init(void) { efi_char16_t *c16; + void *config_tables; + u64 table_size; char vendor[100] = "unknown"; int i, retval; @@ -98,7 +108,7 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi.systab->fw_vendor, + c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), sizeof(vendor)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) @@ -111,10 +121,14 @@ static int __init uefi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - retval = efi_config_init(NULL); - if (retval == 0) - set_bit(EFI_CONFIG_TABLES, &efi.flags); + table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; + config_tables = early_memremap(efi_to_phys(efi.systab->tables), + table_size); + + retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, + sizeof(efi_config_table_64_t), NULL); + early_memunmap(config_tables, table_size); out: early_memunmap(efi.systab, sizeof(efi_system_table_t)); return retval; @@ -125,17 +139,17 @@ out: */ static __init int is_reserve_region(efi_memory_desc_t *md) { - if (!is_normal_ram(md)) + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: return 0; - - if (md->attribute & EFI_MEMORY_RUNTIME) - return 1; - - if (md->type == EFI_ACPI_RECLAIM_MEMORY || - md->type == EFI_RESERVED_TYPE) - return 1; - - return 0; + default: + break; + } + return is_normal_ram(md); } static __init void reserve_regions(void) @@ -164,9 +178,7 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md) || - md->type == EFI_BOOT_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_DATA) { + if (is_reserve_region(md)) { memblock_reserve(paddr, size); if (uefi_debug) pr_cont("*"); @@ -179,123 +191,6 @@ static __init void reserve_regions(void) set_bit(EFI_MEMMAP, &efi.flags); } - -static u64 __init free_one_region(u64 start, u64 end) -{ - u64 size = end - start; - - if (uefi_debug) - pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); - - free_bootmem_late(start, size); - return size; -} - -static u64 __init free_region(u64 start, u64 end) -{ - u64 map_start, map_end, total = 0; - - if (end <= start) - return total; - - map_start = (u64)memmap.phys_map; - map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); - map_start &= PAGE_MASK; - - if (start < map_end && end > map_start) { - /* region overlaps UEFI memmap */ - if (start < map_start) - total += free_one_region(start, map_start); - - if (map_end < end) - total += free_one_region(map_end, end); - } else - total += free_one_region(start, end); - - return total; -} - -static void __init free_boot_services(void) -{ - u64 total_freed = 0; - u64 keep_end, free_start, free_end; - efi_memory_desc_t *md; - - /* - * If kernel uses larger pages than UEFI, we have to be careful - * not to inadvertantly free memory we want to keep if there is - * overlap at the kernel page size alignment. We do not want to - * free is_reserve_region() memory nor the UEFI memmap itself. - * - * The memory map is sorted, so we keep track of the end of - * any previous region we want to keep, remember any region - * we want to free and defer freeing it until we encounter - * the next region we want to keep. This way, before freeing - * it, we can clip it as needed to avoid freeing memory we - * want to keep for UEFI. - */ - - keep_end = 0; - free_start = 0; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - - if (is_reserve_region(md)) { - /* - * We don't want to free any memory from this region. - */ - if (free_start) { - /* adjust free_end then free region */ - if (free_end > md->phys_addr) - free_end -= PAGE_SIZE; - total_freed += free_region(free_start, free_end); - free_start = 0; - } - keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - continue; - } - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) { - /* no need to free this region */ - continue; - } - - /* - * We want to free memory from this region. - */ - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (free_start) { - if (paddr <= free_end) - free_end = paddr + size; - else { - total_freed += free_region(free_start, free_end); - free_start = paddr; - free_end = paddr + size; - } - } else { - free_start = paddr; - free_end = paddr + size; - } - if (free_start < keep_end) { - free_start += PAGE_SIZE; - if (free_start >= free_end) - free_start = 0; - } - } - if (free_start) - total_freed += free_region(free_start, free_end); - - if (total_freed) - pr_info("Freed 0x%llx bytes of EFI boot services memory", - total_freed); -} - void __init efi_init(void) { struct efi_fdt_params params; @@ -320,61 +215,14 @@ void __init efi_init(void) reserve_regions(); } -void __init efi_idmap_init(void) -{ - if (!efi_enabled(EFI_BOOT)) - return; - - /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ - efi_setup_idmap(); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} - -static int __init remap_region(efi_memory_desc_t *md, void **new) -{ - u64 paddr, vaddr, npages, size; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - vaddr = (__force u64)ioremap_cache(paddr, size); - else - vaddr = (__force u64)ioremap(paddr, size); - - if (!vaddr) { - pr_err("Unable to remap 0x%llx pages @ %p\n", - npages, (void *)paddr); - return 0; - } - - /* adjust for any rounding when EFI and system pagesize differs */ - md->virt_addr = vaddr + (md->phys_addr - paddr); - - if (uefi_debug) - pr_info(" EFI remap 0x%012llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - memcpy(*new, md, memmap.desc_size); - *new += memmap.desc_size; - - return 1; -} - /* - * Switch UEFI from an identity map to a kernel virtual map + * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., + * non-early mapping of the UEFI system table and virtual mappings for all + * EFI_MEMORY_RUNTIME regions. */ -static int __init arm64_enter_virtual_mode(void) +static int __init arm64_enable_runtime_services(void) { - efi_memory_desc_t *md; - phys_addr_t virtmap_phys; - void *virtmap, *virt_md; - efi_status_t status; u64 mapsize; - int count = 0; - unsigned long flags; if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); @@ -396,78 +244,116 @@ static int __init arm64_enter_virtual_mode(void) efi.memmap = &memmap; - /* Map the runtime regions */ - virtmap = kmalloc(mapsize, GFP_KERNEL); - if (!virtmap) { - pr_err("Failed to allocate EFI virtual memmap\n"); + efi.systab = (__force void *)ioremap_cache(efi_system_table, + sizeof(efi_system_table_t)); + if (!efi.systab) { + pr_err("Failed to remap EFI System Table\n"); return -1; } - virtmap_phys = virt_to_phys(virtmap); - virt_md = virtmap; + set_bit(EFI_SYSTEM_TABLES, &efi.flags); - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (!remap_region(md, &virt_md)) - goto err_unmap; - ++count; + if (!efi_enabled(EFI_VIRTMAP)) { + pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); + return -1; } - efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); - if (!efi.systab) { - /* - * If we have no virtual mapping for the System Table at this - * point, the memory map doesn't cover the physical offset where - * it resides. This means the System Table will be inaccessible - * to Runtime Services themselves once the virtual mapping is - * installed. - */ - pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); - goto err_unmap; - } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); + /* Set up runtime services function pointers */ + efi_native_runtime_setup(); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - local_irq_save(flags); - cpu_switch_mm(idmap_pg_dir, &init_mm); + efi.runtime_version = efi.systab->hdr.revision; - /* Call SetVirtualAddressMap with the physical address of the map */ - runtime = efi.systab->runtime; - efi.set_virtual_address_map = runtime->set_virtual_address_map; + return 0; +} +early_initcall(arm64_enable_runtime_services); - status = efi.set_virtual_address_map(count * memmap.desc_size, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)virtmap_phys); - cpu_set_reserved_ttbr0(); +static int __init arm64_dmi_init(void) +{ + /* + * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to + * be called early because dmi_id_init(), which is an arch_initcall + * itself, depends on dmi_scan_machine() having been called already. + */ + dmi_scan_machine(); + if (dmi_available) + dmi_set_dump_stack_arch_desc(); + return 0; +} +core_initcall(arm64_dmi_init); + +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + INIT_MM_CONTEXT(efi_mm) +}; + +static void efi_set_pgd(struct mm_struct *mm) +{ + cpu_switch_mm(mm->pgd, mm); flush_tlb_all(); - local_irq_restore(flags); + if (icache_is_aivivt()) + __flush_icache_all(); +} - kfree(virtmap); +void efi_virtmap_load(void) +{ + preempt_disable(); + efi_set_pgd(&efi_mm); +} - free_boot_services(); +void efi_virtmap_unload(void) +{ + efi_set_pgd(current->active_mm); + preempt_enable(); +} - if (status != EFI_SUCCESS) { - pr_err("Failed to set EFI virtual address map! [%lx]\n", - status); - return -1; - } +void __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; - /* Set up runtime services function pointers */ - runtime = efi.systab->runtime; - efi_native_runtime_setup(); - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + if (!efi_enabled(EFI_BOOT)) + return; - efi.runtime_version = efi.systab->hdr.revision; + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; - return 0; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (WARN(md->virt_addr == 0, + "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n", + md->phys_addr)) + return; -err_unmap: - /* unmap all mappings that succeeded: there are 'count' of those */ - for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { - md = virt_md; - iounmap((__force void __iomem *)md->virt_addr); + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + pr_info(" EFI remap 0x%016llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. + */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); } - kfree(virtmap); - return -1; + set_bit(EFI_VIRTMAP, &efi.flags); + early_memunmap(memmap.map, memmap.map_end - memmap.map); } -early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c19999ce2fb1..a4f58c8760cf 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -188,7 +188,8 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - ldr x1, handle_arch_irq + adrp x1, handle_arch_irq + ldr x1, [x1, #:lo12:handle_arch_irq] mov x0, sp blr x1 .endm @@ -729,6 +730,3 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) - -ENTRY(handle_arch_irq) - .quad 0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0a6e4f924df8..8ce88e08c030 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -132,6 +132,8 @@ efi_head: #endif #ifdef CONFIG_EFI + .globl stext_offset + .set stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -155,12 +157,12 @@ optional_header: .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext - efi_head // BaseOfCode + .long stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase - .long 0x20 // SectionAlignment - .long 0x8 // FileAlignment + .long 0x1000 // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion .short 0 // MajorImageVersion @@ -172,7 +174,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext - efi_head // SizeOfHeaders + .long stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -217,16 +219,24 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext - efi_head // VirtualAddress + .long stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext - efi_head // PointerToRawData + .long stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) .short 0 // NumberOfRelocations (0 for executables) .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) - .align 5 + + /* + * EFI will load stext onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that stext is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 #endif ENTRY(stext) @@ -238,7 +248,13 @@ ENTRY(stext) mov x0, x22 bl lookup_processor_type mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? + /* + * __error_p may end up out of range for cbz if text areas are + * aligned up to section sizes. + */ + cbnz x23, 1f // invalid processor (x23=0)? + b __error_p +1: bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -250,13 +266,214 @@ ENTRY(stext) */ ldr x27, __switch_data // address to jump to after // MMU has been enabled - adr lr, __enable_mmu // return (PIC) address + adrp lr, __enable_mmu // return (PIC) address + add lr, lr, #:lo12:__enable_mmu ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys br x12 // initialise processor ENDPROC(stext) /* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. + */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) +/* + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end + lsr \phys, \phys, #BLOCK_SHIFT + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + mov x0, x25 // idmap_pg_dir + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) + create_pgd_entry x0, x3, x5, x6 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the kernel image (starting with PHYS_OFFSET). + */ + mov x0, x26 // swapper_pg_dir + mov x5, #PAGE_OFFSET + create_pgd_entry x0, x5, x3, x6 + ldr x6, =KERNEL_END + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __bss_start // x6 + .quad __bss_stop // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. + */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x6, x7, [x3], #16 +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * end early head section, begin head code that is also used for + * hotplug and needs to have the same protections as the text region + */ + .section ".text","ax" +/* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. * @@ -331,7 +548,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr vttbr_el2, xzr /* Hypervisor stub */ - adr x0, __hyp_stub_vectors + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors msr vbar_el2, x0 /* spsr */ @@ -492,183 +710,6 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index - add \tmp2, \tbl, #PAGE_SIZE - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. - * - * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 - */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 -#endif - .endm - -/* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). - * - * Preserves: tbl, flags - * Corrupts: phys, start, end, pstate - */ - .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT - and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT - and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: str \phys, [\tbl, \start, lsl #3] // store the entry - add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block - cmp \start, \end - b.ls 9999b - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled, including the FDT blob (TTBR1) - * - pgd entry for fixed mappings (TTBR1) - */ -__create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses - mov x27, lr - - /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - /* - * Clear the idmap and swapper page tables. - */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b - - ldr x7, =MM_MMUFLAGS - - /* - * Create the identity mapping. - */ - mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END - mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET - create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END - mov x3, x24 // phys offset - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the FDT blob (maximum 2MB; must be within 512MB of - * PHYS_OFFSET). - */ - mov x3, x21 // FDT phys address - and x3, x3, #~((1 << 21) - 1) // 2MB aligned - mov x6, #PAGE_OFFSET - sub x5, x3, x24 // subtract PHYS_OFFSET - tst x5, #~((1 << 29) - 1) // within 512MB? - csel x21, xzr, x21, ne // zero the FDT pointer - b.ne 1f - add x5, x5, x6 // __va(FDT blob) - add x6, x5, #1 << 21 // 2MB for the FDT blob - sub x6, x6, #1 // inclusive range - create_block_map x0, x7, x3, x5, x6 -1: - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - mov lr, x27 - ret -ENDPROC(__create_page_tables) - .ltorg - - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - -/* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. - */ -__mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET - mov x29, #0 - b start_kernel -ENDPROC(__mmap_switched) - -/* * Exception handling. Something went wrong and we can't proceed. We ought to * tell the user, but since we don't have any guarantee that we're even * running on the right architecture, we do virtually nothing. @@ -715,22 +756,3 @@ __lookup_processor_type_data: .quad . .quad cpu_table .size __lookup_processor_type_data, . - __lookup_processor_type_data - -/* - * Determine validity of the x21 FDT pointer. - * The dtb must be 8-byte aligned and live in the first 512M of memory. - */ -__vet_fdt: - tst x21, #0x7 - b.ne 1f - cmp x21, x24 - b.lt 1f - mov x0, #(1 << 29) - add x0, x0, x24 - cmp x21, x0 - b.ge 1f - ret -1: - mov x21, #0 - ret -ENDPROC(__vet_fdt) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7e9327a0986d..27d4864577e5 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -17,14 +17,19 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/mm.h> #include <linux/smp.h> +#include <linux/spinlock.h> #include <linux/stop_machine.h> +#include <linux/types.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> +#include <asm/fixmap.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -72,6 +77,29 @@ bool __kprobes aarch64_insn_is_nop(u32 insn) } } +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else + page = virt_to_page(addr); + + BUG_ON(!page); + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} /* * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always * little-endian. @@ -88,10 +116,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } +static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +{ + void *waddr = addr; + unsigned long flags = 0; + int ret; + + spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + + patch_unmap(FIX_TEXT_POKE0); + spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { insn = cpu_to_le32(insn); - return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); + return __aarch64_insn_write(addr, insn); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 071a6ec13bd8..240b75c0e94f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } +void (*handle_arch_irq)(struct pt_regs *) = NULL; + void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq) diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 263a166291fb..4f1fec7a46db 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -22,9 +22,8 @@ #ifdef HAVE_JUMP_LABEL -static void __arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - bool is_static) +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)entry->code; u32 insn; @@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry, insn = aarch64_insn_gen_nop(); } - if (is_static) - aarch64_insn_patch_text_nosync(addr, insn); - else - aarch64_insn_patch_text(&addr, &insn, 1); -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - __arch_jump_label_transform(entry, type, false); + aarch64_insn_patch_text(&addr, &insn, 1); } void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - __arch_jump_label_transform(entry, type, true); + /* + * We use the architected A64 NOP in arch_static_branch, so there's no + * need to patch an identical A64 NOP over the top of it here. The core + * will call arch_jump_label_transform from a module notifier if the + * NOP needs to be replaced by a branch. + */ } #endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index ae8be923f2cd..789ce6cf7ff6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -380,6 +380,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_fixmap_init(); early_ioremap_init(); parse_early_param(); @@ -396,7 +397,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); request_standard_resources(); - efi_idmap_init(); + efi_virtmap_init(); early_ioremap_reset(); unflatten_device_tree(); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 2f600294e8ca..5f11ebe652a4 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#include <asm/pgtable.h> #include "image.h" @@ -32,6 +33,30 @@ jiffies = jiffies_64; *(.hyp.text) \ VMLINUX_SYMBOL(__hyp_text_end) = .; +/* + * The size of the PE/COFF section that covers the kernel image, which + * runs from stext to _edata, must be a round multiple of the PE/COFF + * FileAlignment, which we set to its minimum value of 0x200. 'stext' + * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned + * boundary should be sufficient. + */ +PECOFF_FILE_ALIGNMENT = 0x200; + +#ifdef CONFIG_EFI +#define PECOFF_EDATA_PADDING \ + .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } +#else +#define PECOFF_EDATA_PADDING +#endif + +#ifdef CONFIG_DEBUG_ALIGN_RODATA +#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); +#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +#else +#define ALIGN_DEBUG_RO +#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); +#endif + SECTIONS { /* @@ -54,6 +79,7 @@ SECTIONS _text = .; HEAD_TEXT } + ALIGN_DEBUG_RO .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -70,19 +96,22 @@ SECTIONS *(.got) /* Global offset table */ } + ALIGN_DEBUG_RO RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES + ALIGN_DEBUG_RO _etext = .; /* End of text and rodata section */ - . = ALIGN(PAGE_SIZE); + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) __init_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } - . = ALIGN(16); + + ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) @@ -114,6 +143,7 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + PECOFF_EDATA_PADDING _edata = .; BSS_SECTION(0, 0, 0) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index c56179ed2c09..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -3,3 +3,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM64_PTDUMP) += dump.o diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6efbb52cb92e..6ee26f82966b 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -170,7 +170,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false), - NULL); + __builtin_return_address(0)); if (!coherent_ptr) goto no_map; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c new file mode 100644 index 000000000000..bf69601be546 --- /dev/null +++ b/arch/arm64/mm/dump.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 and arm implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> + +#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +enum address_markers_idx { + VMALLOC_START_NR = 0, + VMALLOC_END_NR, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + VMEMMAP_START_NR, + VMEMMAP_END_NR, +#endif + PCI_START_NR, + PCI_END_NR, + FIXADDR_START_NR, + FIXADDR_END_NR, + MODULES_START_NR, + MODUELS_END_NR, + KERNEL_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + { VMALLOC_START, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + { 0, "vmemmap start" }, + { 0, "vmemmap end" }, +#endif + { (unsigned long) PCI_IOBASE, "PCI I/O start" }, + { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = PTE_USER, + .val = PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = PTE_RDONLY, + .val = PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = PTE_PXN, + .val = PTE_PXN, + .set = "NX", + .clear = "x ", + }, { + .mask = PTE_SHARED, + .val = PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = PTE_AF, + .val = PTE_AF, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_NG, + .val = PTE_NG, + .set = "NG", + .clear = " ", + }, { + .mask = PTE_UXN, + .val = PTE_UXN, + .set = "UXN", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), + .set = "DEVICE/nGnRnE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), + .set = "DEVICE/nGnRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_GRE), + .set = "DEVICE/GRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_NC), + .set = "MEM/NORMAL-NC", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL), + .set = "MEM/NORMAL", + } +}; + +struct pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pud */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pmd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, + size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, + u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < LOWEST_ADDR) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%16lx-0x%16lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + seq_puts(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud)) + note_page(st, addr, 2, pud_val(*pud)); + else + walk_pmd(st, pud, addr); + } +} + +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +{ + pgd_t *pgd = pgd_offset(mm, 0); + unsigned i; + unsigned long addr; + + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = start + i * PGDIR_SIZE; + if (pgd_none(*pgd) || pgd_bad(*pgd)) + note_page(st, addr, 1, pgd_val(*pgd)); + else + walk_pud(st, pgd, addr); + } +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .marker = address_markers, + }; + + walk_pgd(&st, &init_mm, LOWEST_ADDR); + + note_page(&st, 0, 0, 0); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[VMEMMAP_START_NR].start_address = + (unsigned long)virt_to_page(PAGE_OFFSET); + address_markers[VMEMMAP_END_NR].start_address = + (unsigned long)virt_to_page(high_memory); + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 0 : -ENOMEM; +} +device_initcall(ptdump_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 41cb6d3d6075..6094c64b3380 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -279,6 +279,7 @@ retry: * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b45b31397d0d..b24ed5e112ec 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -327,6 +327,7 @@ void __init mem_init(void) void free_initmem(void) { + fixup_init(); free_initmem_default(0); free_alternatives_memory(); } diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 4a07630a6616..cbb99c8f1e04 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - -static inline pud_t * __init early_ioremap_pud(unsigned long addr) -{ - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - - return pud_offset(pgd, addr); -} - -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) -{ - pud_t *pud = early_ioremap_pud(addr); - - BUG_ON(pud_none(*pud) || pud_bad(*pud)); - - return pmd_offset(pud, addr); -} - -static inline pte_t * __init early_ioremap_pte(unsigned long addr) -{ - pmd_t *pmd = early_ioremap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); -} - +/* + * Must be called after early_fixmap_init + */ void __init early_ioremap_init(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - - pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmd %p != %p\n", - pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", - FIX_BTMAP_BEGIN); - } - early_ioremap_setup(); } - -void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *pte; - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - - pte = early_ioremap_pte(addr); - - if (pgprot_val(flags)) - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); - else { - pte_clear(&init_mm, addr, pte); - flush_tlb_kernel_range(addr, addr+PAGE_SIZE); - } -} diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index d519f4f50c8c..ef47d99b5cbc 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,2 +1,3 @@ extern void __init bootmem_init(void); -extern void __init arm64_swiotlb_init(void); + +void fixup_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f4f8b500f74c..e83d30e544cc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -26,8 +26,10 @@ #include <linux/memblock.h> #include <linux/fs.h> #include <linux/io.h> +#include <linux/stop_machine.h> #include <asm/cputype.h> +#include <asm/fixmap.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -132,21 +134,43 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); memset(ptr, 0, sz); return ptr; } -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - pgprot_t prot) + pgprot_t prot, + void *(*alloc)(unsigned long size)) { pte_t *pte; - if (pmd_none(*pmd)) { - pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); } - BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -155,29 +179,40 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pmd_t *pmd; unsigned long next; - pmdval_t prot_sect; - pgprot_t prot_pte; - - if (map_io) { - prot_sect = PROT_SECT_DEVICE_nGnRE; - prot_pte = __pgprot(PROT_DEVICE_nGnRE); - } else { - prot_sect = PROT_SECT_NORMAL_EXEC; - prot_pte = PAGE_KERNEL_EXEC; - } /* * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_bad(*pud)) { - pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); - pud_populate(&init_mm, pud, pmd); + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } + pud_populate(mm, pud, pmd); + flush_tlb_all(); } pmd = pmd_offset(pud, addr); @@ -186,7 +221,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect)); + set_pmd(pmd, __pmd(phys | + pgprot_val(mk_sect_prot(prot)))); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -195,22 +231,35 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot_pte); + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(&init_mm, pgd, pud); + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -221,10 +270,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, /* * For 4K granule only, attempt to put down a 1GB block */ - if (!map_io && (PAGE_SHIFT == 12) && - ((addr | next | phys) & ~PUD_MASK) == 0) { + if (use_1G_block(addr, next, phys)) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC)); + set_pud(pud, __pud(phys | + pgprot_val(mk_sect_prot(prot)))); /* * If we have an old value for a pud, it will @@ -239,7 +288,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, flush_tlb_all(); } } else { - alloc_init_pmd(pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -249,9 +298,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - int map_io) +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, + phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) { unsigned long addr, length, end, next; @@ -261,32 +311,96 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; } - __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0); + __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + size, prot, early_alloc); +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot) +{ + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + early_alloc); } -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { - if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) { - pr_warn("BUG: not creating id mapping for %pa\n", &addr); + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); return; } - __create_mapping(&idmap_pg_dir[pgd_index(addr)], - addr, addr, size, map_io); + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); } +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } + +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); +} +#endif + static void __init map_mem(void) { struct memblock_region *reg; @@ -331,14 +445,53 @@ static void __init map_mem(void) memblock_set_current_limit(limit); } #endif - - create_mapping(start, __phys_to_virt(start), end - start); + __map_memblock(start, end); } /* Limit no longer required. */ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); + +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. @@ -348,6 +501,7 @@ void __init paging_init(void) void *zero_page; map_mem(); + fixup_executable(); /* * Finally flush the caches and tlb to ensure that we're in a @@ -463,3 +617,96 @@ void vmemmap_free(unsigned long start, unsigned long end) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif + +static inline pud_t * fixmap_pud(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + return pud_offset(pgd, addr); +} + +static inline pmd_t * fixmap_pmd(unsigned long addr) +{ + pud_t *pud = fixmap_pud(addr); + + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * fixmap_pte(unsigned long addr) +{ + pmd_t *pmd = fixmap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_fixmap_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr = FIXADDR_START; + + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not preparted: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p, %p\n", + pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} |