#include "textflag.h"

// func maskAsm(b *byte, len int, key uint32) uint32
//
// maskAsm XORs the len bytes starting at b, in place, with the 4-byte key
// repeated end to end, and returns the key rotated so that its low byte is
// the key byte that would apply to the byte following the processed range.
//
// Argument/result layout (28 bytes total, no local frame):
//   b    +0(FP)   pointer to the first byte
//   len  +8(FP)   byte count; assumed non-negative (not checked here)
//   key  +16(FP)  32-bit key; its low byte is applied to b[0]
//   ret  +24(FP)  rotated key
//
// Register usage:
//   R0       = cursor into the buffer (post-incremented by every store)
//   R1       = remaining length
//   R3       = key (uint32), rotated as 2- and 1-byte tails are consumed
//   R2       = uint64(key)<<32 | uint64(key)
//   V0       = R2 duplicated into both 64-bit lanes (key repeated 4 times)
//   R11, R12 = scalar scratch; V1-V4 = vector scratch
//
// NOTE(review): go vet's asmdecl check compares the names used with (FP)
// against the Go declaration, which is not visible from this file; confirm
// that b_ptr/b_len are accepted for parameters named b/len.
TEXT ·maskAsm(SB), NOSPLIT, $0-28
	MOVD	b_ptr+0(FP), R0
	MOVD	b_len+8(FP), R1
	MOVWU	key+16(FP), R3

	// Widen the key: 32 bits -> 64 bits in R2 -> 128 bits in V0.
	MOVD	R3, R2
	ORR	R2<<32, R2, R2
	VDUP	R2, V0.D2

	CMP	$64, R1
	BLT	less_than_64

	// Main loop: 64 bytes per iteration through four vector registers.
loop_64:
	VLD1	(R0), [V1.B16, V2.B16, V3.B16, V4.B16]
	VEOR	V1.B16, V0.B16, V1.B16
	VEOR	V2.B16, V0.B16, V2.B16
	VEOR	V3.B16, V0.B16, V3.B16
	VEOR	V4.B16, V0.B16, V4.B16
	VST1.P	[V1.B16, V2.B16, V3.B16, V4.B16], 64(R0)
	// Plain SUB: the CMP below is what sets the flags for BGE, so a
	// flag-setting subtract here would be dead work.
	SUB	$64, R1
	CMP	$64, R1
	BGE	loop_64

	// Tail: fewer than 64 bytes remain, so each set bit of R1 (32, 16, 8,
	// 4, 2, 1) selects exactly one fixed-size step below. Every step down
	// to 4 bytes is a multiple of the key size, so the key needs no
	// rotation until the 2- and 1-byte steps.
less_than_64:
	CBZ	R1, end

	// Bit 5: 32 bytes via two vector registers.
	TBZ	$5, R1, less_than_32
	VLD1	(R0), [V1.B16, V2.B16]
	VEOR	V1.B16, V0.B16, V1.B16
	VEOR	V2.B16, V0.B16, V2.B16
	VST1.P	[V1.B16, V2.B16], 32(R0)

	// Bit 4: 16 bytes via a pair of 64-bit general registers.
less_than_32:
	TBZ	$4, R1, less_than_16
	LDP	(R0), (R11, R12)
	EOR	R11, R2, R11
	EOR	R12, R2, R12
	STP.P	(R11, R12), 16(R0)

	// Bit 3: 8 bytes.
less_than_16:
	TBZ	$3, R1, less_than_8
	MOVD	(R0), R11
	EOR	R2, R11, R11
	MOVD.P	R11, 8(R0)

	// Bit 2: 4 bytes (the low 32 bits of R2 are the key itself).
less_than_8:
	TBZ	$2, R1, less_than_4
	MOVWU	(R0), R11
	EORW	R2, R11, R11
	MOVWU.P	R11, 4(R0)

	// Bit 1: 2 bytes. Only the low halfword is stored, so XORing with the
	// full 32-bit key is harmless. Afterwards rotate the key by 16 so the
	// next unused key byte sits in the low byte.
less_than_4:
	TBZ	$1, R1, less_than_2
	MOVHU	(R0), R11
	EORW	R3, R11, R11
	MOVHU.P	R11, 2(R0)
	RORW	$16, R3

	// Bit 0: final single byte, then rotate the key by 8.
less_than_2:
	TBZ	$0, R1, end
	MOVBU	(R0), R11
	EORW	R3, R11, R11
	MOVBU.P	R11, 1(R0)
	RORW	$8, R3

end:
	// Return the (possibly rotated) key.
	MOVWU	R3, ret+24(FP)
	RET