mirror of
https://git.freebsd.org/src.git
synced 2026-01-11 19:57:22 +00:00
lib/libmd: import aarch64 md5 SIMD implementation
Reviewed by: andrew, imp
Approved by: markj (mentor)
Differential Revision: https://reviews.freebsd.org/D45670
MFC after: 1 month
This commit is contained in:
parent
d92e987421
commit
c1135b2b54
1 changed files with 206 additions and 0 deletions
206
lib/libmd/aarch64/md5block.S
Normal file
206
lib/libmd/aarch64/md5block.S
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
/*-
|
||||
* Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <sys/elf_common.h>
|
||||
#include <machine/asm.h>
|
||||
|
||||
/*
 * addkm key, m -- set k = key + m (mod 2^32) with the shortest
 * instruction sequence available.
 *
 * Adding key is the same as subtracting its 32-bit negation,
 * 0x100000000 - key, so the sequence is picked by the size of that
 * negation:
 *
 *	at most 16 bits:  MOVZ the negation, then subtract it from m
 *	at most 24 bits:  two SUBs with 12-bit immediates (high, low)
 *	anything larger:  build key with MOVZ/MOVK, then add m
 *
 * key has to be an assembly-time constant because it is tested with
 * .if; m is a register operand.  Only the register aliased as k is
 * written.
 */
.macro	addkm	key, m
.if 0x100000000 - (\key) <= 0x0000ffff
	movz	k, #(0x100000000 - (\key))
	sub	k, \m, k
.elseif 0x100000000 - (\key) <= 0x00ffffff
	sub	k, \m, #((0x100000000 - (\key)) & 0xfff000)
	sub	k, k, #((0x100000000 - (\key)) & 0xfff)
.else
	movz	k, #((\key) & 0xffff)
	movk	k, #((\key) >> 16), lsl #16
	add	k, k, \m
.endif
.endm
|
||||
|
||||
/*
 * round a, b, c, d, f, key, m, s -- one MD5 step:
 *
 *	a = b + rol(a + F(b, c, d) + key + m, s)
 *
 * The f argument names the macro that evaluates F into the register
 * aliased as f.  Clobbers the registers aliased as f and k.
 */
.macro	round	a, b, c, d, f, key, m, s
	\f	f, \b, \c, \d		// f = F(b, c, d)
	addkm	\key, \m		// k = key + m
	add	\a, \a, k		// a + key + m
	add	\a, \a, f		// a + key + m + F(b, c, d)
	ror	\a, \a, #(32 - \s)	// rotate left by s
	add	\a, \a, \b		// add b to the rotated sum
.endm
|
||||
|
||||
/*
 * f0 f, b, c, d -- bitwise select f = b ? c : d,
 * evaluated as ((c ^ d) & b) ^ d.
 */
.macro	f0	f, b, c, d
	eor	\f, \c, \d		// bits in which c and d differ
	and	\f, \f, \b		// ... restricted to bits set in b
	eor	\f, \f, \d		// d, switched to c where b is set
.endm
|
||||
|
||||
/*
 * round1 a, b, c, d, key, m, s -- one MD5 step with the selection
 * function f1 = d ? b : c built into the step.
 *
 * f1 is written as (d & b) + (~d & c).  The two terms cannot share a
 * set bit, so instead of being combined first they are added to the
 * accumulator one after the other.
 *
 * Clobbers the registers aliased as tmp, f and k.
 */
.macro	round1	a, b, c, d, key, m, s
	bic	tmp, \c, \d		// tmp = ~d & c
	addkm	\key, \m		// k = key + m
	add	\a, \a, k		// a + key + m
	and	f, \b, \d		// f = d & b
	add	\a, \a, tmp		// ... + (~d & c)
	add	\a, \a, f		// ... + (d & b)
	ror	\a, \a, #(32 - \s)	// rotate left by s
	add	\a, \a, \b		// add b to the rotated sum
.endm
|
||||
|
||||
/*
 * f2 f, b, c, d -- parity function f = b ^ c ^ d.
 * c and d are combined first and b is folded in last.
 */
.macro	f2	f, b, c, d
	eor	\f, \c, \d		// c ^ d
	eor	\f, \f, \b		// b ^ c ^ d
.endm
|
||||
|
||||
/*
 * f3 f, b, c, d -- f = c ^ (b | ~d).
 * ORN supplies the or-with-complement in a single instruction.
 */
.macro	f3	f, b, c, d
	orn	\f, \b, \d		// b | ~d
	eor	\f, \f, \c		// c ^ (b | ~d)
.endm
|
||||
|
||||
/*
 * rounds f, m0..m3, s0..s3, k0..k3 -- four consecutive steps using
 * the function macro f.
 *
 * The roles of the state registers a, b, c, d rotate by one position
 * per step, so after the fourth step every register is back in its
 * starting role.  Step i uses message word m<i>, rotate count s<i>
 * and constant k<i>.
 */
.macro	rounds	f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
	round	a, b, c, d, \f, \k0, \m0, \s0
	round	d, a, b, c, \f, \k1, \m1, \s1
	round	c, d, a, b, \f, \k2, \m2, \s2
	round	b, c, d, a, \f, \k3, \m3, \s3
.endm
|
||||
|
||||
/*
 * rounds0 .. rounds3 -- four steps each with f0, f1, f2, f3 and the
 * rotate counts belonging to that function.
 *
 * rounds0: function f0, rotate counts 7, 12, 17, 22.
 */
.macro	rounds0	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
.endm
|
||||
|
||||
/*
 * rounds1: four steps through the special cased round1 macro,
 * rotate counts 5, 9, 14, 20.  The state registers rotate roles from
 * step to step exactly as in the generic rounds macro.
 */
.macro	rounds1	m0, m1, m2, m3, k0, k1, k2, k3
	round1	a, b, c, d, \k0, \m0, 5
	round1	d, a, b, c, \k1, \m1, 9
	round1	c, d, a, b, \k2, \m2, 14
	round1	b, c, d, a, \k3, \m3, 20
.endm
|
||||
|
||||
/* rounds2: four steps with function f2, rotate counts 4, 11, 16, 23. */
.macro	rounds2	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
.endm
|
||||
|
||||
/* rounds3: four steps with function f3, rotate counts 6, 10, 15, 21. */
.macro	rounds3	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
.endm
|
||||
|
||||
/*
 * md5block(MD5_CTX, buf, len)
 *
 * Run the MD5 compression function over every complete 64 byte block
 * in buf and update the state held in the context.
 *
 * In:	x0 = ctx; the four 32-bit state words a, b, c, d are read from
 *	     and written back to byte offsets 0, 4, 8 and 12
 *	x1 = buf; input data, 64 bytes are consumed per iteration
 *	x2 = len; byte count.  The low six bits are discarded, so a
 *	     trailing partial block is ignored, and a length below 64
 *	     returns without touching ctx, the stack or x19-x26.
 *
 * Clobbers x1-x17 (as w registers from w3 up) and the condition
 * flags; x0 still holds ctx on return.  x19-x26 hold message words
 * and are preserved in a 0x40 byte stack frame.  x18 is left alone.
 *
 * Message words are fetched with plain 32-bit loads.
 * NOTE(review): this relies on a little-endian data view, confirm
 * for the target.
 */
ENTRY(_libmd_md5block)
ctx	.req	x0
buf	.req	x1
len	.req	x2
end	.req	x2			// aliases len
a	.req	w3
b	.req	w4
c	.req	w5
d	.req	w6
f	.req	w7
tmp	.req	w8
k	.req	w9
m0	.req	w10
m1	.req	w11
m2	.req	w12
m3	.req	w13
m4	.req	w14
m5	.req	w15
m6	.req	w16
m7	.req	w17
	// x18 is the platform register
m8	.req	w19
m9	.req	w20
m10	.req	w21
m11	.req	w22
m12	.req	w23
m13	.req	w24
m14	.req	w25
m15	.req	w26

	/*
	 * The message registers are dead once the last step is done, so
	 * four of them double as scratch for the old state words.
	 */
a_	.req	m0
b_	.req	m7
c_	.req	m14
d_	.req	m5

	bics	len, len, #63		// round len down to whole 64 byte blocks
	beq	.Lend			// no complete block: nothing to save or do

	/* m8-m15 live in callee-saved registers; preserve them */
	stp	x19, x20, [sp, #-0x40]!
	stp	x21, x22, [sp, #0x10]
	stp	x23, x24, [sp, #0x20]
	stp	x25, x26, [sp, #0x30]

	add	end, buf, len		// end pointer (overwrites len)

	ldp	a, b, [ctx, #0]
	ldp	c, d, [ctx, #8]

	/* first eight rounds interleaved with data loads */
.Lloop:	ldp	m0, m1, [buf, #0]
	round	a, b, c, d, f0, 0xd76aa478, m0, 7
	ldp	m2, m3, [buf, #8]
	round	d, a, b, c, f0, 0xe8c7b756, m1, 12
	ldp	m4, m5, [buf, #16]
	round	c, d, a, b, f0, 0x242070db, m2, 17
	ldp	m6, m7, [buf, #24]
	round	b, c, d, a, f0, 0xc1bdceee, m3, 22

	ldp	m8, m9, [buf, #32]
	round	a, b, c, d, f0, 0xf57c0faf, m4, 7
	ldp	m10, m11, [buf, #40]
	round	d, a, b, c, f0, 0x4787c62a, m5, 12
	ldp	m12, m13, [buf, #48]
	round	c, d, a, b, f0, 0xa8304613, m6, 17
	ldp	m14, m15, [buf, #56]
	round	b, c, d, a, f0, 0xfd469501, m7, 22

	/* remaining rounds use the roundsX macros */
	rounds0	m8, m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
	rounds0	m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821

	rounds1	m1, m6, m11, m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
	rounds1	m5, m10, m15, m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
	rounds1	m9, m14, m3, m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
	rounds1	m13, m2, m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a

	rounds2	m5, m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
	rounds2	m1, m4, m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
	rounds2	m13, m0, m3, m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
	rounds2	m9, m12, m15, m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665

	rounds3	m0, m7, m14, m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
	rounds3	m12, m3, m10, m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
	rounds3	m8, m15, m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
	rounds3	m4, m11, m2, m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391

	/*
	 * Add the state from before this block and store the result.
	 * a, b, c, d keep the new state for the next iteration.
	 */
	ldp	a_, b_, [ctx, #0]
	ldp	c_, d_, [ctx, #8]
	add	a, a, a_
	add	b, b, b_
	add	c, c, c_
	add	d, d, d_
	stp	a, b, [ctx, #0]
	stp	c, d, [ctx, #8]

	add	buf, buf, #64		// advance to the next block
	cmp	buf, end
	bne	.Lloop

	ldp	x25, x26, [sp, #0x30]
	ldp	x23, x24, [sp, #0x20]
	ldp	x21, x22, [sp, #0x10]
	ldp	x19, x20, [sp], #0x40

.Lend:	ret
END(_libmd_md5block)
|
||||
|
||||
/*
 * Emit the AArch64 feature-1 GNU property note.  Both the macro and
 * its argument are presumably supplied by the headers included at the
 * top of this file; verify there which features are advertised.
 */
GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)

/*
 * Empty .note.GNU-stack section, conventionally used to tell the
 * linker that this object does not need an executable stack.
 */
	.section .note.GNU-stack,"",%progbits
|
||||
Loading…
Add table
Reference in a new issue