mirror of
https://git.freebsd.org/src.git
synced 2026-01-11 19:57:22 +00:00
libc/amd64: fix stpncpy.S again
Some checks are pending
Cross-build Kernel / aarch64 macos-latest (clang-18) (push) Waiting to run
Cross-build Kernel / amd64 ubuntu-22.04 (clang-15) (push) Waiting to run
Cross-build Kernel / aarch64 ubuntu-22.04 (clang-15) (push) Waiting to run
Cross-build Kernel / amd64 ubuntu-24.04 (clang-18) (push) Waiting to run
Cross-build Kernel / aarch64 ubuntu-24.04 (clang-18) (push) Waiting to run
Cross-build Kernel / amd64 macos-latest (clang-18) (push) Waiting to run
Some checks are pending
Cross-build Kernel / aarch64 macos-latest (clang-18) (push) Waiting to run
Cross-build Kernel / amd64 ubuntu-22.04 (clang-15) (push) Waiting to run
Cross-build Kernel / aarch64 ubuntu-22.04 (clang-15) (push) Waiting to run
Cross-build Kernel / amd64 ubuntu-24.04 (clang-18) (push) Waiting to run
Cross-build Kernel / aarch64 ubuntu-24.04 (clang-18) (push) Waiting to run
Cross-build Kernel / amd64 macos-latest (clang-18) (push) Waiting to run
The previous fix introduced a regression on machines without the BMI1
instruction set extension. The TZCNT instruction used in this function
behaves differently on old machines when the source operand is zero
(there it executes as BSF, which leaves the destination undefined), but
the code was originally designed to never trigger this case. The bug
fix caused this case to be possible, leading to a regression on
sufficiently old hardware.
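Fix the code by setting the end-of-buffer bit only after the head of the
NUL mask has been masked out, and by merging and scanning the NUL mask
as a 64-bit value, such that the source operand of TZCNT is never zero.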
PR: 291720
Fixes: 66eb78377b
Tested by: cy
Approved by: markj (mentor)
Differential Revision: https://reviews.freebsd.org/D54303
This commit is contained in:
parent
9595055ae7
commit
2f83319214
1 changed files with 3 additions and 6 deletions
|
|
@ -36,9 +36,7 @@
|
|||
.set stpncpy, __stpncpy
|
||||
ARCHFUNCS(__stpncpy)
|
||||
ARCHFUNC(__stpncpy, scalar)
|
||||
#if 0 /* temporarily disabled cf. PR 291720 */
|
||||
ARCHFUNC(__stpncpy, baseline)
|
||||
#endif
|
||||
ENDARCHFUNCS(__stpncpy)
|
||||
|
||||
ARCHENTRY(__stpncpy, scalar)
|
||||
|
|
@ -93,7 +91,6 @@ ARCHEND(__stpncpy, scalar)
|
|||
/* stpncpy(char *restrict rdi, const char *rsi, size_t rdx) */
|
||||
ARCHENTRY(__stpncpy, baseline)
|
||||
#define bounce (-3*16-8) /* location of on-stack bounce buffer */
|
||||
|
||||
test %rdx, %rdx # no bytes to copy?
|
||||
jz .L0
|
||||
|
||||
|
|
@ -225,8 +222,8 @@ ARCHENTRY(__stpncpy, baseline)
|
|||
|
||||
/* 1--32 bytes to copy, bounce through the stack */
|
||||
.Lrunt: movdqa %xmm1, bounce+16(%rsp) # clear out rest of on-stack copy
|
||||
bts %r10, %r8 # treat end of buffer as end of string
|
||||
and %r9d, %r8d # mask out head before string
|
||||
bts %r10, %r8 # treat end of buffer as end of string
|
||||
test $0x1ffff, %r8d # end of string within first chunk or right after?
|
||||
jnz 0f # if yes, do not inspect second buffer
|
||||
|
||||
|
|
@ -235,10 +232,10 @@ ARCHENTRY(__stpncpy, baseline)
|
|||
pcmpeqb %xmm1, %xmm0 # NUL in second chunk?
|
||||
pmovmskb %xmm0, %r9d
|
||||
shl $16, %r9d
|
||||
or %r9d, %r8d # merge found NUL bytes into NUL mask
|
||||
or %r9, %r8 # merge found NUL bytes into NUL mask
|
||||
|
||||
/* end of string after one buffer */
|
||||
0: tzcnt %r8d, %r8d # location of last char in string
|
||||
0: tzcnt %r8, %r8 # location of last char in string
|
||||
movdqu %xmm1, bounce(%rsp, %r8, 1) # clear bytes behind string
|
||||
lea bounce(%rsp, %rcx, 1), %rsi # start of string copy on stack
|
||||
lea (%rdi, %r8, 1), %rax # return pointer to NUL byte
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue