amd64 csu: microoptimize startup

Reorder operations to reduce the number of register-to-register moves.
Use the LEA instruction to combine arithmetic with the moves.

Reviewed by:	markj
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D49206
This commit is contained in:
Konstantin Belousov 2025-03-03 01:54:24 +02:00
parent e7c0cb7208
commit cb991a2281

View file

@ -49,15 +49,12 @@ _start:
#ifdef GCRT
subq $16, %rsp
#endif
movq %rsi, %rcx
movq %rdi, %rsi /* argv = ap */
addq $8, %rsi /* argv += 1 */
movq %rdi, %rdx /* env = ap */
addq $16, %rdx /* env += 2 */
movslq (%rdi), %rax
movl %eax, %edi /* argc = *(long *)(void *)ap */
shlq $3, %rax
addq %rax, %rdx /* env += argc */
movq %rsi, %rcx /* cleanup */
movslq (%rdi), %rax /* long *ap; tmpargc = *ap */
leaq 0x8(%rdi), %rsi /* argv = ap + 1 */
leaq 0x10(%rdi, %rax, 8), %rdx /* env = ap + 2 + tmpargc */
movl %eax, %edi /* argc = tmpargc */
#ifdef PIC
/*
* XXX. %rip relative addressing is not intended for use in the