Files
tlib/oversampling/WDL/eel2/asm-nseel-x64-sse.asm
2024-05-24 13:28:31 +02:00

1358 lines
23 KiB
NASM

; Assemble-time constants describing the RAM block layout.
; these must be synced with any changes in ns-eel-int.h
%define NSEEL_RAM_BLOCKS_DEFAULTMAX 128
; log2 of the number of block-pointer slots
%define NSEEL_RAM_BLOCKS_LOG2 9
; log2 of the number of items held by one block
%define NSEEL_RAM_ITEMSPERBLOCK_LOG2 16
; derived counts: 1<<9 = 512 blocks, 1<<16 = 65536 items per block
%define NSEEL_RAM_BLOCKS (1 << NSEEL_RAM_BLOCKS_LOG2)
%define NSEEL_RAM_ITEMSPERBLOCK (1<<NSEEL_RAM_ITEMSPERBLOCK_LOG2)
; size in bytes of one item (the code in this file moves items as 8-byte doubles)
%define EEL_F_SIZE 8
; todo: also determine FTZ?
; also: do FP flags rounding mode affect SSE ops? other things? tbd
; When EEL_X64_NO_CHANGE_FPFLAGS is defined, eel_callcode64 leaves the x87
; control word alone (it still changes MXCSR). Disabled by default:
; %define EEL_X64_NO_CHANGE_FPFLAGS
SECTION .text
; Call-site helper macros, selected by whether AMD64ABI is defined.
;   pre_call / post_call : bracket a call made from the code in this file.
;   save_spill_full / restore_spill_full : Win64 only, used by the
;       eel_callcode64* entry points around the call into generated code.
%ifndef AMD64ABI
; ---- Win64 ----------------------------------------------------------------
; win64 doesn't need to preserve any spill registers when calling functions,
; but must when called. pre_call therefore only reserves 32 bytes of stack.
%macro pre_call 0
        sub     rsp, 32
%endmacro
%macro post_call 0
        add     rsp, 32
%endmacro
; Save the full 128 bits of xmm6-xmm9 in 64 freshly reserved stack bytes
; (unaligned stores, so no alignment requirement on rsp).
%macro save_spill_full 0
        sub     rsp, 64
        movdqu  [rsp], xmm6
        movdqu  [rsp+16], xmm7
        movdqu  [rsp+32], xmm8
        movdqu  [rsp+48], xmm9
%endmacro
; Inverse of save_spill_full.
%macro restore_spill_full 0
        movdqu  xmm6, [rsp]
        movdqu  xmm7, [rsp+16]
        movdqu  xmm8, [rsp+32]
        movdqu  xmm9, [rsp+48]
        add     rsp, 64
%endmacro
%else
; ---- non-Win64 (AMD64ABI) --------------------------------------------------
; non-win64 needs to preserve xmm4-xmm7 when calling other functions.
; Only the low 64 bits (one double) of each register are saved.
%macro pre_call 0
        sub     rsp, 32
        movsd   [rsp], xmm4
        movsd   [rsp+8], xmm5
        movsd   [rsp+16], xmm6
        movsd   [rsp+24], xmm7
%endmacro
; Reload the four doubles saved by pre_call and release the 32 bytes.
%macro post_call 0
        movsd   xmm4, [rsp]
        movsd   xmm5, [rsp+8]
        movsd   xmm6, [rsp+16]
        movsd   xmm7, [rsp+24]
        add     rsp, 32
%endmacro
%endif
; nseel_asm_1pdd: code fragment that calls a function through rdi.
; The 0xFEFEFEFEFEFEFEFE immediate looks like a placeholder; presumably it is
; replaced with the real function address before the fragment runs (confirm
; against the code generator). xmm0 is not touched here, so whatever it holds
; is passed to, and returned from, the callee unchanged by this fragment.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_1pdd
nseel_asm_1pdd:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
mov r15, rsi ; keep rsi in r15 across the call
call rdi
mov rsi, r15 ; restore rsi
%else
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_2pdd: code fragment that swaps xmm0 and xmm1, then calls a
; function through rdi. The 0xFEFEFEFEFEFEFEFE immediate looks like a
; placeholder for the function address (presumably patched before use).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_2pdd
nseel_asm_2pdd:
; exchange the low doubles of xmm0 and xmm1, using xmm2 as scratch
movsd xmm2, xmm0
movsd xmm0, xmm1
movsd xmm1, xmm2
mov rdi, qword 0xFEFEFEFEFEFEFEFE
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
mov r15, rsi ; keep rsi in r15 across the call
call rdi
mov rsi, r15 ; restore rsi
%else
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_2pdds: code fragment that calls a function with
;   xmm0 = double stored at [rdi], xmm1 = incoming xmm0,
; then stores the callee's xmm0 back to that same location and returns the
; location's address in rax.
; The 0xFEFEFEFEFEFEFEFE immediate looks like a placeholder for the function
; address (presumably patched before use).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_2pdds
nseel_asm_2pdds:
mov rax, qword 0xFEFEFEFEFEFEFEFE
movsd xmm1, xmm0
movsd xmm0, [rdi]
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi and rdi are kept in r15/r14 across the call
mov r15, rsi
mov r14, rdi
call rax
mov rsi, r15
movq [r14], xmm0
mov rax, r14 ; set return value
%else
; this branch uses rdi directly after the call
call rax
movq [rdi], xmm0
mov rax, rdi ; set return value
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_invsqrt: approximate inverse square root of xmm0, result in xmm0.
; Uses the x87 stack and the memory at [rsi] as scratch. The sequence has the
; shape of the well-known "0x5f3759df" estimate followed by one refinement
; step:  y = bits(0x5f3759df - (floatbits(x) >> 1));  result = y*(c2 + c1*x*y*y)
; where c1 and c2 are doubles read through the two 0xFEFEFEFEFEFEFEFE
; placeholder addresses (presumably -0.5 and 1.5 -- confirm with the patcher).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_invsqrt
nseel_asm_invsqrt:
; optimize me
movsd [rsi], xmm0
; st0 = x
fld qword [rsi]
mov rdx, 0x5f3759df
; [rsi] = x rounded to single precision (st0 is kept)
fst dword [rsi]
mov rax, qword 0xFEFEFEFEFEFEFEFE
; st0 = x * c1
fmul qword [rax]
; rcx = single-precision bit pattern of x, sign-extended
movsx rcx, dword [rsi]
sar rcx, 1
sub rdx, rcx
; [rsi] = initial estimate y, as single-precision bits
mov dword [rsi], edx
; st0 = c1 * x * y * y
fmul dword [rsi]
fmul dword [rsi]
mov rax, qword 0xFEFEFEFEFEFEFEFE
; st0 = c2 + c1*x*y*y
fadd qword [rax]
; st0 = y * (c2 + c1*x*y*y)
fmul dword [rsi]
; optimize me
; pop the x87 result into [rsi] as a double and reload it into xmm0
fstp qword [rsi]
movsd xmm0, [rsi]
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_dbg_getstackptr: xmm0 = current rsp converted to a double.
; The value is staged through the qword at [rsi], which is overwritten.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_dbg_getstackptr
nseel_asm_dbg_getstackptr:
mov qword [rsi], rsp
cvtsi2sd xmm0, qword [rsi]
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_sqr: xmm0 = xmm0 * xmm0.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_sqr
nseel_asm_sqr:
mulsd xmm0, xmm0
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_abs: xmm0 = xmm0 AND the 128-bit constant stored at [r12-32].
; Given the routine's name, that constant is presumably a mask clearing the
; sign bit of the double -- confirm against whatever fills the area below r12.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_abs
nseel_asm_abs:
andps xmm0, [r12-32]
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_assign: copy the double at [rax] to [rdi], filtering its value.
; If the 11-bit exponent field is 0, 1 or 0x7FF (zero/denormal, the smallest
; normal binade, or Inf/NaN) an all-zero qword (+0.0) is stored instead.
; On exit rax = rdi (address of the destination). Clobbers rcx, rdx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_assign
nseel_asm_assign:
mov rdx, qword [rax]
; rax = 0, used as the replacement value by the cmov below
sub eax, eax
mov rcx, rdx
; edx = high dword of the double (sign, exponent, top of mantissa)
shr rdx, 32
; (exponent + 1), then keep only the exponent field; the sign bit is masked off
add edx, 0x00100000
and edx, 0x7ff00000
; field <= 2 means the original exponent was 0x7FF, 0 or 1
cmp edx, 0x00200000
cmovle rcx, rax
mov rax, rdi
mov qword [rdi], rcx
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_assign_fromfp: store the double in xmm0 to [rdi], filtering it.
; If the 11-bit exponent field is 0, 1 or 0x7FF (zero/denormal, the smallest
; normal binade, or Inf/NaN) an all-zero qword (+0.0) is stored instead.
; On exit rax = rdi (address of the destination). Clobbers rcx, rdx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_assign_fromfp
nseel_asm_assign_fromfp:
; rdx = raw bit pattern of the double in xmm0
movq rdx, xmm0
; rax = 0, used as the replacement value by the cmov below
sub eax, eax
mov rcx, rdx
; edx = high dword of the double (sign, exponent, top of mantissa)
shr rdx, 32
; (exponent + 1), then keep only the exponent field; the sign bit is masked off
add edx, 0x00100000
and edx, 0x7ff00000
; field <= 2 means the original exponent was 0x7FF, 0 or 1
cmp edx, 0x00200000
cmovle rcx, rax
mov rax, rdi
mov qword [rdi], rcx
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_assign_fast_fromfp: store xmm0 to [rdi] with no value filtering;
; rax = rdi on exit.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_assign_fast_fromfp
nseel_asm_assign_fast_fromfp:
mov rax, rdi
movsd [rdi], xmm0
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_assign_fast: copy the qword at [rax] to [rdi] with no value
; filtering; rax = rdi on exit. Clobbers rdx.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_assign_fast
nseel_asm_assign_fast:
mov rdx, qword [rax]
mov qword [rdi], rdx
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_add: xmm0 = xmm0 + xmm1.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_add
nseel_asm_add:
addsd xmm0, xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_add_op: [rdi] = [rdi] + xmm0 (the sum is also left in xmm0);
; rax = rdi on exit.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_add_op
nseel_asm_add_op:
addsd xmm0, [rdi]
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_add_op_fast: [rdi] = [rdi] + xmm0 (the sum is also left in xmm0);
; rax = rdi on exit. The instructions are the same as nseel_asm_add_op's.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_add_op_fast
nseel_asm_add_op_fast:
addsd xmm0, [rdi]
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_sub: xmm0 = xmm1 - xmm0 (xmm1 is overwritten with the result too).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_sub
nseel_asm_sub:
subsd xmm1, xmm0
movsd xmm0, xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_sub_op: [rdi] = [rdi] - xmm0; rax = rdi on exit.
; The difference is left in xmm1; xmm0 is not modified.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_sub_op
nseel_asm_sub_op:
movsd xmm1, [rdi]
subsd xmm1, xmm0
movsd [rdi], xmm1
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_sub_op_fast: [rdi] = [rdi] - xmm0; rax = rdi on exit.
; The difference is left in xmm1; xmm0 is not modified.
; The instructions are the same as nseel_asm_sub_op's.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_sub_op_fast
nseel_asm_sub_op_fast:
movsd xmm1, [rdi]
subsd xmm1, xmm0
movsd [rdi], xmm1
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_mul: xmm0 = xmm0 * xmm1.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_mul
nseel_asm_mul:
mulsd xmm0, xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_mul_op: [rdi] = [rdi] * xmm0 (the product is also left in xmm0);
; rax = rdi on exit.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_mul_op
nseel_asm_mul_op:
mulsd xmm0, [rdi]
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_mul_op_fast: [rdi] = [rdi] * xmm0 (the product is also left in
; xmm0); rax = rdi on exit. The instructions are the same as nseel_asm_mul_op's.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_mul_op_fast
nseel_asm_mul_op_fast:
mulsd xmm0, [rdi]
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_div: xmm0 = xmm1 / xmm0 (xmm1 is overwritten with the quotient).
; Note the copy back uses movapd, so all 128 bits of xmm1 land in xmm0.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_div
nseel_asm_div:
divsd xmm1, xmm0
movapd xmm0, xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_div_op: [rdi] = [rdi] / xmm0; rax = rdi on exit.
; The quotient is left in xmm1; xmm0 is not modified.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_div_op
nseel_asm_div_op:
movsd xmm1, [rdi]
divsd xmm1, xmm0
movsd [rdi], xmm1
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_div_op_fast: [rdi] = [rdi] / xmm0; rax = rdi on exit.
; The quotient is left in xmm1; xmm0 is not modified.
; The instructions are the same as nseel_asm_div_op's.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_div_op_fast
nseel_asm_div_op_fast:
movsd xmm1, [rdi]
divsd xmm1, xmm0
movsd [rdi], xmm1
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_mod: integer remainder. Both operands are ANDed with the constant
; at [r12-32] (presumably an absolute-value mask -- confirm), truncated to
; 64-bit integers, and xmm0 = (double)(int(xmm1) mod int(xmm0)).
; A divisor of 0 skips the division and yields 0.
; Clobbers rax, rcx, rdx, xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_mod
nseel_asm_mod:
andps xmm0, [r12-32]
andps xmm1, [r12-32]
; rcx = divisor, rax = dividend (truncating conversions)
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
; rdx:rax is the dividend for div, so the high half must be zero;
; rdx also doubles as the result (0) when the division is skipped
xor rdx, rdx
cmp rcx, 0
je label_0
; unsigned divide: remainder ends up in rdx
div rcx
label_0:
cvtsi2sd xmm0, rdx
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_shl: xmm0 = (double)(int32)( uint32(int(xmm1)) << int(xmm0) ).
; The value is reduced to its low 32 bits (zero-extended) before a 64-bit
; shift; only the low 32 bits of the shifted result are converted back.
; The shift count comes from cl (a 64-bit shift uses its low 6 bits).
; Clobbers rax, rcx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_shl
nseel_asm_shl:
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
; writing eax clears the upper 32 bits of rax
and eax, 0xffffffff ; match x87 impl
shl rax, cl
cvtsi2sd xmm0, eax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_shr: xmm0 = (double)(int32)( uint32(int(xmm1)) >> int(xmm0) ).
; The value is reduced to its low 32 bits (zero-extended) before a 64-bit
; shift; only the low 32 bits of the shifted result are converted back.
; The shift count comes from cl (a 64-bit shift uses its low 6 bits).
; NOTE(review): because the upper half of rax was just cleared, bit 63 is 0
; and the sar below shifts in zeros, i.e. it acts as a logical right shift of
; the 32-bit value. The existing comment says this matches the x87
; implementation -- confirm that this is the intended behavior for negatives.
; Clobbers rax, rcx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_shr
nseel_asm_shr:
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
; writing eax clears the upper 32 bits of rax
and eax, 0xffffffff ; match x87 impl
sar rax, cl
cvtsi2sd xmm0, eax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_mod_op: in-place integer remainder.
;   [rdi] = (double)( int([rdi]) mod int(xmm0) ), rax = rdi on exit.
; Both operands are first ANDed with the constant at [r12-32] (presumably an
; absolute-value mask -- confirm) and truncated to 64-bit integers.
; A divisor of 0 skips the division and stores 0.
; Clobbers rcx, rdx, xmm0, xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_mod_op
nseel_asm_mod_op:
movsd xmm1, [rdi]
andps xmm0, [r12-32]
andps xmm1, [r12-32]
; rcx = divisor, rax = dividend (truncating conversions)
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
; high half of the rdx:rax dividend; also the result when the divide is skipped
xor rdx, rdx
cmp rcx, 0
je label_1 ; skip divide, set return to 0
; unsigned divide: remainder ends up in rdx
div rcx
label_1:
cvtsi2sd xmm0, rdx
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_or: xmm0 = (double)( int64(xmm1) | int64(xmm0) ), using
; truncating conversions. Clobbers rax, rcx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_or
nseel_asm_or:
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
or rax, rcx
cvtsi2sd xmm0, rax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_or0: xmm0 = (double)int64(xmm0), i.e. truncate toward zero by a
; round trip through a 64-bit integer. Clobbers rax.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_or0
nseel_asm_or0:
cvttsd2si rax, xmm0
cvtsi2sd xmm0, rax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_or_op: [rdi] = (double)( int64([rdi]) | int64(xmm0) ), using
; truncating conversions; rax = rdi on exit. Clobbers rcx, xmm0, xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_or_op
nseel_asm_or_op:
movsd xmm1, [rdi]
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
or rax, rcx
cvtsi2sd xmm0, rax
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_xor: xmm0 = (double)( int64(xmm1) ^ int64(xmm0) ), using
; truncating conversions. Clobbers rax, rcx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_xor
nseel_asm_xor:
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
xor rax, rcx
cvtsi2sd xmm0, rax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_xor_op: [rdi] = (double)( int64([rdi]) ^ int64(xmm0) ), using
; truncating conversions; rax = rdi on exit. Clobbers rcx, xmm0, xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_xor_op
nseel_asm_xor_op:
movsd xmm1, [rdi]
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
xor rax, rcx
cvtsi2sd xmm0, rax
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_and: xmm0 = (double)( int64(xmm1) & int64(xmm0) ), using
; truncating conversions. Clobbers rax, rcx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_and
nseel_asm_and:
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
and rax, rcx
cvtsi2sd xmm0, rax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_and_op: [rdi] = (double)( int64([rdi]) & int64(xmm0) ), using
; truncating conversions; rax = rdi on exit. Clobbers rcx, xmm0, xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_and_op
nseel_asm_and_op:
movsd xmm1, [rdi]
cvttsd2si rcx, xmm0
cvttsd2si rax, xmm1
and rax, rcx
cvtsi2sd xmm0, rax
movsd [rdi], xmm0
mov rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_uminus: xmm0 = xmm0 XOR the 128-bit constant stored at [r12-48].
; Given the routine's name, that constant is presumably the sign-bit mask of a
; double -- confirm against whatever fills the area below r12.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_uminus
nseel_asm_uminus:
xorps xmm0, [r12-48]
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_sign: xmm0 = -1.0 if the sign bit of xmm0 is set, +1.0 if clear.
; If every bit other than the sign bit is zero (the value is +0.0 or -0.0),
; xmm0 is left exactly as passed in. The decision uses raw bits only, so a
; NaN also yields +/-1.0 according to its sign bit.
; Uses the qword at [rsi] as scratch; clobbers rax, rcx, rdx, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_sign
nseel_asm_sign:
; rdx = raw bit pattern of the input
movsd [rsi], xmm0
mov rdx, qword [rsi]
; build 0x7FFF...F in rcx: 0 -> -1 -> logical shift right by one
sub rcx, rcx
dec rcx
shr rcx, 1 ; 7ffff...
test rdx, rcx
jz label_2 ; value is +/-0: return the value passed in unchanged
; calculate sign
inc rcx ; rcx becomes 0x80000...
sub rax, rax
test rdx, rcx
jnz label_3
; sign bit clear: rax = +1
inc rax
jmp label_4
label_3:
; sign bit set: rax = -1
dec rax
label_4:
cvtsi2sd xmm0, rax
label_2:
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_bnot: logical NOT of the integer in rax.
; rax = 1 if rax was 0, otherwise rax = 0. Clobbers flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_bnot
nseel_asm_bnot:
test rax, rax
setz al
; keep only al; writing eax also clears the upper 32 bits of rax
and eax, 0xff
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_fcall: call the code at the address loaded into rdx.
; The 0xFEFEFEFEFEFEFEFE immediate looks like a placeholder for the target
; address (presumably patched before use). rsp is lowered by 8 around the
; call; together with the 8-byte return address this moves rsp by 16 in total.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_fcall
nseel_asm_fcall:
mov rdx, qword 0xFEFEFEFEFEFEFEFE
sub rsp, 8
call rdx
add rsp, 8
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_band: short-circuit AND. If rax is zero nothing is called and rax
; stays 0; otherwise the code at the address loaded into rcx is called and
; whatever it leaves in rax is the result.
; The 0xFEFEFEFEFEFEFEFE immediate looks like a placeholder for that address
; (presumably the code evaluating the second operand -- confirm).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_band
nseel_asm_band:
test rax, rax
jz label_5
mov rcx, qword 0xFEFEFEFEFEFEFEFE
; rsp moves by 16 in total (8 here + 8 for the return address)
sub rsp, 8
call rcx
add rsp, 8
label_5:
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_bor: short-circuit OR. If rax is non-zero nothing is called and
; rax is kept; otherwise the code at the address loaded into rcx is called and
; whatever it leaves in rax is the result.
; The 0xFEFEFEFEFEFEFEFE immediate looks like a placeholder for that address
; (presumably the code evaluating the second operand -- confirm).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_bor
nseel_asm_bor:
test rax, rax
jnz label_6
mov rcx, qword 0xFEFEFEFEFEFEFEFE
; rsp moves by 16 in total (8 here + 8 for the return address)
sub rsp, 8
call rcx
add rsp, 8
label_6:
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_equal: approximate equality test.
;   eax = 1 if (xmm0 - xmm1), ANDed with the constant at [r12-32], compares
;         below the double at [r12-8]; else 0.
; [r12-32] is presumably an absolute-value mask and [r12-8] a small tolerance
; -- confirm. ucomisd also sets CF for an unordered (NaN) compare, so setb
; yields 1 in that case as well. xmm0 is overwritten.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_equal
nseel_asm_equal:
subsd xmm0, xmm1
andps xmm0, [r12-32]
; zero eax first: setb only writes al
xor eax,eax
ucomisd xmm0, [r12-8]
setb al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_equal_exact: eax = 1 if xmm0 == xmm1, else 0.
; ucomisd sets ZF for an unordered (NaN) compare too, so sete also yields 1
; when either operand is NaN.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_equal_exact
nseel_asm_equal_exact:
; zero eax first: sete only writes al
xor eax,eax
ucomisd xmm0,xmm1
sete al
; label_7 is not referenced by any code visible in this file
label_7:
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_notequal_exact: eax = 1 if xmm0 != xmm1, else 0.
; ucomisd sets ZF for an unordered (NaN) compare, so setne yields 0 when
; either operand is NaN.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_notequal_exact
nseel_asm_notequal_exact:
; zero eax first: setne only writes al
xor eax,eax
ucomisd xmm0,xmm1
setne al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_notequal: approximate inequality test.
;   eax = 1 if (xmm0 - xmm1), ANDed with the constant at [r12-32], compares
;         not-below the double at [r12-8]; else 0.
; [r12-32] is presumably an absolute-value mask and [r12-8] a small tolerance
; -- confirm. ucomisd sets CF for an unordered (NaN) compare, so setnb yields
; 0 in that case. xmm0 is overwritten.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_notequal
nseel_asm_notequal:
subsd xmm0, xmm1
andps xmm0, [r12-32]
; zero eax first: setnb only writes al
xor eax,eax
ucomisd xmm0, [r12-8]
setnb al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_above: eax = 1 if xmm1 > xmm0, else 0 (0 if unordered/NaN).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_above
nseel_asm_above:
; zero eax first: seta only writes al
xor eax,eax
ucomisd xmm1,xmm0
seta al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_beloweq: eax = 1 if xmm1 <= xmm0, else 0.
; An unordered (NaN) compare sets both CF and ZF, so setbe yields 1 then too.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_beloweq
nseel_asm_beloweq:
; zero eax first: setbe only writes al
xor eax,eax
ucomisd xmm1,xmm0
setbe al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_aboveeq: eax = 1 if xmm1 >= xmm0, else 0 (0 if unordered/NaN).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_aboveeq
nseel_asm_aboveeq:
; zero eax first: setae only writes al
xor eax,eax
ucomisd xmm1,xmm0
setae al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_below: eax = 1 if xmm1 < xmm0, else 0.
; An unordered (NaN) compare sets CF, so setb yields 1 then too.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_below
nseel_asm_below:
; zero eax first: setb only writes al
xor eax,eax
ucomisd xmm1,xmm0
setb al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_booltofp: xmm0 = 1.0 if rax is non-zero, else 0.0.
; rax is left holding the same 0/1 as an integer; rdi is clobbered.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_booltofp
nseel_asm_booltofp:
; move the input aside so rax can be rebuilt as a clean 0/1
mov rdi, rax
sub eax, eax
test rdi, rdi
setnz al
cvtsi2sd xmm0, rax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_fptobool: double -> boolean.
;   eax = 1 if xmm0, ANDed with the constant at [r12-32], compares not-below
;         the double at [r12-8]; else 0 (also 0 for an unordered/NaN compare).
; [r12-32] is presumably an absolute-value mask and [r12-8] a small threshold
; -- confirm. xmm0 is overwritten.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_fptobool
nseel_asm_fptobool:
andps xmm0, [r12-32]
; zero eax first: setnb only writes al
xor eax,eax
ucomisd xmm0, [r12-8]
setnb al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_fptobool_rev: inverted double -> boolean.
;   eax = 1 if xmm0, ANDed with the constant at [r12-32], compares below the
;         double at [r12-8]; else 0 (1 as well for an unordered/NaN compare).
; [r12-32] is presumably an absolute-value mask and [r12-8] a small threshold
; -- confirm. xmm0 is overwritten.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_fptobool_rev
nseel_asm_fptobool_rev:
andps xmm0, [r12-32]
; zero eax first: setb only writes al
xor eax,eax
ucomisd xmm0, [r12-8]
setb al
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_min: pointer-returning minimum of two doubles in memory.
;   rax = rdi if [rdi] <= [rax] (or the compare is unordered), else rax is kept.
; Clobbers xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_min
nseel_asm_min:
movsd xmm1, [rdi]
ucomisd xmm1, [rax]
; "not above": taken when CF or ZF is set
cmovna rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_max: pointer-returning maximum of two doubles in memory.
;   rax = rdi if [rdi] > [rax], else rax is kept (also kept when unordered).
; Clobbers xmm0, xmm1, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_max
nseel_asm_max:
; xmm0 is loaded here but not read again within this fragment
movsd xmm0, [rax]
movsd xmm1, [rdi]
ucomisd xmm1, [rax]
; "above": taken only when CF and ZF are both clear
cmova rax, rdi
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_min_fp: xmm0 = minsd(xmm0, xmm1), the scalar double minimum.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_min_fp
nseel_asm_min_fp:
minsd xmm0, xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_max_fp: xmm0 = maxsd(xmm0, xmm1), the scalar double maximum.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_max_fp
nseel_asm_max_fp:
maxsd xmm0, xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic3parm: call a function as f(context, p1, p2, p3) where the
; three parameters arrive in rcx, rdi and rax (in that order).
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use).
;   AMD64ABI: rdi=context, rsi=old rcx, rdx=old rdi, rcx=old rax
;   otherwise: rcx=context, rdx=old rcx, r8=old rdi,  r9=old rax
; The callee's return registers are left untouched by this fragment.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic3parm
_asm_generic3parm:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi is kept in r15 across the call; rcx is copied to rsi before being reused
mov r15, rsi
mov rdx, rdi ; third parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rsi, rcx ; second parameter = parm
mov rcx, rax ; fourth parameter = parm
mov rax, qword 0xFEFEFEFEFEFEFEFE ; call function
call rax
mov rsi, r15
%else
; rcx is copied to rdx before it is overwritten with the context
mov rdx, rcx ; second parameter = parm
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov r8, rdi ; third parameter = parm
mov r9, rax ; fourth parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic3parm_retd: call a function as f(context, p1, p2, p3) where the
; three parameters arrive in rcx, rdi and rax (in that order).
; The instructions are the same as _asm_generic3parm's; the "_retd" suffix
; presumably marks a callee that returns a double in xmm0 -- confirm.
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use).
;   AMD64ABI: rdi=context, rsi=old rcx, rdx=old rdi, rcx=old rax
;   otherwise: rcx=context, rdx=old rcx, r8=old rdi,  r9=old rax
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic3parm_retd
_asm_generic3parm_retd:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi is kept in r15 across the call; rcx is copied to rsi before being reused
mov r15, rsi
mov rdx, rdi ; third parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rsi, rcx ; second parameter = parm
mov rcx, rax ; fourth parameter = parm
mov rax, qword 0xFEFEFEFEFEFEFEFE ; call function
call rax
mov rsi, r15
%else
; rcx is copied to rdx before it is overwritten with the context
mov rdx, rcx ; second parameter = parm
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov r8, rdi ; third parameter = parm
mov r9, rax ; fourth parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic2parm: call a function as f(context, p1, p2) where the two
; parameters arrive in rdi and rax (in that order).
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use).
;   AMD64ABI: rdi=context, rsi=old rdi, rdx=old rax
;   otherwise: rcx=context, rdx=old rdi, r8=old rax
; The callee's return registers are left untouched by this fragment.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic2parm
_asm_generic2parm:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi is kept in r15 across the call; rdi is copied out before being reused
mov r15, rsi
mov rsi, rdi ; second parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rdx, rax ; third parameter = parm
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; call function
call rcx
mov rsi, r15
%else
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rdx, rdi ; second parameter = parm
mov r8, rax ; third parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic2parm_retd: call a function as f(context, p1, p2) where the two
; parameters arrive in rdi and rax (in that order). The "_retd" suffix
; presumably marks a callee that returns a double in xmm0 -- confirm.
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use).
;   AMD64ABI: rdi=context, rsi=old rdi, rdx=old rax
;   otherwise: rcx=context, rdx=old rdi, r8=old rax
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic2parm_retd
_asm_generic2parm_retd:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi is kept in r15 across the call; rdi is copied out before being reused
mov r15, rsi
mov rsi, rdi ; second parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; call function
mov rdx, rax ; third parameter = parm
call rcx
mov rsi, r15
%else
; rdi is copied to rdx before it is overwritten with the function address
mov rdx, rdi ; second parameter = parm
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
mov r8, rax ; third parameter = parm
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic2xparm_retd: call a function as f(ctxA, ctxB, p1, p2) where the
; two parameters arrive in rdi and rax (in that order). The "_retd" suffix
; presumably marks a callee that returns a double in xmm0 -- confirm.
; Three 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in
; the instruction stream: first context value, second context value, function
; address (presumably all patched before use).
;   AMD64ABI: rdi=ctxA, rsi=ctxB, rdx=old rdi, rcx=old rax
;   otherwise: rcx=ctxA, rdx=ctxB, r8=old rdi,  r9=old rax
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic2xparm_retd
_asm_generic2xparm_retd:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi is kept in r15 across the call; rdi/rax are copied out before reuse
mov r15, rsi
mov rdx, rdi ; third parameter = parm
mov rcx, rax ; fourth parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rsi, qword 0xFEFEFEFEFEFEFEFE ; second parameter= context
mov rax, qword 0xFEFEFEFEFEFEFEFE ; call function
call rax
mov rsi, r15
%else
; rdi is copied to r8 before it is overwritten with the function address
mov r8, rdi ; third parameter = parm
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rdx, qword 0xFEFEFEFEFEFEFEFE ; second parameter= context
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
mov r9, rax ; fourth parameter = parm
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic1parm: call a function as f(context, p1) where the parameter
; arrives in rax.
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use).
;   AMD64ABI: rdi=context, rsi=old rax
;   otherwise: rcx=context, rdx=old rax
; The callee's return registers are left untouched by this fragment.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic1parm
_asm_generic1parm:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
; rsi is kept in r15 across the call
mov r15, rsi
mov rsi, rax ; second parameter = parm
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; call function
call rcx
mov rsi, r15
%else
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rdx, rax ; second parameter = parm
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_generic1parm_retd: call a function as f(context, p1) where the
; parameter arrives in rax. The "_retd" suffix presumably marks a callee that
; returns a double in xmm0 -- confirm.
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use).
;   AMD64ABI: rdi=context, rsi=old rax
;   otherwise: rcx=context, rdx=old rax
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_generic1parm_retd
_asm_generic1parm_retd:
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter = context pointer
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; function address
mov r15, rsi ; save rsi
mov rsi, rax ; second parameter = parameter
call rcx
mov rsi, r15
%else
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter= context
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; call function
mov rdx, rax ; second parameter = parm
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_megabuf: turn the index in xmm0 into an item address returned in rax.
;   index = trunc(xmm0 + double at [r12-8])
;   Fast path: index is (unsigned) below NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK
;   and the block pointer read from the table at [r12 + (index>>16)*8] is
;   non-null:  rax = block pointer + (index & (NSEEL_RAM_ITEMSPERBLOCK-1)) * 8.
;   Otherwise the function at the 0xFEFEFEFEFEFEFEFE placeholder address
;   (presumably patched before use) is called as f(r12, index) and rax is
;   whatever that call leaves there.
; The compare is unsigned (jae), so a negative index takes the call path.
; [r12-8] is presumably a small rounding constant -- confirm.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_megabuf
_asm_megabuf:
addsd xmm0, qword [r12-8]
%ifdef AMD64ABI
cvttsd2si rdx, xmm0
; check if rdx is in range, and buffer available, otherwise call function
cmp rdx, ((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK)) ; REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))
jae label_15
; rax = byte offset of the block's slot in the pointer table: (index>>16)*8
mov rax, rdx
shr rax, (NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3 ) ; log2(sizeof(void *)) ; REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3 ) ; log2(sizeof(void *))
and rax, ((NSEEL_RAM_BLOCKS-1)*8 ) ; sizeof(void*) ; REPLACE=((NSEEL_RAM_BLOCKS-1)*8 ) ; sizeof(void*)
mov rax, qword [r12+rax]
; non-null block pointer: compute the item address directly
test rax, rax
jnz label_16
label_15:
; slow path: out of range or block pointer is null
mov rax, qword 0xFEFEFEFEFEFEFEFE
mov rdi, r12 ; set first parm to ctx
mov r15, rsi ; save rsi
mov rsi, rdx ; rsi becomes second parameter (rdi is first, context pointer)
pre_call
call rax
post_call
mov rsi, r15 ; restore rsi
jmp label_17
label_16:
; rax += (index within block) * 8
and rdx, (NSEEL_RAM_ITEMSPERBLOCK-1) ; REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1)
shl rdx, 3 ; 3 is log2(sizeof(EEL_F))
add rax, rdx
label_17:
%else
; check if the index (rdi) is in range...
cvttsd2si rdi, xmm0
cmp rdi, ((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK)) ; REPLACE=((NSEEL_RAM_BLOCKS*NSEEL_RAM_ITEMSPERBLOCK))
jae label_18
; rax = byte offset of the block's slot in the pointer table: (index>>16)*8
mov rax, rdi
shr rax, (NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3 ) ; log2(sizeof(void *)) ; REPLACE=(NSEEL_RAM_ITEMSPERBLOCK_LOG2 - 3 ) ; log2(sizeof(void *))
and rax, ((NSEEL_RAM_BLOCKS-1)*8 ) ; sizeof(void*) ; REPLACE=((NSEEL_RAM_BLOCKS-1)*8 ) ; sizeof(void*)
mov rax, qword [r12+rax]
; non-null block pointer: compute the item address directly
test rax, rax
jnz label_19
label_18:
; slow path: out of range or block pointer is null
mov rax, qword 0xFEFEFEFEFEFEFEFE ; function ptr
mov rcx, r12 ; set first parm to ctx
mov rdx, rdi ; rdx is second parameter (rcx is first)
pre_call
call rax
post_call
jmp label_20
label_19:
; rax += (index within block) * 8
and rdi, (NSEEL_RAM_ITEMSPERBLOCK-1) ; REPLACE=(NSEEL_RAM_ITEMSPERBLOCK-1)
shl rdi, 3 ; 3 is log2(sizeof(EEL_F))
add rax, rdi
label_20:
%endif
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; _asm_gmegabuf: call a function as f(context, index) with
;   index = trunc(xmm0 + double at [r12-8]).
; Two 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order in the
; instruction stream: the context value, then the function address
; (presumably both patched before use). [r12-8] is presumably a small
; rounding constant -- confirm.
; The callee's return registers are left untouched by this fragment.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global _asm_gmegabuf
_asm_gmegabuf:
addsd xmm0, qword [r12-8]
; reserve call-site stack (and, with AMD64ABI, save xmm4-xmm7)
pre_call
%ifdef AMD64ABI
; rsi is kept in r15 across the call
mov r15, rsi
mov rdi, qword 0xFEFEFEFEFEFEFEFE ; first parameter = context pointer
mov rdx, qword 0xFEFEFEFEFEFEFEFE
; second parameter = truncated index
cvttsd2si rsi, xmm0
call rdx
mov rsi, r15
%else
mov rcx, qword 0xFEFEFEFEFEFEFEFE ; first parameter = context pointer
mov rdi, qword 0xFEFEFEFEFEFEFEFE
; second parameter = truncated index
cvttsd2si rdx, xmm0
call rdi
%endif
; undo pre_call
post_call
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_push: push the qword at [rax] onto a memory stack.
; Three 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order:
;   1) address of the variable holding the current stack pointer (in rdi),
;   2) a value ANDed into the advanced pointer,
;   3) a value ORed into the advanced pointer.
; The AND/OR pair presumably wraps the pointer inside a fixed, aligned region
; -- confirm against the code that patches these values.
; Steps: new = (([rdi] + 8) & imm2) | imm3; [new] = value; [rdi] = new.
; On exit rax = new stack pointer. Clobbers rcx, rdx, rdi, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_push
nseel_asm_stack_push:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
; rcx = value to push
mov rcx, qword [rax]
; advance the stack pointer by one 8-byte item
mov rax, qword [rdi]
add rax, 8
mov rdx, qword 0xFEFEFEFEFEFEFEFE
and rax, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
or rax, rdx
; store the value, then publish the new stack pointer
mov qword [rax], rcx
mov qword [rdi], rax
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_pop: pop the top qword of a memory stack into [rax].
; Three 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order:
;   1) address of the variable holding the current stack pointer (in rdi),
;   2) a value ANDed into the moved pointer,
;   3) a value ORed into the moved pointer.
; The AND/OR pair presumably wraps the pointer inside a fixed, aligned region
; -- confirm against the code that patches these values.
; Steps: value = [[rdi]]; [rdi] = (([rdi] - 8) & imm2) | imm3; [rax] = value.
; rax is unchanged; the value is also left in xmm0. Clobbers rcx, rdx, rdi.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_pop
nseel_asm_stack_pop:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
mov rcx, qword [rdi]
; xmm0 = item currently on top of the stack
movq xmm0, [rcx]
; step the stack pointer back by one 8-byte item
sub rcx, 8
mov rdx, qword 0xFEFEFEFEFEFEFEFE
and rcx, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
or rcx, rdx
mov qword [rdi], rcx
; deliver the popped value to the destination
movq [rax], xmm0
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_pop_fast: pop without copying. rax = address of the item
; that was on top (the old stack pointer); the stored stack pointer is
; stepped back by 8 and passed through the AND/OR pair.
; Three 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order:
;   1) address of the variable holding the current stack pointer (in rdi),
;   2) a value ANDed into the moved pointer,
;   3) a value ORed into the moved pointer.
; The AND/OR pair presumably wraps the pointer inside a fixed, aligned region
; -- confirm against the code that patches these values.
; Clobbers rcx, rdx, rdi, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_pop_fast
nseel_asm_stack_pop_fast:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
mov rcx, qword [rdi]
; result: pointer to the item being popped
mov rax, rcx
sub rcx, 8
mov rdx, qword 0xFEFEFEFEFEFEFEFE
and rcx, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
or rcx, rdx
mov qword [rdi], rcx
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_peek_int: rax = address of a stack item at a fixed distance
; from the top. The stored stack pointer is not modified.
; Four 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order:
;   1) address of the variable holding the current stack pointer (in rdi),
;   2) a byte offset subtracted from the stack pointer,
;   3) a value ANDed into the result,
;   4) a value ORed into the result.
; The AND/OR pair presumably wraps the pointer inside a fixed, aligned region
; -- confirm against the code that patches these values.
; Clobbers rdx, rdi, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_peek_int
nseel_asm_stack_peek_int:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
mov rax, qword [rdi]
mov rdx, qword 0xFEFEFEFEFEFEFEFE
sub rax, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
and rax, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
or rax, rdx
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_peek: rax = address of the stack item trunc(xmm0) entries
; away from the top: (([rdi] - trunc(xmm0)*8) & imm2) | imm3.
; The stored stack pointer is not modified.
; Three 0xFEFEFEFEFEFEFEFE placeholder immediates appear, in this order:
;   1) address of the variable holding the current stack pointer (in rdi),
;   2) a value ANDed into the result,
;   3) a value ORed into the result.
; The AND/OR pair presumably wraps the pointer inside a fixed, aligned region
; -- confirm against the code that patches these values.
; Clobbers rdx, rdi, flags.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_peek
nseel_asm_stack_peek:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
; rdx = requested depth, as an integer
cvttsd2si rdx, xmm0
mov rax, qword [rdi]
shl rdx, 3 ; log2(sizeof(EEL_F))
sub rax, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
and rax, rdx
mov rdx, qword 0xFEFEFEFEFEFEFEFE
or rax, rdx
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_peek_top: rax = qword read from the address in rdi, which
; is loaded from a 0xFEFEFEFEFEFEFEFE placeholder (presumably patched with
; the address of the stack-pointer variable, making rax the current top-of-
; stack pointer -- confirm).
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_peek_top
nseel_asm_stack_peek_top:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
mov rax, qword [rdi]
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
; nseel_asm_stack_exch: swap the qword at [rax] with the qword at the address
; stored in [rdi]. rdi is loaded from a 0xFEFEFEFEFEFEFEFE placeholder
; (presumably patched with the address of the stack-pointer variable, so the
; swap is with the top stack item -- confirm).
; rax is unchanged. Clobbers rcx, rdi, xmm0, xmm1.
; There is no ret: the code is followed by 8 raw marker bytes (db).
global nseel_asm_stack_exch
nseel_asm_stack_exch:
mov rdi, qword 0xFEFEFEFEFEFEFEFE
; rcx = address read from [rdi]
mov rcx, qword [rdi]
; load both values, then store them crosswise
movq xmm0, [rcx]
movq xmm1, [rax]
movq [rax], xmm0
movq [rcx], xmm1
db 0x89,0x90,0x90,0x90,0x90,0x90,0x90,0x00
;-----------------------------------------------------------------------
; eel_callcode64: run a block of generated code with adjusted FP state.
;   AMD64ABI : rdi = code address, rsi = ram-blocks pointer
;   otherwise: rcx = code address, rdx = ram-blocks pointer
; The ram-blocks pointer is placed in r12 for the called code.
; Local frame (16 bytes):
;   [frame+0]  caller's x87 control word     [frame+4]  modified control word
;   [frame+8]  caller's MXCSR                [frame+12] modified MXCSR
; rbx, rbp, r12-r15 are saved and restored; when AMD64ABI is not defined,
; rdi, rsi and xmm6-xmm9 are preserved as well.
;-----------------------------------------------------------------------
global eel_callcode64
eel_callcode64:
        sub     rsp, 16

%ifndef EEL_X64_NO_CHANGE_FPFLAGS
        ; x87: remember the caller's control word, then load a modified copy
        fnstcw  [rsp]
        mov     ax, [rsp]
        or      ax, 0x23F               ; 53 or 64 bit precision, masking all exceptions
        mov     [rsp+4], ax
        fldcw   [rsp+4]
%endif

        ; SSE: remember the caller's MXCSR, then load a modified copy
        stmxcsr [rsp+8]
        mov     eax, [rsp+8]
        or      ah, 136                 ; 128|8, bits 15 and 11
        mov     [rsp+12], eax
        ldmxcsr [rsp+12]

        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

%ifndef AMD64ABI
        push    rdi
        push    rsi
        mov     r12, rdx                ; second parameter is ram-blocks pointer
        save_spill_full
        call    rcx
        restore_spill_full
        pop     rsi
        pop     rdi
%else
        mov     r12, rsi                ; second parameter is ram-blocks pointer
        call    rdi
%endif

        fclex                           ; clear any pending x87 exception flags

        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx

        ; put the caller's FP state back (frame is at rsp again after the pops)
        ldmxcsr [rsp+8]
%ifndef EEL_X64_NO_CHANGE_FPFLAGS
        fldcw   [rsp]
%endif
        add     rsp, 16
        ret
;-----------------------------------------------------------------------
; eel_callcode64_fast: run a block of generated code without touching the
; x87 control word or MXCSR.
;   AMD64ABI : rdi = code address, rsi = ram-blocks pointer
;   otherwise: rcx = code address, rdx = ram-blocks pointer
; The ram-blocks pointer is placed in r12 for the called code.
; rbx, rbp, r12-r15 are saved and restored; when AMD64ABI is not defined,
; rdi, rsi and xmm6-xmm9 are preserved as well.
;-----------------------------------------------------------------------
global eel_callcode64_fast
eel_callcode64_fast:
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

%ifndef AMD64ABI
        push    rdi
        push    rsi
        mov     r12, rdx                ; second parameter is ram-blocks pointer
        save_spill_full
        call    rcx
        restore_spill_full
        pop     rsi
        pop     rdi
%else
        mov     r12, rsi                ; second parameter is ram-blocks pointer
        call    rdi
%endif

        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        ret
;-----------------------------------------------------------------------
; eel_enterfp: save the current FP state into a caller-supplied buffer and
; switch to modified x87 / SSE control settings.
;   In : first integer argument (rdi with AMD64ABI, rcx otherwise)
;        = pointer to a buffer of at least 8 bytes
;   Out: [buf+0] = previous x87 control word (16 bits)
;        [buf+4] = previous MXCSR (32 bits)
;   Clobbers: rax, flags. Uses 16 bytes of stack as scratch.
; Both ABI variants run the same instructions; only the register holding
; the buffer pointer differs.
;-----------------------------------------------------------------------
global eel_enterfp
eel_enterfp:
%ifdef AMD64ABI
%define EEL_ENTERFP_STATE rdi
%else
%define EEL_ENTERFP_STATE rcx
%endif
        ; x87 control word: save, then load a modified copy
        ; ([buf+4] is only a temporary here; it is overwritten below)
        fnstcw  [EEL_ENTERFP_STATE]
        mov     ax, [EEL_ENTERFP_STATE]
        or      ax, 0x23F               ; 53 or 64 bit precision, masking all exceptions
        mov     [EEL_ENTERFP_STATE+4], ax
        fldcw   [EEL_ENTERFP_STATE+4]

        ; MXCSR: save into [buf+4], then load a modified copy via the stack
        sub     rsp, 16
        stmxcsr [EEL_ENTERFP_STATE+4]
        mov     eax, [EEL_ENTERFP_STATE+4]
        or      ah, 136                 ; 128|8, bits 15 and 11
        mov     [rsp], eax
        ldmxcsr [rsp]
        add     rsp, 16
%undef EEL_ENTERFP_STATE
        ret
;-----------------------------------------------------------------------
; eel_leavefp: reload FP control state from a buffer.
;   In : first integer argument (rdi with AMD64ABI, rcx otherwise)
;        = pointer to the buffer
;        [buf+0] = x87 control word to load, [buf+4] = MXCSR value to load
; This is the layout eel_enterfp writes, so the two form a save/restore pair.
;-----------------------------------------------------------------------
global eel_leavefp
eel_leavefp:
%ifdef AMD64ABI
%define EEL_LEAVEFP_STATE rdi
%else
%define EEL_LEAVEFP_STATE rcx
%endif
        fldcw   [EEL_LEAVEFP_STATE]
        ldmxcsr [EEL_LEAVEFP_STATE+4]
%undef EEL_LEAVEFP_STATE
        ret