add oversampler

This commit is contained in:
2024-05-24 13:28:31 +02:00
parent e4a4a661a0
commit 989dba5a6b
484 changed files with 313937 additions and 0 deletions

View File

@@ -0,0 +1,448 @@
#ifndef _NSEEL_GLUE_AARCH64_H_
#define _NSEEL_GLUE_AARCH64_H_
#define GLUE_MOD_IS_64
// x0=return value, first parm, x1-x2 parms (x3-x7 more params)
// x8 return struct?
// x9-x15 temporary
// x16-x17 = PLT, linker
// x18 reserved (TLS)
// x19-x28 callee-saved
// x19 = worktable
// x20 = ramtable
// x21 = consttab
// x22 = worktable ptr
// x23-x28 spare
// x29 frame pointer
// x30 link register
// x31 SP/zero
// x0=p1
// x1=p2
// x2=p3
// d0 is return value for fp?
// d/v/f0-7 = arguments/results
// 8-15 callee saved
// 16-31 temporary
// v8-v15 spill registers
// Number of FP spill registers usable by generated code; the spill helpers below
// address them as d8+ws.
#define GLUE_MAX_SPILL_REGS 8
// Byte size of the code written by GLUE_SAVE_TO_SPILL (one 32-bit instruction).
#define GLUE_SAVE_TO_SPILL_SIZE(x) (4)
// Byte size of the code written by GLUE_RESTORE_SPILL_TO_FPREG2 (one 32-bit instruction).
#define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (4)
// Writes one instruction at b: "fmov d1, d(8+ws)", i.e. copies spill register ws
// into the second FP register (d1).
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int fmov_d1_d8 = 0x1e604101; // fmov d1, d8

  // the source register number occupies bits 5-9 of the opcode
  out[0] = fmov_d1_d8 + (ws << 5);
}
// Writes one instruction at b: "fmov d(8+ws), d0", i.e. copies d0 into spill
// register ws.
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int fmov_d8_d0 = 0x1e604008; // fmov d8, d0

  // the destination register number occupies the low bits of the opcode
  out[0] = fmov_d8_d0 + ws;
}
// This target has a second FP register (d1) in addition to the primary one (d0).
#define GLUE_HAS_FPREG2 1
// Copies the primary FP value (d0) into the second FP register (d1).
static const unsigned int GLUE_COPY_FPSTACK_TO_FPREG2[] = { 0x1e604001 }; // fmov d1, d0
// Pops one 16-byte stack slot into d1 (post-indexed load, sp += 16).
static unsigned int GLUE_POP_STACK_TO_FPREG2[] = {
0xfc4107e1 // ldr d1, [sp], #16
};
#define GLUE_MAX_FPSTACK_SIZE 0 // no stack support
// Largest relative jump, in bytes, that generated code may request. This stays below
// the +/-1 MiB reach of the 19-bit word offset that GLUE_JMP_SET_OFFSET writes into
// conditional branches.
#define GLUE_MAX_JMPSIZE ((1<<20) - 1024) // maximum relative jump size
// Patches the branch instruction that ends at endOfInstruction with a relative target.
// offset is in bytes, measured from the end of the branch instruction; 4 is added
// and the sum divided by 4 to form the word displacement stored in the opcode
// (so offset 0 yields a displacement of one instruction).
// Two encodings are recognised; any other opcode is left untouched:
//   b      (0x14......): 26-bit word displacement
//   b.cond (0x54......): 19-bit word displacement in bits 5-23, condition kept in bits 0-3
//                        (0=eq, 1=ne, b=lt, c=gt, d=le, a=ge)
static void GLUE_JMP_SET_OFFSET(void *endOfInstruction, int offset)
{
  unsigned int *insn = (unsigned int *)endOfInstruction - 1;
  const unsigned int opcode = insn[0];
  const int words = (offset + 4) >> 2; // byte offset -> displacement in 32-bit words

  if ((opcode & 0xFC000000) == 0x14000000)
  {
    // unconditional branch
    insn[0] = 0x14000000 | (words & 0x3FFFFFF);
    return;
  }

  if ((opcode & 0xFF000000) == 0x54000000)
  {
    // conditional branch: preserve the condition code already present
    insn[0] = 0x54000000 | (opcode & 0xF) | ((words & 0x7FFFF) << 5);
  }
}
// Unconditional branch template; the displacement field is zero and is filled in
// later by GLUE_JMP_SET_OFFSET.
static const unsigned int GLUE_JMP_NC[] = { 0x14000000 };
// Branch-if-zero template: compares w0 with 0 and branches when equal.
// The b.eq displacement is zero and is filled in later by GLUE_JMP_SET_OFFSET.
static const unsigned int GLUE_JMP_IF_P1_Z[]=
{
0x7100001f, // cmp w0, #0
0x54000000, // b.eq
};
// Branch-if-nonzero template: compares w0 with 0 and branches when not equal.
// The b.ne displacement is zero and is filled in later by GLUE_JMP_SET_OFFSET.
static const unsigned int GLUE_JMP_IF_P1_NZ[]=
{
0x7100001f, // cmp w0, #0
0x54000001, // b.ne
};
#define GLUE_MOV_PX_DIRECTVALUE_TOFPREG2_SIZE 16 // wr=-2, sets d1
#define GLUE_MOV_PX_DIRECTVALUE_SIZE 12
// Emits a mov/movk/movk sequence at b that loads the low 48 bits of v into
// register x<wv>. Any negative wv selects x0. When wv is exactly -2 a fourth
// instruction, "ldr d1, [x0]", is appended (16 bytes total instead of 12).
// Bits 48-63 of v are not encoded.
static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wv)
{
  // each opcode takes a 16-bit immediate in bits 5-20 and a register in bits 0-4
  static const unsigned int opcodes[3] = {
    0xd2800000, // mov  xN, #imm16
    0xf2a00000, // movk xN, #imm16, lsl 16
    0xf2c00000, // movk xN, #imm16, lsl 32
  };
  unsigned int *out = (unsigned int *)b;
  const int load_into_d1 = (wv == -2);
  const int reg = (wv < 0) ? 0 : wv;
  int i;

  for (i = 0; i < 3; i++)
  {
    out[i] = (unsigned int)(opcodes[i] | reg | (((v >> (16 * i)) & 0xFFFF) << 5));
  }

  if (load_into_d1)
  {
    out[3] = 0xfd400001; // ldr d1, [x0]
  }
}
// Function prologue template: pushes the frame pointer and link register, then
// sets up the frame pointer.
// NOTE(review): the array holds two instructions (8 bytes) but GLUE_FUNC_ENTER_SIZE
// is 4, so a consumer that copies GLUE_FUNC_ENTER_SIZE bytes emits only the stp and
// not "mov x29, sp" -- confirm this is intentional.
const static unsigned int GLUE_FUNC_ENTER[2] = { 0xa9bf7bfd, 0x910003fd }; // stp x29, x30, [sp, #-16]! ; mov x29, sp
#define GLUE_FUNC_ENTER_SIZE 4
// No separate epilogue code is emitted (size 0); GLUE_RET restores x29/x30 itself.
const static unsigned int GLUE_FUNC_LEAVE[1] = { 0 }; // let GLUE_RET pop
#define GLUE_FUNC_LEAVE_SIZE 0
// Return sequence: pops x29/x30 pushed by the prologue, then returns.
const static unsigned int GLUE_RET[]={ 0xa8c17bfd, 0xd65f03c0 }; // ldp x29,x30, [sp], #16 ; ret
// Emits "mov x22, x19" (reset the worktable pointer from x19) into out when out is
// non-NULL. Always returns the size of that code in bytes (4), so it can be called
// with out == NULL just to measure. ptr is not used on this target.
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
{
  static const unsigned int mov_x22_x19 = 0xaa1303f6; // mov x22, x19

  (void)ptr;
  if (out)
  {
    memcpy(out, &mov_x22_x19, sizeof(mov_x22_x19));
  }
  return 4;
}
// Pushes x0 onto the stack using a 16-byte slot (pre-indexed store, sp -= 16).
const static unsigned int GLUE_PUSH_P1[1]={ 0xf81f0fe0 }; // str x0, [sp, #-16]!
#define GLUE_STORE_P1_TO_STACK_AT_OFFS_SIZE(offs) ((offs)>=32768 ? 8 : 4)
// Emits code at b that stores x0 to [sp + offs].
// For offs below 32768 this is a single "str x0, [sp, #offs]". For larger offsets
// two instructions are written: x1 = sp + (offs / 4096) * 4096, then
// "str x0, [x1, #(offs % 4096)]". The store encodes its offset in units of 8 bytes.
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
{
  unsigned int *out = (unsigned int *)b;
  unsigned int store_opcode = 0xf90003e0; // str x0, [sp, #0]

  if (offs >= 32768)
  {
    // add x1, sp, #(offs >> 12), lsl 12
    *out++ = 0x914003e1 + ((offs >> 12) << 10);
    offs &= 4095;
    store_opcode = 0xf9000020; // str x0, [x1, #0]
  }

  // byte offset / 8 goes into the immediate field starting at bit 10
  *out = store_opcode + ((offs << (10 - 3)) & 0x7FFC00);
}
#define GLUE_MOVE_PX_STACKPTR_SIZE 4
// Writes one instruction at b: "mov x<wv>, sp".
static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int mov_x0_sp = 0x910003e0; // mov x0, sp

  out[0] = mov_x0_sp + wv; // destination register is in the low bits
}
#define GLUE_MOVE_STACK_SIZE 4
// Writes one instruction at b that adjusts sp by amt bytes: "add sp, sp, #amt" for
// amt >= 0, "sub sp, sp, #-amt" for amt < 0.
// Magnitudes below 4096 are encoded directly. Magnitudes of 4096 or more use the
// "lsl 12" immediate form with the amount rounded UP to a multiple of 4096, so sp
// can move further than requested.
static void GLUE_MOVE_STACK(void *b, int amt)
{
  unsigned int *out = (unsigned int *)b;
  unsigned int opcode = 0x910003ff; // add sp, sp, #0
  unsigned int imm;

  if (amt < 0)
  {
    opcode = 0xd10003ff; // sub sp, sp, #0
    amt = -amt;
  }

  if (amt >= 4096)
  {
    opcode |= 0x400000; // select the "lsl 12" form of the immediate
    imm = (unsigned int)((amt + 4095) >> 12);
  }
  else
  {
    imm = (unsigned int)amt;
  }

  *out = opcode | (imm << 10);
}
#define GLUE_POP_PX_SIZE 4
// Writes one instruction at b: "ldr x<wv>, [sp], #16" (pop a 16-byte stack slot
// into x<wv>).
static void GLUE_POP_PX(void *b, int wv)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int ldr_x0_pop = 0xf84107e0; // ldr x0, [sp], #16

  *out = ldr_x0_pop | wv;
}
#define GLUE_SET_PX_FROM_P1_SIZE 4
// Writes one instruction at b: "mov x<wv>, x0".
static void GLUE_SET_PX_FROM_P1(void *b, int wv)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int mov_x0_x0 = 0xaa0003e0; // mov x0, x0

  *out = mov_x0_x0 | wv; // destination register is in the low bits
}
// Loads the double that x0 points to (via scratch register d7) and pushes it onto
// the stack in a 16-byte slot.
static const unsigned int GLUE_PUSH_P1PTR_AS_VALUE[] =
{
0xfd400007, // ldr d7, [x0]
0xfc1f0fe7, // str d7, [sp, #-16]!
};
// Emits code that pops a double from the stack (16-byte slot) and stores it at the
// fixed address destptr: "ldr d7, [sp], #16", load destptr into x0, "str d7, [x0]".
// Nothing is written when buf is NULL. Always returns the code size in bytes.
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr)
{
  const int mov_words = GLUE_MOV_PX_DIRECTVALUE_SIZE / 4;

  if (buf)
  {
    unsigned int *code = (unsigned int *)buf;

    code[0] = 0xfc4107e7; // ldr d7, [sp], #16
    GLUE_MOV_PX_DIRECTVALUE_GEN(code + 1, (INT_PTR)destptr, 0);
    code[1 + mov_words] = 0xfd000007; // str d7, [x0]
  }

  return 2*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE;
}
// Emits code that copies the double x0 points to into the fixed address destptr:
// "ldr d7, [x0]", load destptr into x0, "str d7, [x0]". x0 is overwritten with
// destptr by the emitted code.
// Nothing is written when buf is NULL. Always returns the code size in bytes.
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr)
{
  const int mov_words = GLUE_MOV_PX_DIRECTVALUE_SIZE / 4;

  if (buf)
  {
    unsigned int *code = (unsigned int *)buf;

    code[0] = 0xfd400007; // ldr d7, [x0]
    GLUE_MOV_PX_DIRECTVALUE_GEN(code + 1, (INT_PTR)destptr, 0);
    code[1 + mov_words] = 0xfd000007; // str d7, [x0]
  }

  return 2*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE;
}
// Runs generated code through eel_callcode64(bp, cp, rt, consttab), where consttab
// is a static table of double constants (NSEEL_CLOSEFACTOR, 0, 1, -1, -0.5, 1.5).
//
// The macro reads `h->compile_flags` from the expansion site: a variable named `h`
// must be in scope; it is not a macro parameter.
//
// Unless NSEEL_CODE_COMPILE_FLAG_NOFPSTATE is set in h->compile_flags, the value
// returned by glue_getscr() is checked; if bit 24 is clear, it is set via
// glue_setscr() for the duration of the call and the previous value is restored
// afterwards. If the flag is set, or bit 24 is already set, the code is called
// without touching the register.
// NOTE(review): in the non-MSVC build glue_getscr() reads FPCR, where bit 24 is the
// flush-to-zero (FZ) control per the Arm architecture -- confirm for the MSVC build.
#define GLUE_CALL_CODE(bp, cp, rt) do { \
GLUE_SCR_TYPE f; \
static const double consttab[] = { \
NSEEL_CLOSEFACTOR, \
0.0, \
1.0, \
-1.0, \
-0.5, /* for invsqrt */ \
1.5, \
}; \
if (!(h->compile_flags&NSEEL_CODE_COMPILE_FLAG_NOFPSTATE) && \
!((f=glue_getscr())&(1<<24))) { \
glue_setscr(f|(1<<24)); \
eel_callcode64(bp, cp, rt, (void *)consttab); \
glue_setscr(f); \
} else eel_callcode64(bp, cp, rt, (void *)consttab);\
} while(0)
#ifndef _MSC_VER
// Calls the code at cp with the register setup the generated code expects:
//   x19 = bp, x20 = rt, x21 = consttab
// x29/x30 and x18-x23 are saved in a 64-byte stack frame before the call and
// restored afterwards.
//
// The callee is arbitrary code reached through `blr`, so the clobber list names every
// register the AArch64 procedure call standard allows a callee to modify (x0-x17,
// v0-v7, v16-v31, the condition flags) in addition to the v8-v15 spill registers,
// and "memory" because the callee reads and writes memory the compiler cannot see.
// Without these the compiler may keep live values in those registers, or move memory
// accesses across the call, once this static function is inlined.
// The inputs are copied into x0-x3 before any register is modified, so they may
// safely live in any non-clobbered register.
static void eel_callcode64(INT_PTR bp, INT_PTR cp, INT_PTR rt, void *consttab)
{
  __asm__ __volatile__(
    "mov x1, %2\n"
    "mov x2, %3\n"
    "mov x3, %1\n"
    "mov x0, %0\n"
    "stp x29, x30, [sp, #-64]!\n"
    "stp x18, x20, [sp, 16]\n"
    "stp x21, x19, [sp, 32]\n"
    "stp x22, x23, [sp, 48]\n"
    "mov x29, sp\n"
    "mov x19, x3\n"
    "mov x20, x1\n"
    "mov x21, x2\n"
    "blr x0\n"
    "ldp x29, x30, [sp], 16\n"
    "ldp x18, x20, [sp], 16\n"
    "ldp x21, x19, [sp], 16\n"
    "ldp x22, x23, [sp], 16\n"
    :
    : "r" (cp), "r" (bp), "r" (rt), "r" (consttab)
    : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
      "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
      "x16", "x17",
      "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
      "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
      "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
      "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
      "cc", "memory");
}
#else
// MSVC has no inline assembly on this target; the implementation is provided elsewhere.
void eel_callcode64(INT_PTR bp, INT_PTR cp, INT_PTR rt, void *consttab);
#endif
// Scans forward from _p, one 32-bit instruction at a time, for three consecutive
// instructions whose 16-bit immediate fields (bits 5-20) hold the placeholder values
// 0xdead, 0xbeef, 0xbeef -- the mov/movk/movk pattern:
//   mov  xN, #imm16
//   movk xN, #imm16, lsl 16
//   movk xN, #imm16, lsl 32
// and replaces those immediates with the low 48 bits of newv, keeping opcode and
// register bits. newv must fit in 48 bits (asserted).
// The scan has no end bound; the placeholder must exist at or after _p.
// Returns a pointer to the third (last) patched instruction.
static unsigned char *EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
{
  unsigned int *insn = (unsigned int *)_p;
  int i;

  WDL_ASSERT(!(newv>>48));

  for (;;)
  {
    if (((insn[0] >> 5) & 0xffff) == 0xdead &&
        ((insn[1] >> 5) & 0xffff) == 0xbeef &&
        ((insn[2] >> 5) & 0xffff) == 0xbeef)
    {
      break;
    }
    insn++;
  }

  for (i = 0; i < 3; i++)
  {
    insn[i] = (unsigned int)((insn[i] & 0xFFE0001F) | (((newv >> (16 * i)) & 0xffff) << 5));
  }

  return (unsigned char *)(insn + 2);
}
#define GLUE_SET_PX_FROM_WTP_SIZE sizeof(int)
// Writes one instruction at b: "mov x<wv>, x22" (copy the worktable pointer).
static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int mov_x0_x22 = 0xaa1603e0; // mov x0, x22

  out[0] = mov_x0_x22 + wv; // destination register is in the low bits
}
// Emits code that stores d0 at the fixed address destptr: load destptr into x0,
// then "str d0, [x0]". x0 is overwritten by the emitted code.
// Nothing is written when buf is NULL. Always returns the code size in bytes.
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
{
  if (buf)
  {
    unsigned int *code = (unsigned int *)buf;

    GLUE_MOV_PX_DIRECTVALUE_GEN(code, (INT_PTR)destptr, 0);
    code[GLUE_MOV_PX_DIRECTVALUE_SIZE / 4] = 0xfd000000; // str d0, [x0]
  }

  return GLUE_MOV_PX_DIRECTVALUE_SIZE + sizeof(int);
}
// Discarding the FP result needs no code on this target (size 0).
#define GLUE_POP_FPSTACK_SIZE 0
static const unsigned int GLUE_POP_FPSTACK[1] = { 0 }; // no need to pop, not a stack
// Pushes d0 onto the stack in a 16-byte slot (pre-indexed store, sp -= 16).
static const unsigned int GLUE_POP_FPSTACK_TOSTACK[] = {
0xfc1f0fe0, // str d0, [sp, #-16]!
};
// Stores d0 at the worktable pointer (x22) and advances x22 by 8 bytes.
static const unsigned int GLUE_POP_FPSTACK_TO_WTP[] = {
0xfc0086c0, // str d0, [x22], #8
};
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE 4
// Writes one instruction at b: "ldr d0, [x<wv>]" (load the double x<wv> points to
// into d0).
static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
{
  unsigned int *out = (unsigned int *)b;
  const unsigned int ldr_d0_x0 = 0xfd400000; // ldr d0, [x0]

  out[0] = ldr_d0_x0 + (wv << 5); // base register lives in bits 5-9
}
#define GLUE_POP_FPSTACK_TO_WTP_TO_PX_SIZE (sizeof(GLUE_POP_FPSTACK_TO_WTP) + GLUE_SET_PX_FROM_WTP_SIZE)
// Emits two pieces of code at buf: first "mov x<wv>, x22" (via GLUE_SET_PX_FROM_WTP),
// then the GLUE_POP_FPSTACK_TO_WTP sequence. After the emitted code runs, x<wv> holds
// the worktable address at which d0 was stored.
static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
{
  unsigned char *store_code = buf + GLUE_SET_PX_FROM_WTP_SIZE;

  GLUE_SET_PX_FROM_WTP(buf, wv);
  memcpy(store_code, GLUE_POP_FPSTACK_TO_WTP, sizeof(GLUE_POP_FPSTACK_TO_WTP));
}
// One-instruction sequences that set w0 to 0 and to 1 respectively.
static const unsigned int GLUE_SET_P1_Z[] = { 0x52800000 }; // mov w0, #0
static const unsigned int GLUE_SET_P1_NZ[] = { 0x52800020 }; // mov w0, #1
// Locates the body of a code stub and reports its size.
// 1. While the instruction at fn is an unconditional branch (0x14......), follow it
//    to its target (sign-extended 26-bit word displacement).
// 2. Scan forward in 4-byte steps for the marker sequence; the body starts right
//    after it.
// 3. Scan forward again for the next marker; the body ends where it begins.
// Returns the start of the body and stores its length in bytes in *size.
// The scans are unbounded: both markers must be present.
// The marker is "mov x0,x0; mov x1,x1; mov x2,x2" (only "mov x1,x1" under MSVC).
static void *GLUE_realAddress(void *fn, int *size)
{
  static const unsigned int marker[] = {
#ifndef _MSC_VER
    0xaa0003e0,
#endif
    0xaa0103e1,
#ifndef _MSC_VER
    0xaa0203e2
#endif
  };
  unsigned char *body;
  unsigned char *end;

  for (;;)
  {
    const int insn = *(int *)fn;
    int disp;

    if ((insn & 0xFC000000) != 0x14000000) break;

    disp = insn & 0x3FFFFFF;
    if (disp & 0x2000000) disp -= 0x4000000; // sign-extend the 26-bit displacement
    fn = (int *)fn + disp;
  }

  body = (unsigned char *)fn;
  while (memcmp(body, marker, sizeof(marker)) != 0) body += 4;
  body += sizeof(marker);

  end = body;
  while (memcmp(end, marker, sizeof(marker)) != 0) end += 4;

  *size = end - body;
  return body;
}
#ifndef _MSC_VER
// Type wide enough to hold the floating-point control register value.
#define GLUE_SCR_TYPE unsigned long
// Returns the current value of the FPCR (floating-point control register).
// Uses __asm__ rather than asm: the bare `asm` keyword is not available in ISO
// modes (e.g. -std=c11), and the rest of this file already uses __asm__.
static unsigned long __attribute__((unused)) glue_getscr(void)
{
  unsigned long rv;
  __asm__ __volatile__ ( "mrs %0, fpcr" : "=r" (rv));
  return rv;
}
// Writes v to the FPCR.
static void __attribute__((unused)) glue_setscr(unsigned long v)
{
  __asm__ __volatile__ ( "msr fpcr, %0" :: "r"(v));
}
#else
// MSVC build: the accessors are only declared here; their definitions live elsewhere.
#define GLUE_SCR_TYPE unsigned long long
GLUE_SCR_TYPE glue_getscr();
void glue_setscr(unsigned long long);
#endif
// Saves the current FP control register value into _s and then sets bit 24 of the
// register. _s is opaque storage for a later eel_leavefp() call; sizeof(GLUE_SCR_TYPE)
// bytes of it are written.
// The value is copied with memcpy rather than stored through a cast pointer: _s is
// an int array, so accessing it as GLUE_SCR_TYPE would break the strict-aliasing
// rule and could be misaligned.
void eel_enterfp(int _s[2])
{
  const GLUE_SCR_TYPE saved = glue_getscr();

  memcpy(_s, &saved, sizeof(saved));
  glue_setscr(saved | (1<<24));
}
// Restores the FP control register from the value eel_enterfp() saved in _s.
// The value is read back with memcpy rather than through a cast pointer: _s is an
// int array, so accessing it as GLUE_SCR_TYPE would break the strict-aliasing rule
// and could be misaligned.
void eel_leavefp(int _s[2])
{
  GLUE_SCR_TYPE saved;

  memcpy(&saved, _s, sizeof(saved));
  glue_setscr(saved);
}
#define GLUE_HAS_FUSE 1
// Peephole fusion at the boundary between two adjacent code fragments; code points
// at the first instruction of the right-hand fragment.
//
// Handles one pattern, when the left fragment is at least one instruction and the
// right fragment is exactly one instruction:
//   fmov d1, d(8+spill_reg)        <- last instruction of the left fragment
//   <op> d0, d1, d0                <- opcode 0x1e600820 with bits 12-13 ignored
// The fmov is overwritten with "<op> d0, d(8+spill_reg), d0" and -4 is returned;
// otherwise nothing is changed and 0 is returned.
// NOTE(review): -4 presumably tells the caller the code shrank by one instruction,
// and the bits 12-13 wildcard presumably selects fmul/fdiv/fadd/fsub -- confirm.
// ctx and fuse_flags are not used by this implementation.
static int GLUE_FUSE(compileContext *ctx, unsigned char *code, int left_size, int right_size, int fuse_flags, int spill_reg)
{
  unsigned int *insn = (unsigned int *)code;
  unsigned int prev;
  unsigned int next;
  int src_reg;

  if (left_size < 4 || right_size != 4) return 0;
  if (spill_reg < 0) return 0;

  prev = insn[-1];
  if ((prev & 0xfffffc1f) != 0x1e604001) return 0; // not "fmov d1, dX"

  src_reg = (prev >> 5) & 0x1f;
  if (src_reg != spill_reg + 8) return 0; // not reading the expected spill register

  next = insn[0];
  if ((next & 0xffffcfff) != 0x1e600820) return 0;

  // retarget the first source operand from d1 to the spill register
  insn[-1] = next + ((src_reg - 1) << 5);
  return -4;
}
#ifdef _M_ARM64EC
// ARM64EC builds only: for each math function below, define a file-local static
// wrapper eel_<name>() that forwards to <name>(), then #define <name> to the wrapper
// so that later references in this translation unit resolve to eel_<name>.
// NOTE(review): presumably this gives the code generator a native function body in
// this module rather than an imported one -- confirm.
// DEF_F1 wraps a one-argument function, DEF_F2 a two-argument function.
#define DEF_F1(n) static double eel_##n(double a) { return n(a); }
#define DEF_F2(n) static double eel_##n(double a, double b) { return n(a,b); }
DEF_F1(cos)
#define cos eel_cos
DEF_F1(sin)
#define sin eel_sin
DEF_F1(tan)
#define tan eel_tan
DEF_F1(log)
#define log eel_log
DEF_F1(log10)
#define log10 eel_log10
DEF_F1(acos)
#define acos eel_acos
DEF_F1(asin)
#define asin eel_asin
DEF_F1(atan)
#define atan eel_atan
DEF_F1(exp)
#define exp eel_exp
DEF_F2(pow)
#define pow eel_pow
DEF_F2(atan2)
#define atan2 eel_atan2
// ceil and floor will be wrapped by defs in nseel-compiler.c
// Requests linking against onecore.lib.
#pragma comment(lib,"onecore.lib")
#endif
#endif