add oversampler
This commit is contained in:
335
oversampling/WDL/eel2/glue_arm.h
Normal file
335
oversampling/WDL/eel2/glue_arm.h
Normal file
@@ -0,0 +1,335 @@
|
||||
#ifndef _NSEEL_GLUE_ARM_H_
|
||||
#define _NSEEL_GLUE_ARM_H_
|
||||
|
||||
// r0=return value, first parm, r1-r2 parms
|
||||
// r3+ should be reserved
|
||||
// blx addr
|
||||
// stmfd sp!, {register list, lr}
|
||||
// ldmfd sp!, {register list, pc}
|
||||
|
||||
// let's make r8 = worktable
|
||||
// let's make r7 = ramtable
|
||||
// r6 = consttab
|
||||
// r5 = worktable ptr
|
||||
|
||||
// r0=p1
|
||||
// r1=p2
|
||||
// r2=p3
|
||||
|
||||
// d0 is return value?
|
||||
|
||||
|
||||
// Advertises that this target provides a second FP register ("FPREG2");
// the helpers in this file use d1 for it.
#define GLUE_HAS_FPREG2 1
|
||||
|
||||
// Instruction template: copy the FP result register d0 into FPREG2 (d1).
static const unsigned int GLUE_COPY_FPSTACK_TO_FPREG2[1] = {
  0xeeb01b40, // fcpyd d1, d0
};
|
||||
|
||||
// Instruction template: pop one double from the CPU stack into FPREG2 (d1).
// Declared const like every other instruction template in this file; it is
// only ever a source of bytes to copy, never written.
static const unsigned int GLUE_POP_STACK_TO_FPREG2[] = {
  0xed9d1b00, // vldr d1, [sp]
  0xe28dd008, // add sp, sp, #8
};
|
||||
|
||||
// Upper bound on spill slots. The spill helpers in this file encode slot ws
// as FP register d8+ws (presumably ws ranges over 0..7 -- confirm against caller).
#define GLUE_MAX_SPILL_REGS 8
|
||||
// Bytes of code written by GLUE_SAVE_TO_SPILL: one 32-bit instruction, independent of x.
#define GLUE_SAVE_TO_SPILL_SIZE(x) (4)
|
||||
// Bytes of code written by GLUE_RESTORE_SPILL_TO_FPREG2: one 32-bit instruction, independent of x.
#define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (4)
|
||||
|
||||
// Writes one instruction at b that copies spill slot ws (register d8+ws) into d1.
//   b:  destination for the 4-byte instruction word
//   ws: spill slot index, added into the source-register field
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
{
  unsigned int * const code = (unsigned int *)b;
  code[0] = 0xeeb01b48 + ws; // fcpyd d1, d8+ws
}
|
||||
// Writes one instruction at b that copies d0 into spill slot ws (register d8+ws).
//   b:  destination for the 4-byte instruction word
//   ws: spill slot index, placed in the destination-register field (bits 12 and up)
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
{
  unsigned int * const code = (unsigned int *)b;
  code[0] = 0xeeb08b40 + (ws<<12); // fcpyd d8+ws, d0
}
|
||||
|
||||
|
||||
// This target exposes no FP register stack to the code generator, so the depth is 0.
#define GLUE_MAX_FPSTACK_SIZE 0 // no stack support
|
||||
// Largest relative jump distance, in bytes: (1<<25) minus 1024.
#define GLUE_MAX_JMPSIZE ((1<<25) - 1024) // maximum relative jump size
|
||||
|
||||
// Patches the relative offset of an already-emitted branch instruction.
//   endOfInstruction: address just past the 32-bit branch word to patch
//   offset:           distance in bytes from endOfInstruction to the branch target
// The top byte of the word (condition + opcode) is preserved; the low 24 bits
// receive the offset in words. An ARM branch is relative to the branch address
// + 8, i.e. endOfInstruction + 4, hence the -1 after converting bytes to words.
// The offset field is masked to 24 bits so that a zero or negative offset
// (where (offset>>2)-1 is negative) cannot overwrite the condition/opcode byte.
#define GLUE_JMP_SET_OFFSET(endOfInstruction,offset) (((int *)(endOfInstruction))[-1] = (((int *)(endOfInstruction))[-1]&0xFF000000)|(((((offset)>>2)-1))&0x00FFFFFF))
|
||||
|
||||
// /=conditional=always = 0xE
|
||||
// |/= 101(L), so 8+2+0 = 10 = A
|
||||
// Unconditional branch template: condition nibble 0xE (always), branch opcode 0xA,
// offset field zero (presumably filled in afterwards via GLUE_JMP_SET_OFFSET).
static const unsigned int GLUE_JMP_NC[] = { 0xEA000000 };
|
||||
|
||||
// Branch-if-P1-is-zero template: test r0 against itself, then branch when Z is set.
// The branch word is emitted with a zero offset field.
static const unsigned int GLUE_JMP_IF_P1_Z[2] = {
  0xe1100000, // tst r0, r0
  0x0A000000, // branch if Z set
};
|
||||
// Branch-if-P1-is-nonzero template: test r0 against itself, then branch when Z is clear.
// The branch word is emitted with a zero offset field.
static const unsigned int GLUE_JMP_IF_P1_NZ[2] = {
  0xe1100000, // tst r0, r0
  0x1A000000, // branch if Z clear
};
|
||||
|
||||
#define GLUE_MOV_PX_DIRECTVALUE_TOFPREG2_SIZE 12 // wr=-2, sets d1
|
||||
#define GLUE_MOV_PX_DIRECTVALUE_SIZE 8
|
||||
static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wv)
|
||||
{
|
||||
// requires ARMv6thumb2 or later
|
||||
const unsigned int reg_add = wdl_max(wv,0) << 12;
|
||||
static const unsigned int tab[2] = {
|
||||
0xe3000000, // movw r0, #0000
|
||||
0xe3400000, // movt r0, #0000
|
||||
};
|
||||
// 0xABAAA, B is register, A are bits of word
|
||||
unsigned int *p=(unsigned int *)b;
|
||||
p[0] = tab[0] | reg_add | (v&0xfff) | ((v&0xf000)<<4);
|
||||
p[1] = tab[1] | reg_add | ((v>>16)&0xfff) | ((v&0xf0000000)>>12);
|
||||
if (wv == -2) p[2] = 0xed901b00; // fldd d1, [r0]
|
||||
}
|
||||
|
||||
// Function prologue: saves r4 and the return address.
static const unsigned int GLUE_FUNC_ENTER[1] = { 0xe92d4010 }; // push {r4, lr}
#define GLUE_FUNC_ENTER_SIZE 4

// No separate epilogue is emitted (size 0); GLUE_RET restores the saved registers.
static const unsigned int GLUE_FUNC_LEAVE[1] = { 0 }; // let GLUE_RET pop
#define GLUE_FUNC_LEAVE_SIZE 0

// Return: restores r4 and loads the saved return address straight into pc.
static const unsigned int GLUE_RET[] = { 0xe8bd8010 }; // pop {r4, pc}
|
||||
|
||||
// Emits the instruction that resets the worktable pointer (r5) from r8.
//   out: destination buffer, or NULL to only query the size
//   ptr: unused on this target
// Returns the number of code bytes (written or required).
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
{
  static const unsigned int reset_insn = 0xe1a05008; // mov r5, r8

  if (out)
  {
    memcpy(out, &reset_insn, sizeof(reset_insn));
  }
  return sizeof(reset_insn);
}
|
||||
|
||||
|
||||
// Pushes r0 using an 8-byte slot so the stack stays 8-byte aligned.
static const unsigned int GLUE_PUSH_P1[1] = { 0xe52d0008 }; // push {r0}, aligned to 8
|
||||
|
||||
|
||||
// Encodes amt as an ARM ALU immediate operand: an 8-bit value plus a 4-bit
// rotation field at bits 8-11 (the value is rotated right by twice that field).
// While amt does not fit in 8 bits it is divided by 4, rounding up, and the
// rotation count is decremented; values that are not exactly representable
// are therefore rounded up. Returns the 12-bit operand field.
static int arm_encode_constforalu(int amt)
{
  int rot;
  for (rot = 16; rot > 1 && amt >= 0x100; --rot)
  {
    amt = (amt + 3) >> 2;
  }
  return amt | ((rot & 15) << 8);
}
|
||||
|
||||
|
||||
// Code size in bytes for GLUE_STORE_P1_TO_STACK_AT_OFFS at offset x.
#define GLUE_STORE_P1_TO_STACK_AT_OFFS_SIZE(x) ((x)>=4096 ? 8 : 4)

// Emits code at b that stores r0 to [sp + offs].
// Offsets below 4096 fit in a single store; larger offsets first form
// sp + (offs & ~4095) in r2 and then store relative to r2.
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
{
  unsigned int * const code = (unsigned int *)b;

  if (offs < 4096)
  {
    code[0] = 0xe58d0000 + offs; // str r0, [sp, #offs]
    return;
  }

  code[0] = 0xe28d2000 | arm_encode_constforalu(offs&~4095); // add r2, sp, (offs&~4095)
  code[1] = 0xe5820000 + (offs&4095);                        // str r0, [r2, offs&4095]
}
|
||||
|
||||
#define GLUE_MOVE_PX_STACKPTR_SIZE 4

// Writes one instruction at b that copies the stack pointer into register wv.
static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
{
  unsigned int * const code = (unsigned int *)b;
  code[0] = 0xe1a0000d + (wv<<12); // mov rX, sp
}
|
||||
|
||||
#define GLUE_MOVE_STACK_SIZE 4

// Writes one instruction at b that adjusts sp by amt bytes:
// an add for non-negative amt, a sub of -amt for negative amt.
// The magnitude is encoded with arm_encode_constforalu().
static void GLUE_MOVE_STACK(void *b, int amt)
{
  const int shrink = amt < 0;
  const unsigned int opcode = shrink ? 0xe24dd000   // sub sp, sp, #imm
                                     : 0xe28dd000;  // add sp, sp, #imm
  const int magnitude = shrink ? -amt : amt;

  *(unsigned int*)b = opcode | arm_encode_constforalu(magnitude);
}
|
||||
|
||||
#define GLUE_POP_PX_SIZE 4

// Writes one instruction at b that pops an 8-byte stack slot into register wv.
static void GLUE_POP_PX(void *b, int wv)
{
  unsigned int * const code = (unsigned int *)b;
  const unsigned int rd_field = wv<<12;
  code[0] = 0xe49d0008 | rd_field; // pop {rX}, aligned to 8
}
|
||||
|
||||
#define GLUE_SET_PX_FROM_P1_SIZE 4

// Writes one instruction at b that copies r0 (P1) into register wv.
static void GLUE_SET_PX_FROM_P1(void *b, int wv)
{
  unsigned int * const code = (unsigned int *)b;
  const unsigned int rd_field = wv<<12;
  code[0] = 0xe1a00000 | rd_field; // mov rX, r0
}
|
||||
|
||||
|
||||
// Instruction template: load the double r0 points at and push it onto the
// CPU stack, using d7 as scratch.
static const unsigned int GLUE_PUSH_P1PTR_AS_VALUE[3] = {
  0xed907b00, // fldd d7, [r0]
  0xe24dd008, // sub sp, sp, #8
  0xed8d7b00, // fstd d7, [sp]
};
|
||||
|
||||
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr)
|
||||
{
|
||||
if (buf)
|
||||
{
|
||||
unsigned int *bufptr = (unsigned int *)buf;
|
||||
*bufptr++ = 0xed9d7b00; // fldd d7, [sp]
|
||||
*bufptr++ = 0xe28dd008; // add sp, sp, #8
|
||||
GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0);
|
||||
bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4;
|
||||
*bufptr++ = 0xed807b00; // fstd d7, [r0]
|
||||
}
|
||||
return 3*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE;
|
||||
}
|
||||
|
||||
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr)
|
||||
{
|
||||
if (buf)
|
||||
{
|
||||
unsigned int *bufptr = (unsigned int *)buf;
|
||||
*bufptr++ = 0xed907b00; // fldd d7, [r0]
|
||||
GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0);
|
||||
bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4;
|
||||
*bufptr++ = 0xed807b00; // fstd d7, [r0]
|
||||
}
|
||||
return 2*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE;
|
||||
}
|
||||
|
||||
|
||||
#ifndef _MSC_VER
|
||||
// Runs generated code through eel_callcode32(bp, cp, rt).
// Reads `h` from the scope where the macro is expanded. Unless
// h->compile_flags has NSEEL_CODE_COMPILE_FLAG_NOFPSTATE set, bit 24 of FPSCR
// is forced on for the duration of the call (when not already on) and the
// previous FPSCR value is restored afterwards.
#define GLUE_CALL_CODE(bp, cp, rt) do { \
  unsigned int f; \
  if ((h->compile_flags&NSEEL_CODE_COMPILE_FLAG_NOFPSTATE) || \
      ((f=glue_getscr())&(1<<24))) { \
    eel_callcode32(bp, cp, rt); /* FP state left as-is */ \
  } else { \
    glue_setscr(f|(1<<24)); \
    eel_callcode32(bp, cp, rt); \
    glue_setscr(f); \
  } \
} while(0)
|
||||
|
||||
static const double __consttab[] = {
|
||||
NSEEL_CLOSEFACTOR,
|
||||
0.0,
|
||||
1.0,
|
||||
-1.0,
|
||||
-0.5, // for invsqrt
|
||||
1.5,
|
||||
};
|
||||
|
||||
// Calls generated code at cp with the register conventions described at the
// top of this file:
//   bp -> r8 (worktable), rt -> r7 (ramtable), __consttab -> r6
// The stack pointer is aligned down to 8 bytes for the call; the original sp
// and lr are pushed and restored afterwards.
//
// "memory" and "cc" are listed as clobbers because the called code reads and
// writes arbitrary memory and changes the condition flags; without them the
// compiler may keep memory values cached in registers across the call.
// NOTE(review): the asm also overwrites r0/r1, and the callee may change other
// caller-saved registers, none of which are declared. Adding them to the
// clobber list may leave too few registers for the four inputs -- confirm
// before changing.
static void eel_callcode32(INT_PTR bp, INT_PTR cp, INT_PTR rt)
{
  __asm__ volatile(
    "mov r7, %2\n"
    "mov r6, %3\n"
    "mov r8, %1\n"
    "mov r0, %0\n"
    "mov r1, sp\n"
    "bic sp, sp, #7\n"
    "push {r1, lr}\n"
    "blx r0\n"
    "pop {r1, lr}\n"
    "mov sp, r1\n"
    ::"r" (cp), "r" (bp), "r" (rt), "r" (__consttab) :
     "r5", "r6", "r7", "r8", "r10",
     "d8","d9","d10","d11","d12","d13","d14","d15",
     "memory", "cc");
}
|
||||
#endif
|
||||
|
||||
// Scans forward from _p for a placeholder constant-load pair and patches newv
// into it.
// The scan stops at the first word whose immediate fields (mask 0x000F0FFF)
// equal 0x000d0ead, or whose following word's fields equal 0x000b0eef. The
// 16-bit immediates of that word and the next are then replaced with the low
// and high halves of newv; all other bits are preserved.
// Returns a pointer to the second patched word.
// NOTE(review): the scan stops when EITHER word matches; requiring both may
// have been intended -- confirm against the placeholder emitted by the stubs.
// The scan is unbounded: the caller must guarantee a placeholder exists.
static unsigned char *EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
{
  unsigned int *insn = (unsigned int *)_p;

  for (;;)
  {
    if ((insn[0]&0x000F0FFF) == 0x000d0ead ||
        (insn[1]&0x000F0FFF) == 0x000b0eef) break;
    insn++;
  }

  // immediate layout: 12 bits at bit 0, 4 bits at bit 16
  insn[0] = (insn[0]&0xFFF0F000) | (newv&0xFFF) | ((newv << 4) & 0xF0000);
  insn[1] = (insn[1]&0xFFF0F000) | ((newv>>16)&0xFFF) | ((newv >> 12)&0xF0000);

  return (unsigned char *)(insn+1);
}
|
||||
|
||||
#define GLUE_SET_PX_FROM_WTP_SIZE sizeof(int)

// Writes one instruction at b that copies the worktable pointer (r5) into register wv.
static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
{
  unsigned int * const code = (unsigned int *)b;
  code[0] = 0xe1a00005 + (wv<<12); // mov rX, r5
}
|
||||
|
||||
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
|
||||
{
|
||||
if (buf)
|
||||
{
|
||||
unsigned int *bufptr = (unsigned int *)buf;
|
||||
GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0);
|
||||
bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4;
|
||||
|
||||
*bufptr++ = 0xed800b00; // fstd d0, [r0]
|
||||
}
|
||||
return GLUE_MOV_PX_DIRECTVALUE_SIZE + sizeof(int);
|
||||
}
|
||||
|
||||
// Discarding the FP result needs no code on this target: d0 is a plain
// register, not a stack, so the template size is 0.
#define GLUE_POP_FPSTACK_SIZE 0
static const unsigned int GLUE_POP_FPSTACK[1] = {
  0 // no need to pop, not a stack
};
|
||||
|
||||
// Instruction template: push d0 onto the CPU stack.
static const unsigned int GLUE_POP_FPSTACK_TOSTACK[2] = {
  0xe24dd008, // sub sp, sp, #8
  0xed8d0b00, // fstd d0, [sp]
};
|
||||
|
||||
// Instruction template: store d0 at the worktable pointer (r5), then advance
// r5 by one double.
static const unsigned int GLUE_POP_FPSTACK_TO_WTP[2] = {
  0xed850b00, // fstd d0, [r5]
  0xe2855008, // add r5, r5, #8
};
|
||||
|
||||
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE 4

// Writes one instruction at b that loads the double register wv points at into d0.
static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
{
  unsigned int * const code = (unsigned int *)b;
  code[0] = 0xed900b00 + (wv<<16); // fldd d0, [rX]
}
|
||||
|
||||
#define GLUE_POP_FPSTACK_TO_WTP_TO_PX_SIZE (sizeof(GLUE_POP_FPSTACK_TO_WTP) + GLUE_SET_PX_FROM_WTP_SIZE)
|
||||
static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
|
||||
{
|
||||
GLUE_SET_PX_FROM_WTP(buf,wv);
|
||||
memcpy(buf + GLUE_SET_PX_FROM_WTP_SIZE,GLUE_POP_FPSTACK_TO_WTP,sizeof(GLUE_POP_FPSTACK_TO_WTP));
|
||||
};
|
||||
|
||||
// Instruction templates that set r0 (P1) to a false / true integer value.
static const unsigned int GLUE_SET_P1_Z[1] = {
  0xe3a00000, // mov r0, #0
};
static const unsigned int GLUE_SET_P1_NZ[1] = {
  0xe3a00001, // mov r0, #1
};
|
||||
|
||||
|
||||
// Locates the copyable body of a code stub delimited by marker sequences.
// Starting at fn, advances 4 bytes at a time until the 12-byte marker
// (mov r0,r0 / mov r1,r1 / mov r2,r2) is found; the body begins right after
// it and runs up to the next occurrence of the marker.
//   fn:   address to start scanning from
//   size: receives the body length in bytes
// Returns the address of the first body byte. The scan is unbounded: both
// markers must be present.
static void *GLUE_realAddress(void *fn, int *size)
{
  static const unsigned int sig[3] = { 0xe1a00000, 0xe1a01001, 0xe1a02002 };
  unsigned char *body = (unsigned char *)fn;
  unsigned char *end;

  // skip to just past the opening marker
  while (memcmp(body,sig,sizeof(sig)) != 0) body += 4;
  body += sizeof(sig);

  // find the closing marker
  for (end = body; memcmp(end,sig,sizeof(sig)) != 0; end += 4) { }

  *size = end - body;
  return body;
}
|
||||
|
||||
static unsigned int __attribute__((unused)) glue_getscr()
|
||||
{
|
||||
unsigned int rv;
|
||||
asm volatile ( "fmrx %0, fpscr" : "=r" (rv));
|
||||
return rv;
|
||||
}
|
||||
static void __attribute__((unused)) glue_setscr(unsigned int v)
|
||||
{
|
||||
asm volatile ( "fmxr fpscr, %0" :: "r"(v));
|
||||
}
|
||||
|
||||
// Saves the current FPSCR value into s[0] and turns on FPSCR bit 24.
// s[1] is not used on this target. Pair with eel_leavefp(s).
void eel_enterfp(int s[2])
{
  int * const saved = &s[0];
  *saved = glue_getscr();
  glue_setscr(*saved | (1<<24)); // could also do 3<<22 for RTZ
}
|
||||
// Restores the FPSCR value that eel_enterfp() stored in s[0].
void eel_leavefp(int s[2])
{
  const int saved_fpscr = s[0];
  glue_setscr(saved_fpscr);
}
|
||||
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user