336 lines
8.9 KiB
C
336 lines
8.9 KiB
C
#ifndef _NSEEL_GLUE_ARM_H_
|
|
#define _NSEEL_GLUE_ARM_H_
|
|
|
|
// r0=return value, first parm, r1-r2 parms
|
|
// r3+ should be reserved
|
|
// blx addr
|
|
// stmfd sp!, {register list, lr}
|
|
// ldmfd sp!, {register list, pc}
|
|
|
|
// let's make r8 = worktable
|
|
// let's make r7 = ramtable
|
|
// r6 = consttab
|
|
// r5 = worktable ptr
|
|
|
|
// r0=p1
|
|
// r1=p2
|
|
// r2=p3
|
|
|
|
// d0 is return value?
|
|
|
|
|
|
#define GLUE_HAS_FPREG2 1
|
|
|
|
static const unsigned int GLUE_COPY_FPSTACK_TO_FPREG2[] = {
|
|
0xeeb01b40 // fcpyd d1, d0
|
|
};
|
|
|
|
static unsigned int GLUE_POP_STACK_TO_FPREG2[] = {
|
|
0xed9d1b00,// vldr d1, [sp]
|
|
0xe28dd008,// add sp, sp, #8
|
|
};
|
|
|
|
#define GLUE_MAX_SPILL_REGS 8
|
|
#define GLUE_SAVE_TO_SPILL_SIZE(x) (4)
|
|
#define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (4)
|
|
|
|
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
|
|
{
|
|
*(unsigned int *)b = 0xeeb01b48 + ws; // fcpyd d1, d8+ws
|
|
}
|
|
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
|
|
{
|
|
*(unsigned int *)b = 0xeeb08b40 + (ws<<12); // fcpyd d8+ws, d0
|
|
}
|
|
|
|
|
|
#define GLUE_MAX_FPSTACK_SIZE 0 // no stack support
|
|
#define GLUE_MAX_JMPSIZE ((1<<25) - 1024) // maximum relative jump size
|
|
|
|
// endOfInstruction is end of jump with relative offset, offset passed in is offset from end of dest instruction.
|
|
// TODO: verify, but offset probably from next instruction (PC is ahead)
|
|
#define GLUE_JMP_SET_OFFSET(endOfInstruction,offset) (((int *)(endOfInstruction))[-1] = (((int *)(endOfInstruction))[-1]&0xFF000000)|((((offset)>>2)-1)))
|
|
|
|
// /=conditional=always = 0xE
|
|
// |/= 101(L), so 8+2+0 = 10 = A
|
|
static const unsigned int GLUE_JMP_NC[] = { 0xEA000000 };
|
|
|
|
static const unsigned int GLUE_JMP_IF_P1_Z[]=
|
|
{
|
|
0xe1100000, // tst r0, r0
|
|
0x0A000000, // branch if Z set
|
|
};
|
|
static const unsigned int GLUE_JMP_IF_P1_NZ[]=
|
|
{
|
|
0xe1100000, // tst r0, r0
|
|
0x1A000000, // branch if Z clear
|
|
};
|
|
|
|
#define GLUE_MOV_PX_DIRECTVALUE_TOFPREG2_SIZE 12 // wr=-2, sets d1
|
|
#define GLUE_MOV_PX_DIRECTVALUE_SIZE 8
|
|
static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wv)
|
|
{
|
|
// requires ARMv6thumb2 or later
|
|
const unsigned int reg_add = wdl_max(wv,0) << 12;
|
|
static const unsigned int tab[2] = {
|
|
0xe3000000, // movw r0, #0000
|
|
0xe3400000, // movt r0, #0000
|
|
};
|
|
// 0xABAAA, B is register, A are bits of word
|
|
unsigned int *p=(unsigned int *)b;
|
|
p[0] = tab[0] | reg_add | (v&0xfff) | ((v&0xf000)<<4);
|
|
p[1] = tab[1] | reg_add | ((v>>16)&0xfff) | ((v&0xf0000000)>>12);
|
|
if (wv == -2) p[2] = 0xed901b00; // fldd d1, [r0]
|
|
}
|
|
|
|
const static unsigned int GLUE_FUNC_ENTER[1] = { 0xe92d4010 }; // push {r4, lr}
|
|
#define GLUE_FUNC_ENTER_SIZE 4
|
|
const static unsigned int GLUE_FUNC_LEAVE[1] = { 0 }; // let GLUE_RET pop
|
|
#define GLUE_FUNC_LEAVE_SIZE 0
|
|
const static unsigned int GLUE_RET[]={ 0xe8bd8010 }; // pop {r4, pc}
|
|
|
|
static int GLUE_RESET_WTP(unsigned char *out, void *ptr)
|
|
{
|
|
const static unsigned int GLUE_SET_WTP_FROM_R8 = 0xe1a05008; // mov r5, r8
|
|
if (out) memcpy(out,&GLUE_SET_WTP_FROM_R8,sizeof(GLUE_SET_WTP_FROM_R8));
|
|
return sizeof(GLUE_SET_WTP_FROM_R8);
|
|
}
|
|
|
|
|
|
const static unsigned int GLUE_PUSH_P1[1]={ 0xe52d0008 }; // push {r0}, aligned to 8
|
|
|
|
|
|
static int arm_encode_constforalu(int amt)
|
|
{
|
|
int nrot = 16;
|
|
while (amt >= 0x100 && nrot > 1)
|
|
{
|
|
// ARM encodes integers for ALU operations as rotated right by third nibble*2
|
|
amt = (amt + 3)>>2;
|
|
nrot--;
|
|
}
|
|
return ((nrot&15) << 8) | amt;
|
|
}
|
|
|
|
|
|
#define GLUE_STORE_P1_TO_STACK_AT_OFFS_SIZE(x) ((x)>=4096 ? 8 : 4)
|
|
static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs)
|
|
{
|
|
if (offs >= 4096)
|
|
{
|
|
// add r2, sp, (offs&~4095)
|
|
*(unsigned int *)b = 0xe28d2000 | arm_encode_constforalu(offs&~4095);
|
|
// str r0, [r2, offs&4095]
|
|
((unsigned int *)b)[1] = 0xe5820000 + (offs&4095);
|
|
}
|
|
else
|
|
{
|
|
// str r0, [sp, #offs]
|
|
*(unsigned int *)b = 0xe58d0000 + offs;
|
|
}
|
|
}
|
|
|
|
#define GLUE_MOVE_PX_STACKPTR_SIZE 4
|
|
static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv)
|
|
{
|
|
// mov rX, sp
|
|
*(unsigned int *)b = 0xe1a0000d + (wv<<12);
|
|
}
|
|
|
|
#define GLUE_MOVE_STACK_SIZE 4
|
|
static void GLUE_MOVE_STACK(void *b, int amt)
|
|
{
|
|
unsigned int instr = 0xe28dd000;
|
|
if (amt < 0)
|
|
{
|
|
instr = 0xe24dd000;
|
|
amt=-amt;
|
|
}
|
|
*(unsigned int*)b = instr | arm_encode_constforalu(amt);
|
|
}
|
|
|
|
#define GLUE_POP_PX_SIZE 4
|
|
static void GLUE_POP_PX(void *b, int wv)
|
|
{
|
|
((unsigned int *)b)[0] = 0xe49d0008 | (wv<<12); // pop {rX}, aligned to 8
|
|
}
|
|
|
|
#define GLUE_SET_PX_FROM_P1_SIZE 4
|
|
static void GLUE_SET_PX_FROM_P1(void *b, int wv)
|
|
{
|
|
*(unsigned int *)b = 0xe1a00000 | (wv<<12); // mov rX, r0
|
|
}
|
|
|
|
|
|
static const unsigned int GLUE_PUSH_P1PTR_AS_VALUE[] =
|
|
{
|
|
0xed907b00, // fldd d7, [r0]
|
|
0xe24dd008, // sub sp, sp, #8
|
|
0xed8d7b00, // fstd d7, [sp]
|
|
};
|
|
|
|
static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr)
|
|
{
|
|
if (buf)
|
|
{
|
|
unsigned int *bufptr = (unsigned int *)buf;
|
|
*bufptr++ = 0xed9d7b00; // fldd d7, [sp]
|
|
*bufptr++ = 0xe28dd008; // add sp, sp, #8
|
|
GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0);
|
|
bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4;
|
|
*bufptr++ = 0xed807b00; // fstd d7, [r0]
|
|
}
|
|
return 3*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE;
|
|
}
|
|
|
|
static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr)
|
|
{
|
|
if (buf)
|
|
{
|
|
unsigned int *bufptr = (unsigned int *)buf;
|
|
*bufptr++ = 0xed907b00; // fldd d7, [r0]
|
|
GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0);
|
|
bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4;
|
|
*bufptr++ = 0xed807b00; // fstd d7, [r0]
|
|
}
|
|
return 2*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE;
|
|
}
|
|
|
|
|
|
#ifndef _MSC_VER
|
|
#define GLUE_CALL_CODE(bp, cp, rt) do { \
|
|
unsigned int f; \
|
|
if (!(h->compile_flags&NSEEL_CODE_COMPILE_FLAG_NOFPSTATE) && \
|
|
!((f=glue_getscr())&(1<<24))) { \
|
|
glue_setscr(f|(1<<24)); \
|
|
eel_callcode32(bp, cp, rt); \
|
|
glue_setscr(f); \
|
|
} else eel_callcode32(bp, cp, rt);\
|
|
} while(0)
|
|
|
|
static const double __consttab[] = {
|
|
NSEEL_CLOSEFACTOR,
|
|
0.0,
|
|
1.0,
|
|
-1.0,
|
|
-0.5, // for invsqrt
|
|
1.5,
|
|
};
|
|
|
|
static void eel_callcode32(INT_PTR bp, INT_PTR cp, INT_PTR rt)
|
|
{
|
|
__asm__ volatile(
|
|
"mov r7, %2\n"
|
|
"mov r6, %3\n"
|
|
"mov r8, %1\n"
|
|
"mov r0, %0\n"
|
|
"mov r1, sp\n"
|
|
"bic sp, sp, #7\n"
|
|
"push {r1, lr}\n"
|
|
"blx r0\n"
|
|
"pop {r1, lr}\n"
|
|
"mov sp, r1\n"
|
|
::"r" (cp), "r" (bp), "r" (rt), "r" (__consttab) :
|
|
"r5", "r6", "r7", "r8", "r10",
|
|
"d8","d9","d10","d11","d12","d13","d14","d15");
|
|
};
|
|
#endif
|
|
|
|
static unsigned char *EEL_GLUE_set_immediate(void *_p, INT_PTR newv)
|
|
{
|
|
unsigned int *p=(unsigned int *)_p;
|
|
while ((p[0]&0x000F0FFF) != 0x000d0ead &&
|
|
(p[1]&0x000F0FFF) != 0x000b0eef) p++;
|
|
p[0] = (p[0]&0xFFF0F000) | (newv&0xFFF) | ((newv << 4) & 0xF0000);
|
|
p[1] = (p[1]&0xFFF0F000) | ((newv>>16)&0xFFF) | ((newv >> 12)&0xF0000);
|
|
|
|
return (unsigned char *)(p+1);
|
|
}
|
|
|
|
#define GLUE_SET_PX_FROM_WTP_SIZE sizeof(int)
|
|
static void GLUE_SET_PX_FROM_WTP(void *b, int wv)
|
|
{
|
|
*(unsigned int *)b = 0xe1a00005 + (wv<<12); // mov rX, r5
|
|
}
|
|
|
|
static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr)
|
|
{
|
|
if (buf)
|
|
{
|
|
unsigned int *bufptr = (unsigned int *)buf;
|
|
GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0);
|
|
bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4;
|
|
|
|
*bufptr++ = 0xed800b00; // fstd d0, [r0]
|
|
}
|
|
return GLUE_MOV_PX_DIRECTVALUE_SIZE + sizeof(int);
|
|
}
|
|
|
|
#define GLUE_POP_FPSTACK_SIZE 0
|
|
static const unsigned int GLUE_POP_FPSTACK[1] = { 0 }; // no need to pop, not a stack
|
|
|
|
static const unsigned int GLUE_POP_FPSTACK_TOSTACK[] = {
|
|
0xe24dd008, // sub sp, sp, #8
|
|
0xed8d0b00, // fstd d0, [sp]
|
|
};
|
|
|
|
static const unsigned int GLUE_POP_FPSTACK_TO_WTP[] = {
|
|
0xed850b00, // fstd d0, [r5]
|
|
0xe2855008, // add r5, r5, #8
|
|
};
|
|
|
|
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE 4
|
|
static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv)
|
|
{
|
|
*(unsigned int *)b = 0xed900b00 + (wv<<16); // fldd d0, [rX]
|
|
}
|
|
|
|
#define GLUE_POP_FPSTACK_TO_WTP_TO_PX_SIZE (sizeof(GLUE_POP_FPSTACK_TO_WTP) + GLUE_SET_PX_FROM_WTP_SIZE)
|
|
static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv)
|
|
{
|
|
GLUE_SET_PX_FROM_WTP(buf,wv);
|
|
memcpy(buf + GLUE_SET_PX_FROM_WTP_SIZE,GLUE_POP_FPSTACK_TO_WTP,sizeof(GLUE_POP_FPSTACK_TO_WTP));
|
|
};
|
|
|
|
static const unsigned int GLUE_SET_P1_Z[] = { 0xe3a00000 }; // mov r0, #0
|
|
static const unsigned int GLUE_SET_P1_NZ[] = { 0xe3a00001 }; // mov r0, #1
|
|
|
|
|
|
static void *GLUE_realAddress(void *fn, int *size)
|
|
{
|
|
static const unsigned int sig[3] = { 0xe1a00000, 0xe1a01001, 0xe1a02002 };
|
|
unsigned char *p = (unsigned char *)fn;
|
|
|
|
while (memcmp(p,sig,sizeof(sig))) p+=4;
|
|
p+=sizeof(sig);
|
|
fn = p;
|
|
|
|
while (memcmp(p,sig,sizeof(sig))) p+=4;
|
|
*size = p - (unsigned char *)fn;
|
|
return fn;
|
|
}
|
|
|
|
static unsigned int __attribute__((unused)) glue_getscr()
|
|
{
|
|
unsigned int rv;
|
|
asm volatile ( "fmrx %0, fpscr" : "=r" (rv));
|
|
return rv;
|
|
}
|
|
static void __attribute__((unused)) glue_setscr(unsigned int v)
|
|
{
|
|
asm volatile ( "fmxr fpscr, %0" :: "r"(v));
|
|
}
|
|
|
|
void eel_enterfp(int s[2])
|
|
{
|
|
s[0] = glue_getscr();
|
|
glue_setscr(s[0] | (1<<24)); // could also do 3<<22 for RTZ
|
|
}
|
|
void eel_leavefp(int s[2])
|
|
{
|
|
glue_setscr(s[0]);
|
|
}
|
|
|
|
|
|
#endif
|