#ifndef _NSEEL_GLUE_ARM_H_
#define _NSEEL_GLUE_ARM_H_

// Register conventions used by the code emitted through this header:
// r0=return value, first parm, r1-r2 parms
// r3+ should be reserved
// blx addr
// stmfd sp!, {register list, lr}
// ldmfd sp!, {register list, pc}

// let's make r8 = worktable
// let's make r7 = ramtable
// r6 = consttab
// r5 = worktable ptr
// r0=p1
// r1=p2
// r2=p3

// d0 is return value?

// A second floating point register (d1, see the tables below) is available.
#define GLUE_HAS_FPREG2 1

// Instruction sequence copying d0 into d1.
static const unsigned int GLUE_COPY_FPSTACK_TO_FPREG2[] = {
  0xeeb01b40 // fcpyd d1, d0
};

// Instruction sequence loading d1 from the top of the stack and releasing
// the 8 bytes it occupied.
static unsigned int GLUE_POP_STACK_TO_FPREG2[] = {
  0xed9d1b00,// vldr d1, [sp]
  0xe28dd008,// add sp, sp, #8
};

// Spill registers are d8..d8+(GLUE_MAX_SPILL_REGS-1); each spill/restore is a
// single 4-byte instruction regardless of the slot index.
#define GLUE_MAX_SPILL_REGS 8
#define GLUE_SAVE_TO_SPILL_SIZE(x) (4)
#define GLUE_RESTORE_SPILL_TO_FPREG2_SIZE(x) (4)

// Writes one instruction word at b that copies spill register d8+ws into d1.
// ws is the spill slot index; it is not range-checked here (presumably the
// caller keeps it below GLUE_MAX_SPILL_REGS).
static void GLUE_RESTORE_SPILL_TO_FPREG2(void *b, int ws)
{
  *(unsigned int *)b = 0xeeb01b48 + ws; // fcpyd d1, d8+ws
}

// Writes one instruction word at b that copies d0 into spill register d8+ws.
// The destination register number lives in bits 12-15, hence the shift.
static void GLUE_SAVE_TO_SPILL(void *b, int ws)
{
  *(unsigned int *)b = 0xeeb08b40 + (ws<<12); // fcpyd d8+ws, d0
}

#define GLUE_MAX_FPSTACK_SIZE 0 // no stack support
#define GLUE_MAX_JMPSIZE ((1<<25) - 1024) // maximum relative jump size

// endOfInstruction is end of jump with relative offset, offset passed in is offset from end of dest instruction.
// TODO: verify, but offset probably from next instruction (PC is ahead) #define GLUE_JMP_SET_OFFSET(endOfInstruction,offset) (((int *)(endOfInstruction))[-1] = (((int *)(endOfInstruction))[-1]&0xFF000000)|((((offset)>>2)-1))) // /=conditional=always = 0xE // |/= 101(L), so 8+2+0 = 10 = A static const unsigned int GLUE_JMP_NC[] = { 0xEA000000 }; static const unsigned int GLUE_JMP_IF_P1_Z[]= { 0xe1100000, // tst r0, r0 0x0A000000, // branch if Z set }; static const unsigned int GLUE_JMP_IF_P1_NZ[]= { 0xe1100000, // tst r0, r0 0x1A000000, // branch if Z clear }; #define GLUE_MOV_PX_DIRECTVALUE_TOFPREG2_SIZE 12 // wr=-2, sets d1 #define GLUE_MOV_PX_DIRECTVALUE_SIZE 8 static void GLUE_MOV_PX_DIRECTVALUE_GEN(void *b, INT_PTR v, int wv) { // requires ARMv6thumb2 or later const unsigned int reg_add = wdl_max(wv,0) << 12; static const unsigned int tab[2] = { 0xe3000000, // movw r0, #0000 0xe3400000, // movt r0, #0000 }; // 0xABAAA, B is register, A are bits of word unsigned int *p=(unsigned int *)b; p[0] = tab[0] | reg_add | (v&0xfff) | ((v&0xf000)<<4); p[1] = tab[1] | reg_add | ((v>>16)&0xfff) | ((v&0xf0000000)>>12); if (wv == -2) p[2] = 0xed901b00; // fldd d1, [r0] } const static unsigned int GLUE_FUNC_ENTER[1] = { 0xe92d4010 }; // push {r4, lr} #define GLUE_FUNC_ENTER_SIZE 4 const static unsigned int GLUE_FUNC_LEAVE[1] = { 0 }; // let GLUE_RET pop #define GLUE_FUNC_LEAVE_SIZE 0 const static unsigned int GLUE_RET[]={ 0xe8bd8010 }; // pop {r4, pc} static int GLUE_RESET_WTP(unsigned char *out, void *ptr) { const static unsigned int GLUE_SET_WTP_FROM_R8 = 0xe1a05008; // mov r5, r8 if (out) memcpy(out,&GLUE_SET_WTP_FROM_R8,sizeof(GLUE_SET_WTP_FROM_R8)); return sizeof(GLUE_SET_WTP_FROM_R8); } const static unsigned int GLUE_PUSH_P1[1]={ 0xe52d0008 }; // push {r0}, aligned to 8 static int arm_encode_constforalu(int amt) { int nrot = 16; while (amt >= 0x100 && nrot > 1) { // ARM encodes integers for ALU operations as rotated right by third nibble*2 amt = (amt + 3)>>2; nrot--; } 
return ((nrot&15) << 8) | amt; } #define GLUE_STORE_P1_TO_STACK_AT_OFFS_SIZE(x) ((x)>=4096 ? 8 : 4) static void GLUE_STORE_P1_TO_STACK_AT_OFFS(void *b, int offs) { if (offs >= 4096) { // add r2, sp, (offs&~4095) *(unsigned int *)b = 0xe28d2000 | arm_encode_constforalu(offs&~4095); // str r0, [r2, offs&4095] ((unsigned int *)b)[1] = 0xe5820000 + (offs&4095); } else { // str r0, [sp, #offs] *(unsigned int *)b = 0xe58d0000 + offs; } } #define GLUE_MOVE_PX_STACKPTR_SIZE 4 static void GLUE_MOVE_PX_STACKPTR_GEN(void *b, int wv) { // mov rX, sp *(unsigned int *)b = 0xe1a0000d + (wv<<12); } #define GLUE_MOVE_STACK_SIZE 4 static void GLUE_MOVE_STACK(void *b, int amt) { unsigned int instr = 0xe28dd000; if (amt < 0) { instr = 0xe24dd000; amt=-amt; } *(unsigned int*)b = instr | arm_encode_constforalu(amt); } #define GLUE_POP_PX_SIZE 4 static void GLUE_POP_PX(void *b, int wv) { ((unsigned int *)b)[0] = 0xe49d0008 | (wv<<12); // pop {rX}, aligned to 8 } #define GLUE_SET_PX_FROM_P1_SIZE 4 static void GLUE_SET_PX_FROM_P1(void *b, int wv) { *(unsigned int *)b = 0xe1a00000 | (wv<<12); // mov rX, r0 } static const unsigned int GLUE_PUSH_P1PTR_AS_VALUE[] = { 0xed907b00, // fldd d7, [r0] 0xe24dd008, // sub sp, sp, #8 0xed8d7b00, // fstd d7, [sp] }; static int GLUE_POP_VALUE_TO_ADDR(unsigned char *buf, void *destptr) { if (buf) { unsigned int *bufptr = (unsigned int *)buf; *bufptr++ = 0xed9d7b00; // fldd d7, [sp] *bufptr++ = 0xe28dd008; // add sp, sp, #8 GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0); bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4; *bufptr++ = 0xed807b00; // fstd d7, [r0] } return 3*4 + GLUE_MOV_PX_DIRECTVALUE_SIZE; } static int GLUE_COPY_VALUE_AT_P1_TO_PTR(unsigned char *buf, void *destptr) { if (buf) { unsigned int *bufptr = (unsigned int *)buf; *bufptr++ = 0xed907b00; // fldd d7, [r0] GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0); bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4; *bufptr++ = 0xed807b00; // fstd d7, [r0] } return 2*4 + 
GLUE_MOV_PX_DIRECTVALUE_SIZE; } #ifndef _MSC_VER #define GLUE_CALL_CODE(bp, cp, rt) do { \ unsigned int f; \ if (!(h->compile_flags&NSEEL_CODE_COMPILE_FLAG_NOFPSTATE) && \ !((f=glue_getscr())&(1<<24))) { \ glue_setscr(f|(1<<24)); \ eel_callcode32(bp, cp, rt); \ glue_setscr(f); \ } else eel_callcode32(bp, cp, rt);\ } while(0) static const double __consttab[] = { NSEEL_CLOSEFACTOR, 0.0, 1.0, -1.0, -0.5, // for invsqrt 1.5, }; static void eel_callcode32(INT_PTR bp, INT_PTR cp, INT_PTR rt) { __asm__ volatile( "mov r7, %2\n" "mov r6, %3\n" "mov r8, %1\n" "mov r0, %0\n" "mov r1, sp\n" "bic sp, sp, #7\n" "push {r1, lr}\n" "blx r0\n" "pop {r1, lr}\n" "mov sp, r1\n" ::"r" (cp), "r" (bp), "r" (rt), "r" (__consttab) : "r5", "r6", "r7", "r8", "r10", "d8","d9","d10","d11","d12","d13","d14","d15"); }; #endif static unsigned char *EEL_GLUE_set_immediate(void *_p, INT_PTR newv) { unsigned int *p=(unsigned int *)_p; while ((p[0]&0x000F0FFF) != 0x000d0ead && (p[1]&0x000F0FFF) != 0x000b0eef) p++; p[0] = (p[0]&0xFFF0F000) | (newv&0xFFF) | ((newv << 4) & 0xF0000); p[1] = (p[1]&0xFFF0F000) | ((newv>>16)&0xFFF) | ((newv >> 12)&0xF0000); return (unsigned char *)(p+1); } #define GLUE_SET_PX_FROM_WTP_SIZE sizeof(int) static void GLUE_SET_PX_FROM_WTP(void *b, int wv) { *(unsigned int *)b = 0xe1a00005 + (wv<<12); // mov rX, r5 } static int GLUE_POP_FPSTACK_TO_PTR(unsigned char *buf, void *destptr) { if (buf) { unsigned int *bufptr = (unsigned int *)buf; GLUE_MOV_PX_DIRECTVALUE_GEN(bufptr, (INT_PTR)destptr,0); bufptr += GLUE_MOV_PX_DIRECTVALUE_SIZE/4; *bufptr++ = 0xed800b00; // fstd d0, [r0] } return GLUE_MOV_PX_DIRECTVALUE_SIZE + sizeof(int); } #define GLUE_POP_FPSTACK_SIZE 0 static const unsigned int GLUE_POP_FPSTACK[1] = { 0 }; // no need to pop, not a stack static const unsigned int GLUE_POP_FPSTACK_TOSTACK[] = { 0xe24dd008, // sub sp, sp, #8 0xed8d0b00, // fstd d0, [sp] }; static const unsigned int GLUE_POP_FPSTACK_TO_WTP[] = { 0xed850b00, // fstd d0, [r5] 0xe2855008, // add r5, r5, #8 }; 
#define GLUE_PUSH_VAL_AT_PX_TO_FPSTACK_SIZE 4 static void GLUE_PUSH_VAL_AT_PX_TO_FPSTACK(void *b, int wv) { *(unsigned int *)b = 0xed900b00 + (wv<<16); // fldd d0, [rX] } #define GLUE_POP_FPSTACK_TO_WTP_TO_PX_SIZE (sizeof(GLUE_POP_FPSTACK_TO_WTP) + GLUE_SET_PX_FROM_WTP_SIZE) static void GLUE_POP_FPSTACK_TO_WTP_TO_PX(unsigned char *buf, int wv) { GLUE_SET_PX_FROM_WTP(buf,wv); memcpy(buf + GLUE_SET_PX_FROM_WTP_SIZE,GLUE_POP_FPSTACK_TO_WTP,sizeof(GLUE_POP_FPSTACK_TO_WTP)); }; static const unsigned int GLUE_SET_P1_Z[] = { 0xe3a00000 }; // mov r0, #0 static const unsigned int GLUE_SET_P1_NZ[] = { 0xe3a00001 }; // mov r0, #1 static void *GLUE_realAddress(void *fn, int *size) { static const unsigned int sig[3] = { 0xe1a00000, 0xe1a01001, 0xe1a02002 }; unsigned char *p = (unsigned char *)fn; while (memcmp(p,sig,sizeof(sig))) p+=4; p+=sizeof(sig); fn = p; while (memcmp(p,sig,sizeof(sig))) p+=4; *size = p - (unsigned char *)fn; return fn; } static unsigned int __attribute__((unused)) glue_getscr() { unsigned int rv; asm volatile ( "fmrx %0, fpscr" : "=r" (rv)); return rv; } static void __attribute__((unused)) glue_setscr(unsigned int v) { asm volatile ( "fmxr fpscr, %0" :: "r"(v)); } void eel_enterfp(int s[2]) { s[0] = glue_getscr(); glue_setscr(s[0] | (1<<24)); // could also do 3<<22 for RTZ } void eel_leavefp(int s[2]) { glue_setscr(s[0]); } #endif