	@ 32 bit ARM context switch
	@ The context switch below assumes that r9 has no special meaning on the
	@ target platform and treats it as an ordinary callee-saved register.
	@ If r9 is reserved by the platform, uncomment the following line and r9 will
	@ be left alone (neither saved nor restored).

	@ #define R9_SPECIAL

	@ Layout of the context record that the switch routine addresses through r0
	@ (outgoing) and r1 (incoming). Each field is one 4-byte pointer wide:
	@   field 0  saved stack pointer
	@   field 1  saved frame pointer
	@   field 2  program counter slot; not referenced by the code visible in this
	@            file, presumably used by code elsewhere (TODO confirm)
	@ Keep comments off the define lines themselves: anything after the macro
	@ name becomes part of the macro body.
	#define PTR_BYTE        4
	#define SP_OFFSET       ( 0 * PTR_BYTE )
	#define FP_OFFSET       ( 1 * PTR_BYTE )
	#define PC_OFFSET       ( 2 * PTR_BYTE )

	.text
	.align  2
	.global __cfactx_switch
	.type   __cfactx_switch, %function

	@ __cfactx_switch: suspend the running context and resume another one.
	@
	@ In:    r0 = record of the outgoing context; sp is written to its first
	@             pointer slot and fp to its second
	@        r1 = record of the incoming context; sp and fp are read back from
	@             the same two slots
	@ Out:   control continues at the resume address found on the incoming
	@        stack, with the callee-saved registers found there
	@ Saved: r4-r11 (r9 is skipped when R9_SPECIAL is defined), lr, s16-s31
	@ Not saved: r12(ip), the intra-procedure-call scratch register, which does
	@        not have to survive a function call
	@
	@ Frame left on the outgoing stack, lowest address first:
	@        s16-s31   16 words
	@        r4-r11     8 words (7 when R9_SPECIAL is defined)
	@        lr         1 word, used as the resume address
	@ NOTE(review): a stack prepared by hand for a context that has never run
	@ presumably has to reproduce this frame exactly; confirm with the code
	@ that builds it before changing the register lists.
	@ NOTE(review): this is the 32-bit register set only; a 64-bit port would
	@ need its own list.
__cfactx_switch:
	@ push the callee-saved core registers together with the return address
	#ifdef R9_SPECIAL
	stmfd r13!, {r4-r8,r10,r11,r14}
	#else
	stmfd r13!, {r4-r11,r14}
	#endif // R9_SPECIAL

	@ push the callee-saved floating point registers below them
	vstmdb r13!, {s16-s31}

	@ record the outgoing stack pointer and frame pointer
	str sp, [r0, #SP_OFFSET]
	str fp, [r0, #FP_OFFSET]

	@ adopt the incoming frame pointer and stack pointer; fp is r11, so it is
	@ loaded a second time when the core registers are popped below
	ldr fp, [r1, #FP_OFFSET]
	ldr sp, [r1, #SP_OFFSET]

	@ from here on every pop reads the incoming stack
	vldm r13!, {s16-s31}

	#ifdef R9_SPECIAL
	@ popping the saved return address straight into pc performs the return
	ldmfd r13!, {r4-r8,r10,r11,r15}
	#else
	@ this pop only restores lr; the return itself is the bx that follows
	ldmfd r13!, {r4-r11,r14}
	@ bx instead of a plain move into pc, so that a Thumb resume address
	@ switches instruction set on every core that has Thumb, matching what
	@ the pop into pc does in the R9_SPECIAL variant
	bx lr
	#endif // R9_SPECIAL

	.size   __cfactx_switch, . - __cfactx_switch

	.text
	.align  2
	.global __cfactx_invoke_stub
	.type   __cfactx_invoke_stub, %function

	@ __cfactx_invoke_stub: pop two words and jump through the second one.
	@
	@ In:    sp points at two words: the value for r0, then the branch target
	@ Out:   r0 = first word, r1 = second word, sp advanced by 8 bytes, control
	@        transferred to the address that was in the second word
	@ lr is not written here, so the target sees whatever lr held on entry.
	@ NOTE(review): presumably this is the resume address placed in the frame of
	@ a context that has never run, with r0 carrying the argument for the
	@ target routine; confirm with the code that builds the initial stack.
__cfactx_invoke_stub:
	ldmfd r13!, {r0-r1}
	@ bx instead of a plain move into pc, so that a Thumb target address
	@ (bit 0 set) is entered in the right instruction set on every core that
	@ has Thumb
	bx r1

	.size   __cfactx_invoke_stub, . - __cfactx_invoke_stub
