powerpc: Optimize AltiVec context switch

Use r8 instead of r5 to slightly optimize _CPU_Context_switch(). It is not a big deal; however, we already assume r12 is used by _CPU_Context_switch(). Treat r5 in the same way.
2017-03-07 07:58:11 +01:00
parent c6f7639250
commit a11e1ff576
2 changed files with 30 additions and 31 deletions
--- a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
+++ b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
@@ -73,9 +73,10 @@
 	.set   r0,   0
 	.set   r3,   3
 	.set   r4,   4
-	.set   r5,   5
+	/* Do not use r5, since this is used by _CPU_Context_switch() */
 	.set   r6,   6
 	.set   r7,   7
+	.set   r8,   8
 	.set   r9,   9
 	.set   r10, 10
 	.set   r11, 11
@@ -578,12 +579,12 @@ _CPU_save_altivec_volatile:
 	mfcr      r9
 #endif

-	PREP_FOR_SAVE r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV19     _B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV19     _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr        v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE r0, v0, r3, r11
@@ -613,10 +614,10 @@ _CPU_load_altivec_volatile:
 	/* Start preloading 2nd line (where first two vectors are)    */
 	dcbt      0, r3
 	L_VSCR_VRSAVE r3, r0, v0
-	CMP_BASES     r3, r4, r5, r6, r10
+	CMP_BASES     r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are)      */
-	dcbt      0, r5
-	L_V0TOV19 r3, r4, r5, r6, r10, r11
+	dcbt      0, r8
+	L_V0TOV19 r3, r4, r8, r6, r10, r11

 #ifndef IGNORE_VRSAVE
 	mtcr      r9
@@ -627,9 +628,9 @@ _CPU_load_altivec_volatile:
 _CPU_Context_switch_altivec:

 	/* fetch offset of altivec area in context                   */
-	CMPOFF    r5
+	CMPOFF    r8
 	/* down-align 'to' area to cache-line boundary               */
-	add       r4, r4, r5
+	add       r4, r4, r8
 	CACHE_DOWNALGN r4

 	/* Check for PSIM                                            */
@@ -658,21 +659,21 @@ _CPU_Context_switch_altivec:

 	/* SAVE NON-VOLATILE REGISTERS                               */

-	/* Compute aligned destination pointer (r5 still holds offset
+	/* Compute aligned destination pointer (r8 still holds offset
 	 * to 'altivec' area in context)
 	 */
-	add       r3, r3, r5
+	add       r3, r3, r8
 	CACHE_DOWNALGN r3

-	PREP_FOR_SAVE r0, r3, r5, r6, r7, r10
+	PREP_FOR_SAVE r0, r3, r8, r6, r7, r10
 	/* The manual says reading vscr can take some time - do 
 	 * read it here (into a volatile vector register) while
 	 * we wait for cache blocks to be allocated
 	 */
 	mfvscr    v0
-	S_V20TOV31 _LRU=l, _B0=r3, _B1=r5, _B2=r6, _B3=r7, _O1=r10, _O2=r11
+	S_V20TOV31 _LRU=l, _B0=r3, _B1=r8, _B2=r6, _B3=r7, _O1=r10, _O2=r11
 	/* vrsave is now in r0 (PREP_FOR_SAVE), vscr in v0 */
-	S_VSCR_VRSAVE r0, v0, r3, r5
+	S_VSCR_VRSAVE r0, v0, r3, r8

 1:

@@ -681,8 +682,8 @@ _CPU_Context_switch_altivec:
 	/* Advance past vrsave/vscr area                             */
 	addi      r4, r4, PPC_CACHE_ALIGNMENT
 	L_VSCR_VRSAVE r4, r0, v0
-	CMP_BASES r4, r5, r6, r7, r10
-	L_V20TOV31 r4, r5, r6, r7, r10, r11
+	CMP_BASES r4, r8, r6, r7, r10
+	L_V20TOV31 r4, r8, r6, r7, r10, r11

 #ifndef IGNORE_VRSAVE
 	mtcr      r9
@@ -691,12 +692,12 @@ _CPU_Context_switch_altivec:

 	.global _CPU_Context_initialize_altivec
 _CPU_Context_initialize_altivec:
-	CMPOFF    r5
-	add       r3, r3, r5
+	CMPOFF    r8
+	add       r3, r3, r8
 	CACHE_DOWNALGN r3
-	lis       r5, _CPU_altivec_vrsave_initval@ha
-	lwz       r5, _CPU_altivec_vrsave_initval@l(r5)
-	stw       r5, VRSAVE_OFF(r3)
+	lis       r8, _CPU_altivec_vrsave_initval@ha
+	lwz       r8, _CPU_altivec_vrsave_initval@l(r8)
+	stw       r8, VRSAVE_OFF(r3)
 	lis       r6, _CPU_altivec_vscr_initval@ha
 	lwz       r6, _CPU_altivec_vscr_initval@l(r6)
 	stw       r6, VSCR_OFF(r3)
@@ -715,8 +716,8 @@ _CPU_Context_initialize_altivec:
 	 */
 	.global _CPU_altivec_set_vrsave_initval
 _CPU_altivec_set_vrsave_initval:
-	lis       r5, _CPU_altivec_vrsave_initval@ha
-	stw       r3, _CPU_altivec_vrsave_initval@l(r5)
+	lis       r8, _CPU_altivec_vrsave_initval@ha
+	stw       r3, _CPU_altivec_vrsave_initval@l(r8)
 	mtvrsave  r3
 	blr

@@ -771,10 +772,10 @@ _CPU_altivec_load_all:
 	/* Start preloading 2nd line (where first two vectors are)    */
 	dcbt      0, r3
 	L_VSCR_VRSAVE r3, r0, v0
-	CMP_BASES     r3, r4, r5, r6, r10
+	CMP_BASES     r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are)      */
-	dcbt      0, r5
-	L_V0TOV31 r3, r4, r5, r6, r10, r11
+	dcbt      0, r8
+	L_V0TOV31 r3, r4, r8, r6, r10, r11

 #ifndef IGNORE_VRSAVE
 	mtcr      r9
@@ -794,12 +795,12 @@ _CPU_altivec_save_all:
 	mfcr      r9
 #endif

-	PREP_FOR_SAVE r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV31     _B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV31     _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr        v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE r0, v0, r3, r11
--- a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
+++ b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
@@ -435,11 +435,9 @@ PROC (_CPU_Context_switch):
 restore_context:

 #if defined(__ALTIVEC__) && !defined(PPC_MULTILIB_ALTIVEC)
-	mr	r14, r5
 	mr	r4, r5
 	.extern	_CPU_Context_switch_altivec
 	bl	_CPU_Context_switch_altivec
-	mr	r5, r14
 #endif

 	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)