powerpc: Optimize AltiVec context switch

Use r8 instead of r5 to slightly optimize _CPU_Context_switch().  It is
not a big deal; however, we already assume that r12 is used by
_CPU_Context_switch().  Treat r5 in the same way.
Author: Sebastian Huber
Date:   2017-03-07 07:58:11 +01:00
parent  c6f7639250
commit  a11e1ff576
2 changed files with 30 additions and 31 deletions
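
The payoff shows at the call site in _CPU_Context_switch() (second file below): the pointer to the context being restored lives in r5 and is still needed after the AltiVec switch, so as long as _CPU_Context_switch_altivec() clobbered r5, it had to be parked in the non-volatile r14 around the call. With the AltiVec code moved onto r8, that save/restore pair disappears. A condensed before/after of the call site, with context lines elided:

	/* Before: r5 clobbered by the callee, staged in r14 */
	mr	r14, r5
	mr	r4, r5
	bl	_CPU_Context_switch_altivec
	mr	r5, r14
	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)

	/* After: r5 stays live across the call */
	mr	r4, r5
	bl	_CPU_Context_switch_altivec
	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)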

File 1 of 2:

@@ -73,9 +73,10 @@
 	.set	r0, 0
 	.set	r3, 3
 	.set	r4, 4
-	.set	r5, 5
+	/* Do not use r5, since this is used by _CPU_Context_switch() */
 	.set	r6, 6
 	.set	r7, 7
+	.set	r8, 8
 	.set	r9, 9
 	.set	r10, 10
 	.set	r11, 11
@@ -578,12 +579,12 @@ _CPU_save_altivec_volatile:
 	mfcr	r9
 #endif
-	PREP_FOR_SAVE	r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE	r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV19	_B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV19	_B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr	v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE	r0, v0, r3, r11
@@ -613,10 +614,10 @@ _CPU_load_altivec_volatile:
 	/* Start preloading 2nd line (where first two vectors are) */
 	dcbt	0, r3
 	L_VSCR_VRSAVE	r3, r0, v0
-	CMP_BASES	r3, r4, r5, r6, r10
+	CMP_BASES	r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are) */
-	dcbt	0, r5
-	L_V0TOV19	r3, r4, r5, r6, r10, r11
+	dcbt	0, r8
+	L_V0TOV19	r3, r4, r8, r6, r10, r11
 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -627,9 +628,9 @@ _CPU_load_altivec_volatile:
 _CPU_Context_switch_altivec:
 	/* fetch offset of altivec area in context */
-	CMPOFF	r5
+	CMPOFF	r8
 	/* down-align 'to' area to cache-line boundary */
-	add	r4, r4, r5
+	add	r4, r4, r8
 	CACHE_DOWNALGN	r4
 	/* Check for PSIM */
@@ -658,21 +659,21 @@ _CPU_Context_switch_altivec:
 	/* SAVE NON-VOLATILE REGISTERS */
-	/* Compute aligned destination pointer (r5 still holds offset
+	/* Compute aligned destination pointer (r8 still holds offset
 	 * to 'altivec' area in context)
 	 */
-	add	r3, r3, r5
+	add	r3, r3, r8
 	CACHE_DOWNALGN	r3
-	PREP_FOR_SAVE	r0, r3, r5, r6, r7, r10
+	PREP_FOR_SAVE	r0, r3, r8, r6, r7, r10
 	/* The manual says reading vscr can take some time - do
 	 * read it here (into a volatile vector register) while
 	 * we wait for cache blocks to be allocated
 	 */
 	mfvscr	v0
-	S_V20TOV31	_LRU=l, _B0=r3, _B1=r5, _B2=r6, _B3=r7, _O1=r10, _O2=r11
+	S_V20TOV31	_LRU=l, _B0=r3, _B1=r8, _B2=r6, _B3=r7, _O1=r10, _O2=r11
 	/* vrsave is now in r0 (PREP_FOR_SAVE), vscr in v0 */
-	S_VSCR_VRSAVE	r0, v0, r3, r5
+	S_VSCR_VRSAVE	r0, v0, r3, r8
 1:
@@ -681,8 +682,8 @@ _CPU_Context_switch_altivec:
 	/* Advance past vrsave/vscr area */
 	addi	r4, r4, PPC_CACHE_ALIGNMENT
 	L_VSCR_VRSAVE	r4, r0, v0
-	CMP_BASES	r4, r5, r6, r7, r10
-	L_V20TOV31	r4, r5, r6, r7, r10, r11
+	CMP_BASES	r4, r8, r6, r7, r10
+	L_V20TOV31	r4, r8, r6, r7, r10, r11
 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -691,12 +692,12 @@ _CPU_Context_switch_altivec:
 	.global	_CPU_Context_initialize_altivec
 _CPU_Context_initialize_altivec:
-	CMPOFF	r5
-	add	r3, r3, r5
+	CMPOFF	r8
+	add	r3, r3, r8
 	CACHE_DOWNALGN	r3
-	lis	r5, _CPU_altivec_vrsave_initval@ha
-	lwz	r5, _CPU_altivec_vrsave_initval@l(r5)
-	stw	r5, VRSAVE_OFF(r3)
+	lis	r8, _CPU_altivec_vrsave_initval@ha
+	lwz	r8, _CPU_altivec_vrsave_initval@l(r8)
+	stw	r8, VRSAVE_OFF(r3)
 	lis	r6, _CPU_altivec_vscr_initval@ha
 	lwz	r6, _CPU_altivec_vscr_initval@l(r6)
 	stw	r6, VSCR_OFF(r3)
@@ -715,8 +716,8 @@ _CPU_Context_initialize_altivec:
 	 */
 	.global	_CPU_altivec_set_vrsave_initval
 _CPU_altivec_set_vrsave_initval:
-	lis	r5, _CPU_altivec_vrsave_initval@ha
-	stw	r3, _CPU_altivec_vrsave_initval@l(r5)
+	lis	r8, _CPU_altivec_vrsave_initval@ha
+	stw	r3, _CPU_altivec_vrsave_initval@l(r8)
 	mtvrsave	r3
 	blr
@@ -771,10 +772,10 @@ _CPU_altivec_load_all:
 	/* Start preloading 2nd line (where first two vectors are) */
 	dcbt	0, r3
 	L_VSCR_VRSAVE	r3, r0, v0
-	CMP_BASES	r3, r4, r5, r6, r10
+	CMP_BASES	r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are) */
-	dcbt	0, r5
-	L_V0TOV31	r3, r4, r5, r6, r10, r11
+	dcbt	0, r8
+	L_V0TOV31	r3, r4, r8, r6, r10, r11
 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -794,12 +795,12 @@ _CPU_altivec_save_all:
 	mfcr	r9
 #endif
-	PREP_FOR_SAVE	r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE	r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV31	_B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV31	_B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr	v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE	r0, v0, r3, r11
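
Within the AltiVec module the substitution is mechanical: every scratch use of r5 above becomes r8, which, like r5, is a volatile register under the PowerPC EABI. Note that r8 had no .set alias in this file before this change (it is added in the first hunk), so it could not have been in use here already.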

File 2 of 2:

@@ -435,11 +435,9 @@ PROC (_CPU_Context_switch):
 restore_context:
 #if defined(__ALTIVEC__) && !defined(PPC_MULTILIB_ALTIVEC)
-	mr	r14, r5
 	mr	r4, r5
 	.extern	_CPU_Context_switch_altivec
 	bl	_CPU_Context_switch_altivec
-	mr	r5, r14
 #endif
 	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)
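
A side effect of dropping the .set r5, 5 alias in the first file is that any leftover use of r5 in the AltiVec module should now fail to assemble as an undefined symbol, turning the "do not use r5" convention into something the assembler enforces rather than a comment-only rule.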