powerpc: Optimize AltiVec context switch
Use r8 instead of r5 to slightly optimize _CPU_Context_switch(). It is not a big deal; however, the AltiVec support code already assumes that r12 is used by _CPU_Context_switch(), so treat r5 in the same way.
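The payoff is visible in the last hunk below: _CPU_Context_switch() keeps the 'to' context pointer in r5 across the call into the AltiVec support code, and previously had to rescue it through the non-volatile r14. With the AltiVec code scratching r8 instead, both moves disappear. A minimal sketch of the call site before and after, reconstructed from that hunk:

	/* Before: r5 (the 'to' context) had to be saved and restored by hand */
	mr	r14, r5                        /* stash r5 in a non-volatile register */
	mr	r4, r5
	bl	_CPU_Context_switch_altivec    /* used to clobber r5 */
	mr	r5, r14                        /* recover the 'to' context pointer */

	/* After: the AltiVec code leaves r5 untouched, so it survives the call */
	mr	r4, r5
	bl	_CPU_Context_switch_altivec
	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)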
@@ -73,9 +73,10 @@
 	.set	r0, 0
 	.set	r3, 3
 	.set	r4, 4
-	.set	r5, 5
+	/* Do not use r5, since this is used by _CPU_Context_switch() */
 	.set	r6, 6
 	.set	r7, 7
+	.set	r8, 8
 	.set	r9, 9
 	.set	r10, 10
 	.set	r11, 11
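Note that GNU as does not predefine the rN register names for PowerPC by default (that is what the .set aliases above are for), so replacing the r5 alias with a comment turns any accidental use of r5 in this file into an assemble-time error rather than silent corruption. A hypothetical example:

	add	r4, r4, r5	/* now fails to assemble: symbol `r5' is undefined */
	add	r4, r4, r8	/* fine: .set r8, 8 is defined above */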
@@ -578,12 +579,12 @@ _CPU_save_altivec_volatile:
 	mfcr	r9
 #endif

-	PREP_FOR_SAVE	r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE	r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV19	_B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV19	_B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr	v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE	r0, v0, r3, r11
@@ -613,10 +614,10 @@ _CPU_load_altivec_volatile:
 	/* Start preloading 2nd line (where first two vectors are) */
 	dcbt	0, r3
 	L_VSCR_VRSAVE	r3, r0, v0
-	CMP_BASES	r3, r4, r5, r6, r10
+	CMP_BASES	r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are) */
-	dcbt	0, r5
-	L_V0TOV19	r3, r4, r5, r6, r10, r11
+	dcbt	0, r8
+	L_V0TOV19	r3, r4, r8, r6, r10, r11

 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -627,9 +628,9 @@ _CPU_load_altivec_volatile:
 _CPU_Context_switch_altivec:

 	/* fetch offset of altivec area in context */
-	CMPOFF	r5
+	CMPOFF	r8
 	/* down-align 'to' area to cache-line boundary */
-	add	r4, r4, r5
+	add	r4, r4, r8
 	CACHE_DOWNALGN	r4

 	/* Check for PSIM */
@@ -658,21 +659,21 @@ _CPU_Context_switch_altivec:

 	/* SAVE NON-VOLATILE REGISTERS */

-	/* Compute aligned destination pointer (r5 still holds offset
+	/* Compute aligned destination pointer (r8 still holds offset
 	 * to 'altivec' area in context)
 	 */
-	add	r3, r3, r5
+	add	r3, r3, r8
 	CACHE_DOWNALGN	r3

-	PREP_FOR_SAVE	r0, r3, r5, r6, r7, r10
+	PREP_FOR_SAVE	r0, r3, r8, r6, r7, r10
 	/* The manual says reading vscr can take some time - do
 	 * read it here (into a volatile vector register) while
 	 * we wait for cache blocks to be allocated
 	 */
 	mfvscr	v0
-	S_V20TOV31	_LRU=l, _B0=r3, _B1=r5, _B2=r6, _B3=r7, _O1=r10, _O2=r11
+	S_V20TOV31	_LRU=l, _B0=r3, _B1=r8, _B2=r6, _B3=r7, _O1=r10, _O2=r11
 	/* vrsave is now in r0 (PREP_FOR_SAVE), vscr in v0 */
-	S_VSCR_VRSAVE	r0, v0, r3, r5
+	S_VSCR_VRSAVE	r0, v0, r3, r8

 1:

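The S_*/L_* macro bodies are not part of this diff. As a rough sketch only, the store pattern implied by the comments elsewhere in the file (four base registers 16 bytes apart, an index register starting at zero) could expand along these lines, with the register roles taken from the S_V20TOV31 invocation above:

	/* One 64-byte cache line holds four 16-byte AltiVec registers */
	stvx	v20, r3, r10	/* _B0: line base + 0  */
	stvx	v21, r8, r10	/* _B1: line base + 16 */
	stvx	v22, r6, r10	/* _B2: line base + 32 */
	stvx	v23, r7, r10	/* _B3: line base + 48 */
	/* ... continue with the second index register (_O2 = r11) ... */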
@@ -681,8 +682,8 @@ _CPU_Context_switch_altivec:
 	/* Advance past vrsave/vscr area */
 	addi	r4, r4, PPC_CACHE_ALIGNMENT
 	L_VSCR_VRSAVE	r4, r0, v0
-	CMP_BASES	r4, r5, r6, r7, r10
-	L_V20TOV31	r4, r5, r6, r7, r10, r11
+	CMP_BASES	r4, r8, r6, r7, r10
+	L_V20TOV31	r4, r8, r6, r7, r10, r11

 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -691,12 +692,12 @@ _CPU_Context_switch_altivec:

 .global _CPU_Context_initialize_altivec
 _CPU_Context_initialize_altivec:
-	CMPOFF	r5
-	add	r3, r3, r5
+	CMPOFF	r8
+	add	r3, r3, r8
 	CACHE_DOWNALGN	r3
-	lis	r5, _CPU_altivec_vrsave_initval@ha
-	lwz	r5, _CPU_altivec_vrsave_initval@l(r5)
-	stw	r5, VRSAVE_OFF(r3)
+	lis	r8, _CPU_altivec_vrsave_initval@ha
+	lwz	r8, _CPU_altivec_vrsave_initval@l(r8)
+	stw	r8, VRSAVE_OFF(r3)
 	lis	r6, _CPU_altivec_vscr_initval@ha
 	lwz	r6, _CPU_altivec_vscr_initval@l(r6)
 	stw	r6, VSCR_OFF(r3)
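For reference, the @ha/@l pair used here is the standard 32-bit PowerPC idiom for loading a word from an absolute address: lis materializes the high half (adjusted for the sign of the low half), and the low half becomes the load's displacement:

	lis	r8, _CPU_altivec_vrsave_initval@ha	/* r8 = high part of the address */
	lwz	r8, _CPU_altivec_vrsave_initval@l(r8)	/* r8 = word at the full address */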
@@ -715,8 +716,8 @@ _CPU_Context_initialize_altivec:
 	 */
 .global _CPU_altivec_set_vrsave_initval
 _CPU_altivec_set_vrsave_initval:
-	lis	r5, _CPU_altivec_vrsave_initval@ha
-	stw	r3, _CPU_altivec_vrsave_initval@l(r5)
+	lis	r8, _CPU_altivec_vrsave_initval@ha
+	stw	r3, _CPU_altivec_vrsave_initval@l(r8)
 	mtvrsave	r3
 	blr

@@ -771,10 +772,10 @@ _CPU_altivec_load_all:
 	/* Start preloading 2nd line (where first two vectors are) */
 	dcbt	0, r3
 	L_VSCR_VRSAVE	r3, r0, v0
-	CMP_BASES	r3, r4, r5, r6, r10
+	CMP_BASES	r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are) */
-	dcbt	0, r5
-	L_V0TOV31	r3, r4, r5, r6, r10, r11
+	dcbt	0, r8
+	L_V0TOV31	r3, r4, r8, r6, r10, r11

 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -794,12 +795,12 @@ _CPU_altivec_save_all:
 	mfcr	r9
 #endif

-	PREP_FOR_SAVE	r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE	r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV31	_B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV31	_B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr	v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE	r0, v0, r3, r11
@@ -435,11 +435,9 @@ PROC (_CPU_Context_switch):
 restore_context:

 #if defined(__ALTIVEC__) && !defined(PPC_MULTILIB_ALTIVEC)
-	mr	r14, r5
 	mr	r4, r5
 	.extern	_CPU_Context_switch_altivec
 	bl	_CPU_Context_switch_altivec
-	mr	r5, r14
 #endif

 	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)