arm: Support VFP-D32 and Neon

This commit is contained in:
Sebastian Huber
2013-05-08 09:30:31 +02:00
parent 9dcc6837de
commit cfd8d7a3d7
12 changed files with 415 additions and 40 deletions

View File

@@ -2,6 +2,6 @@ include $(RTEMS_ROOT)/make/custom/default.cfg
RTEMS_CPU = arm
CPU_CFLAGS = -mcpu=cortex-a9 -mthumb
CPU_CFLAGS = -march=armv7-a -mthumb -mfpu=neon -mfloat-abi=hard -mtune=cortex-a9
CFLAGS_OPTIMIZE_V ?= -O0 -g

View File

@@ -154,6 +154,26 @@ _start:
/* Stay in SVC mode */
/*
 * Enable access to the VFP/Neon unit during early startup.  Clobbers R0.
 */
#ifdef ARM_MULTILIB_VFP_D32
/* Read CPACR */
mrc p15, 0, r0, c1, c0, 2
/*
 * Enable CP10 and CP11
 *
 * NOTE(review): only the low bit of each two-bit access field is set here.
 * Unless the upper bits are already set, this presumably grants access in
 * privileged modes only -- confirm that this is sufficient for every mode
 * the system executes floating point code in.
 */
orr r0, r0, #(1 << 20)
orr r0, r0, #(1 << 22)
/* Clear ASEDIS and D32DIS */
bic r0, r0, #(3 << 30)
/* Write CPACR */
mcr p15, 0, r0, c1, c0, 2
/* Synchronize so that the new CPACR value is in effect for the FPEXC access below */
isb
/* Enable FPU: only bit 30 is set, every other FPEXC bit is written as zero */
mov r0, #(1 << 30)
vmsr FPEXC, r0
#endif
/*
* Branch to start hook 0.
*

View File

@@ -2,6 +2,6 @@ include $(RTEMS_ROOT)/make/custom/default.cfg
RTEMS_CPU = arm
CPU_CFLAGS = -mcpu=cortex-a9 -mthumb
CPU_CFLAGS = -march=armv7-a -mthumb -mfpu=neon -mfloat-abi=hard -mtune=cortex-a9
CFLAGS_OPTIMIZE_V ?= -O0 -g

View File

@@ -17,6 +17,7 @@
#endif
#include <rtems/asm.h>
#include <rtems/score/cpu.h>
#define FRAME_OFFSET_R4 0
#define FRAME_OFFSET_R5 4
@@ -28,7 +29,20 @@
#define FRAME_OFFSET_R11 28
#define FRAME_OFFSET_LR 32
#define FRAME_SIZE (FRAME_OFFSET_LR + 4)
/*
 * Frame layout extension for the VFP-D32 variant: one double-word slot for
 * each of D8-D15 placed behind the core register slots.  The first slot is at
 * offset 40 and not directly behind the LR slot (which ends at offset 36),
 * presumably to keep the double-word slots 8-byte aligned relative to the
 * frame base.  Without VFP-D32 the frame ends right after the LR slot.
 */
#ifdef ARM_MULTILIB_VFP_D32
#define FRAME_OFFSET_D8 40
#define FRAME_OFFSET_D9 48
#define FRAME_OFFSET_D10 56
#define FRAME_OFFSET_D11 64
#define FRAME_OFFSET_D12 72
#define FRAME_OFFSET_D13 80
#define FRAME_OFFSET_D14 88
#define FRAME_OFFSET_D15 96
#define FRAME_SIZE (FRAME_OFFSET_D15 + 8)
#else
#define FRAME_SIZE (FRAME_OFFSET_LR + 4)
#endif
.section .text
@@ -57,6 +71,17 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
mov r1, lr
str r1, [sp, #FRAME_OFFSET_LR]
#ifdef ARM_MULTILIB_VFP_D32
/*
 * Save D8-D15 in their frame slots.  These registers are overwritten by the
 * fill sequence below and are loaded back from the same slots in the restore
 * path.
 */
vstr d8, [sp, #FRAME_OFFSET_D8]
vstr d9, [sp, #FRAME_OFFSET_D9]
vstr d10, [sp, #FRAME_OFFSET_D10]
vstr d11, [sp, #FRAME_OFFSET_D11]
vstr d12, [sp, #FRAME_OFFSET_D12]
vstr d13, [sp, #FRAME_OFFSET_D13]
vstr d14, [sp, #FRAME_OFFSET_D14]
vstr d15, [sp, #FRAME_OFFSET_D15]
#endif
/* Fill */
/* R1 is used for temporary values */
@@ -70,7 +95,20 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
mov \reg, r1
.endm
#ifdef ARM_MULTILIB_VFP_D32
/* R3 contains the FPSCR */
/*
 * Replace the FPSCR bits selected by the mask 0xf800001f (bits 31-27 and
 * bits 4-0; per the ARM architecture these are the N, Z, C, V and QC flags
 * and the cumulative exception flags) with the corresponding bits of R0 and
 * keep all other FPSCR bits.  R3 keeps the value written so that the check
 * code can compare the FPSCR against it.  R4 is only scratch here, it is
 * filled right after this block.
 */
vmrs r3, FPSCR
movs r4, #0x001f
movt r4, #0xf800
bic r3, r3, r4
and r4, r4, r0
orr r3, r3, r4
vmsr FPSCR, r3
#else
/* Without VFP-D32, R3 is an ordinary pattern register */
fill_register r3
#endif
fill_register r4
fill_register r5
fill_register r6
@@ -82,6 +120,46 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_validate)
fill_register r12
fill_register lr
#ifdef ARM_MULTILIB_VFP_D32
/*
 * Give the double-word register \dreg a unique value: R1 is advanced by one
 * and then copied into both 32-bit halves of the register.
 */
.macro fill_vfp_register dreg
add r1, r1, #1
vmov \dreg, r1, r1
.endm
/* Fill D0 up to D31 in ascending order */
.irp vfp_reg, d0, d1, d2, d3, d4, d5, d6, d7
fill_vfp_register \vfp_reg
.endr
.irp vfp_reg, d8, d9, d10, d11, d12, d13, d14, d15
fill_vfp_register \vfp_reg
.endr
.irp vfp_reg, d16, d17, d18, d19, d20, d21, d22, d23
fill_vfp_register \vfp_reg
.endr
.irp vfp_reg, d24, d25, d26, d27, d28, d29, d30, d31
fill_vfp_register \vfp_reg
.endr
#endif
/* Check */
check:
@@ -96,7 +174,10 @@ check:
mov r1, r0
/*
 * In the VFP-D32 configuration R3 holds the FPSCR value and not a fill
 * pattern value, so it is only checked against the pattern sequence
 * otherwise.
 */
#ifndef ARM_MULTILIB_VFP_D32
check_register r3
#endif
check_register r4
check_register r5
check_register r6
@@ -108,6 +189,10 @@ check:
check_register r12
check_register lr
#ifdef ARM_MULTILIB_VFP_D32
/* Continue with the VFP register checks, check_vfp branches back to check on its own */
b check_vfp
#endif
/* Loop again; not reached in the VFP-D32 configuration due to the unconditional branch above */
b check
/* Restore */
@@ -132,8 +217,81 @@ restore:
ldr r1, [sp, #FRAME_OFFSET_LR]
mov lr, r1
#ifdef ARM_MULTILIB_VFP_D32
/* Load D8-D15 back from the frame slots they were saved to on entry */
vldr d8, [sp, #FRAME_OFFSET_D8]
vldr d9, [sp, #FRAME_OFFSET_D9]
vldr d10, [sp, #FRAME_OFFSET_D10]
vldr d11, [sp, #FRAME_OFFSET_D11]
vldr d12, [sp, #FRAME_OFFSET_D12]
vldr d13, [sp, #FRAME_OFFSET_D13]
vldr d14, [sp, #FRAME_OFFSET_D14]
vldr d15, [sp, #FRAME_OFFSET_D15]
#endif
add sp, sp, #FRAME_SIZE
bx lr
FUNCTION_END(_CPU_Context_validate)
#ifdef ARM_MULTILIB_VFP_D32
/*
 * VFP part of the check loop.  Entered by a branch from the end of the core
 * register checks with R1 holding the running expected value and R3 holding
 * the FPSCR value written by the fill code.  Branches to restore on the first
 * mismatch, otherwise back to check.
 */
check_vfp:
/*
 * Advance the expected value in R1 by one and verify that both 32-bit halves
 * of \reg are equal to it.  Branches to restore on a mismatch.  Clobbers R4,
 * R5 and the condition flags.
 */
.macro check_vfp_register reg
add r1, r1, #1
vmov r4, r5, \reg
cmp r4, r5
bne 1f
cmp r1, r4
bne 1f
b 2f
1:
b restore
2:
.endm
/* The FPSCR must still be equal to the value kept in R3 */
vmrs r4, FPSCR
cmp r4, r3
bne restore
/* Same register order as used by the fill code */
check_vfp_register d0
check_vfp_register d1
check_vfp_register d2
check_vfp_register d3
check_vfp_register d4
check_vfp_register d5
check_vfp_register d6
check_vfp_register d7
check_vfp_register d8
check_vfp_register d9
check_vfp_register d10
check_vfp_register d11
check_vfp_register d12
check_vfp_register d13
check_vfp_register d14
check_vfp_register d15
check_vfp_register d16
check_vfp_register d17
check_vfp_register d18
check_vfp_register d19
check_vfp_register d20
check_vfp_register d21
check_vfp_register d22
check_vfp_register d23
check_vfp_register d24
check_vfp_register d25
check_vfp_register d26
check_vfp_register d27
check_vfp_register d28
check_vfp_register d29
check_vfp_register d30
check_vfp_register d31
/* Restore r4 and r5 */
/*
 * R4 and R5 were used as scratch by the checks above.  Restart the value
 * sequence from R0 and refill them, presumably reproducing the values they
 * got during the initial fill -- verify against the fill_register order.
 */
mov r1, r0
fill_register r4
fill_register r5
b check
#endif

View File

@@ -27,6 +27,46 @@ FUNCTION_THUMB_ENTRY(_CPU_Context_volatile_clobber)
mov \reg, r0
.endm
#ifdef ARM_MULTILIB_VFP_D32
/*
 * Overwrite the FPSCR bits selected by the mask 0xf800001f with the
 * corresponding bits of R0 and keep the remaining FPSCR bits.  R1 and R2
 * serve as scratch registers.
 */
vmrs r1, FPSCR
movs r2, #0x001f
movt r2, #0xf800
bic r1, r1, r2
and r2, r2, r0
orr r1, r1, r2
vmsr FPSCR, r1
/*
 * Decrement R0 by one and copy it into both 32-bit halves of the double-word
 * register \dreg.
 */
.macro clobber_vfp_register dreg
sub r0, r0, #1
vmov \dreg, r0, r0
.endm
/* Only D0-D7 and D16-D31 are overwritten, D8-D15 are left untouched */
.irp vfp_reg, d0, d1, d2, d3, d4, d5, d6, d7
clobber_vfp_register \vfp_reg
.endr
.irp vfp_reg, d16, d17, d18, d19, d20, d21, d22, d23
clobber_vfp_register \vfp_reg
.endr
.irp vfp_reg, d24, d25, d26, d27, d28, d29, d30, d31
clobber_vfp_register \vfp_reg
.endr
#endif
clobber_register r1
clobber_register r2
clobber_register r3

View File

@@ -19,6 +19,29 @@
#include <inttypes.h>

#include <rtems/score/cpu.h>
#include <rtems/bspIo.h>
/**
 * @brief Prints the content of a saved VFP context via printk().
 *
 * Prints FPEXC and FPSCR followed by the registers D0 up to D31, one register
 * per line as a 64-bit hexadecimal value.  Nothing is printed in case the
 * VFP-D32 variant is not configured or the context pointer is NULL.
 *
 * @param vfp_context The VFP context to print, may be NULL.
 */
static void _ARM_VFP_context_print( const ARM_VFP_context *vfp_context )
{
#ifdef ARM_MULTILIB_VFP_D32
  if ( vfp_context != NULL ) {
    /* The D registers are consecutive uint64_t members starting at D0 */
    const uint64_t *dx = &vfp_context->register_d0;
    int i;

    /*
     * Use the <inttypes.h> format macros, since uint32_t is not necessarily
     * an unsigned int on this target.
     */
    printk(
      "FPEXC = 0x%08" PRIx32 "\nFPSCR = 0x%08" PRIx32 "\n",
      vfp_context->register_fpexc,
      vfp_context->register_fpscr
    );

    for ( i = 0; i < 32; ++i ) {
      /* Split the value, so that no 64-bit format support is required */
      uint32_t low = (uint32_t) dx[i];
      uint32_t high = (uint32_t) (dx[i] >> 32);

      printk( "D%02i = 0x%08" PRIx32 "%08" PRIx32 "\n", i, high, low );
    }
  }
#else
  /* Avoid an unused parameter warning */
  (void) vfp_context;
#endif
}
void _CPU_Exception_frame_print( const CPU_Exception_frame *frame )
{
printk(
@@ -59,4 +82,6 @@ void _CPU_Exception_frame_print( const CPU_Exception_frame *frame )
#endif
frame->vector
);
_ARM_VFP_context_print( frame->vfp_context );
}

View File

@@ -7,12 +7,13 @@
*/
/*
* Copyright (c) 2009
* embedded brains GmbH
* Obere Lagerstr. 30
* D-82178 Puchheim
* Germany
* <rtems@embedded-brains.de>
* Copyright (c) 2009-2013 embedded brains GmbH. All rights reserved.
*
* embedded brains GmbH
* Dornierstr. 4
* 82178 Puchheim
* Germany
* <rtems@embedded-brains.de>
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
@@ -45,6 +46,10 @@
#define CONTEXT_LIST {r0, r1, r2, r3, EXCHANGE_LR, EXCHANGE_SPSR, r12}
#define CONTEXT_SIZE 28
/*
 * Stack space used by the interrupt entry for the VFP registers: 24
 * double-word registers (D0-D7 and D16-D31), four bytes for the FPSCR and
 * four spare bytes, since the save area start is rounded up to an 8-byte
 * boundary.
 */
#ifdef ARM_MULTILIB_VFP_D32
#define VFP_CONTEXT_WITH_ALIGNMENT_SPACE (24 * 8 + 4 + 4)
#endif
.extern _Thread_Dispatch_disable_level
.extern bsp_interrupt_dispatch
@@ -74,6 +79,17 @@ _ARMV4_Exception_interrupt:
stmdb sp!, CONTEXT_LIST
stmdb sp!, {lr}
#ifdef ARM_MULTILIB_VFP_D32
/* Save VFP context */
/*
 * R0 and R1 are part of CONTEXT_LIST, which was pushed above, so they are
 * used as scratch registers here.  D8-D15 are not saved at this point.
 */
sub sp, #VFP_CONTEXT_WITH_ALIGNMENT_SPACE
/* R1 = stack pointer rounded up to an 8-byte boundary (assumes a word aligned SP) */
add r1, sp, #4
vmrs r0, FPSCR
bic r1, r1, #7
vstmia r1!, {d0-d7}
vstmia r1!, {d16-d31}
/* The FPSCR is stored directly behind the D registers */
str r0, [r1]
#endif
/* Remember INT stack pointer */
mov r1, EXCHANGE_INT_SP
@@ -144,6 +160,17 @@ thread_dispatch_done:
/* Switch to ARM instructions if necessary */
SWITCH_FROM_THUMB_TO_ARM
#ifdef ARM_MULTILIB_VFP_D32
/* Restore VFP context */
/*
 * Recompute the aligned save area address with the same rounding as used by
 * the save sequence; this yields the same address provided that SP has the
 * value it had right after the save sequence.
 */
add r1, sp, #4
bic r1, r1, #7
vldmia r1!, {d0-d7}
vldmia r1!, {d16-d31}
/* R0 = saved FPSCR, stored directly behind the D registers */
ldr r0, [r1]
add sp, #VFP_CONTEXT_WITH_ALIGNMENT_SPACE
vmsr FPSCR, r0
#endif
/* Restore link register */
ldmia sp!, {lr}

View File

@@ -21,6 +21,9 @@
#ifdef ARM_MULTILIB_ARCH_V4
/*
 * Size of the exception frame part which starts at the SP member.  The
 * exception entries reserve this space on the stack and then push R0-R12
 * below it.
 */
#define MORE_CONTEXT_SIZE \
(ARM_EXCEPTION_FRAME_SIZE - ARM_EXCEPTION_FRAME_REGISTER_SP_OFFSET)
.extern _ARM_Exception_default
.globl _ARMV4_Exception_undef_default
@@ -38,7 +41,7 @@
_ARMV4_Exception_undef_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #1
@@ -47,7 +50,7 @@ _ARMV4_Exception_undef_default:
_ARMV4_Exception_swi_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #2
@@ -56,7 +59,7 @@ _ARMV4_Exception_swi_default:
_ARMV4_Exception_pref_abort_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #3
@@ -65,28 +68,28 @@ _ARMV4_Exception_pref_abort_default:
_ARMV4_Exception_data_abort_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #4
_ARMV4_Exception_reserved_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #5
_ARMV4_Exception_irq_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #6
_ARMV4_Exception_fiq_default:
/* Save context and load vector */
sub sp, #20
sub sp, #MORE_CONTEXT_SIZE
stmdb sp!, {r0-r12}
mov r4, #7
@@ -99,14 +102,36 @@ save_more_context:
orr r5, r3, #ARM_PSR_I
bic r5, #ARM_PSR_T
msr cpsr, r5
mov r0, sp
sub r0, sp, #ARM_EXCEPTION_FRAME_SIZE
mov r1, lr
msr cpsr, r7
add r5, sp, #72
stmdb r5!, {r0-r4}
mov r5, #0
add r6, sp, #ARM_EXCEPTION_FRAME_REGISTER_SP_OFFSET
stm r6, {r0-r5}
/* Argument for high level handler */
mov r0, sp
#ifdef ARM_MULTILIB_VFP_D32
/* Ensure that the FPU is enabled */
/* R1 = FPEXC; skip the VFP save in case bit 30 (FPU enable) is clear */
vmrs r1, FPEXC
tst r1, #(1 << 30)
beq fpu_save_done
/* R3 = address of the VFP context pointer member in the exception frame */
add r3, sp, #ARM_EXCEPTION_FRAME_VFP_CONTEXT_OFFSET
/*
 * Reserve the VFP context plus four spare bytes below the exception frame.
 * SP moves here, so the frame address has to be taken before this point.
 */
sub sp, #(ARM_VFP_CONTEXT_SIZE + 4)
/* R4 = VFP context address rounded up to an 8-byte boundary (assumes a word aligned SP) */
add r4, sp, #4
bic r4, r4, #7
/* Publish the VFP context address in the exception frame */
str r4, [r3]
vmrs r2, FPSCR
/* Store FPEXC and FPSCR first, followed by D0-D31 */
stmia r4!, {r1-r2}
vstmia r4!, {d0-d15}
vstmia r4!, {d16-d31}
fpu_save_done:
#endif
/* Call high level handler */
mov r0, sp
SWITCH_FROM_ARM_TO_THUMB r1
bl _ARM_Exception_default

View File

@@ -34,6 +34,29 @@
#include <rtems/score/thread.h>
#include <rtems/score/cpu.h>
/*
 * Compile-time checks which ensure that the layout constants used by the
 * assembler code agree with the C structure definitions.
 */
#ifdef ARM_MULTILIB_VFP_D32
/* Offset of the D8 member used by the context switch code */
RTEMS_STATIC_ASSERT(
offsetof( Context_Control, register_d8 ) == ARM_CONTEXT_CONTROL_D8_OFFSET,
ARM_CONTEXT_CONTROL_D8_OFFSET
);
#endif
/* Overall size of the exception frame */
RTEMS_STATIC_ASSERT(
sizeof( CPU_Exception_frame ) == ARM_EXCEPTION_FRAME_SIZE,
ARM_EXCEPTION_FRAME_SIZE
);
/* Offset of the SP member in the exception frame */
RTEMS_STATIC_ASSERT(
offsetof( CPU_Exception_frame, register_sp )
== ARM_EXCEPTION_FRAME_REGISTER_SP_OFFSET,
ARM_EXCEPTION_FRAME_REGISTER_SP_OFFSET
);
/* Size of the VFP context saved by the exception entry code */
RTEMS_STATIC_ASSERT(
sizeof( ARM_VFP_context ) == ARM_VFP_CONTEXT_SIZE,
ARM_VFP_CONTEXT_SIZE
);
#ifdef ARM_MULTILIB_ARCH_V4
/*

View File

@@ -30,7 +30,7 @@
#endif
#include <rtems/asm.h>
#include <rtems/score/cpu_asm.h>
#include <rtems/score/cpu.h>
#ifdef ARM_MULTILIB_ARCH_V4
@@ -57,9 +57,19 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch)
mrs r2, cpsr
stmia r0, {r2, r4, r5, r6, r7, r8, r9, r10, r11, r13, r14}
#ifdef ARM_MULTILIB_VFP_D32
/* Store D8-D15 into the context addressed by R0 behind the core registers, R3 is scratch */
add r3, r0, #ARM_CONTEXT_CONTROL_D8_OFFSET
vstm r3, {d8-d15}
#endif
/* Start restoring context */
_restore:
#ifdef ARM_MULTILIB_VFP_D32
/* Load D8-D15 from the context addressed by R1, R3 is scratch */
add r3, r1, #ARM_CONTEXT_CONTROL_D8_OFFSET
vldm r3, {d8-d15}
#endif
ldmia r1, {r2, r4, r5, r6, r7, r8, r9, r10, r11, r13, r14}
msr cpsr, r2
#ifdef __thumb__

View File

@@ -37,15 +37,12 @@ extern "C" {
#define ARM_MULTILIB_ARCH_V4
#endif
/* All ARM CPUs are assumed to not have floating point units */
#if defined(__SOFTFP__)
#define ARM_HAS_FPU 0
#else
#define ARM_HAS_FPU 1
#warning "FPU-support not yet implemented for the arm"
/*
 * Select the VFP-D32 multilib variant in case the compiler targets Neon.  Any
 * other configuration which is not a software floating point configuration is
 * rejected at compile time.
 */
#if defined(__ARM_NEON__)
#define ARM_MULTILIB_VFP_D32
#elif !defined(__SOFTFP__)
#error "FPU support not implemented"
#endif
/*
* Define the name of the CPU family.
*/

View File

@@ -8,7 +8,7 @@
* This include file contains information pertaining to the ARM
* processor.
*
* Copyright (c) 2009-2011 embedded brains GmbH.
* Copyright (c) 2009-2013 embedded brains GmbH.
*
* Copyright (c) 2007 Ray Xu <Rayx.cn@gmail.com>
*
@@ -128,11 +128,7 @@
#define CPU_ISR_PASSES_FRAME_POINTER 0
#if ( ARM_HAS_FPU == 1 )
#define CPU_HARDWARE_FP TRUE
#else
#define CPU_HARDWARE_FP FALSE
#endif
#define CPU_HARDWARE_FP FALSE
#define CPU_SOFTWARE_FP FALSE
@@ -214,6 +210,18 @@
/** @} */
/*
 * Layout constants shared with the assembler code.  They must agree with the
 * Context_Control, CPU_Exception_frame and ARM_VFP_context structures.
 */
#ifdef ARM_MULTILIB_VFP_D32
/* Offset of register_d8 in Context_Control */
#define ARM_CONTEXT_CONTROL_D8_OFFSET 48
#endif
/* Size of CPU_Exception_frame in bytes */
#define ARM_EXCEPTION_FRAME_SIZE 76
/* Offset of register_sp in CPU_Exception_frame */
#define ARM_EXCEPTION_FRAME_REGISTER_SP_OFFSET 52
/* Offset of the vfp_context pointer in CPU_Exception_frame */
#define ARM_EXCEPTION_FRAME_VFP_CONTEXT_OFFSET 72
/* Size of ARM_VFP_context in bytes: two 32-bit words plus 32 double-words */
#define ARM_VFP_CONTEXT_SIZE 264
#ifndef ASM
#ifdef __cplusplus
@@ -253,14 +261,22 @@ typedef struct {
#else
void *register_sp;
#endif
#ifdef ARM_MULTILIB_VFP_D32
/*
 * D8-D15, stored and loaded by the context switch code at
 * ARM_CONTEXT_CONTROL_D8_OFFSET.
 */
uint64_t register_d8;
uint64_t register_d9;
uint64_t register_d10;
uint64_t register_d11;
uint64_t register_d12;
uint64_t register_d13;
uint64_t register_d14;
uint64_t register_d15;
#endif
} Context_Control;
typedef struct {
/* Not supported */
} Context_Control_fp;
SCORE_EXTERN Context_Control_fp _CPU_Null_fp_context;
extern uint32_t arm_cpu_mode;
static inline uint32_t arm_interrupt_disable( void )
@@ -419,10 +435,6 @@ void _CPU_Context_restore( Context_Control *new_context )
#define _CPU_Stop_multitasking _ARMV7M_Stop_multitasking
#endif
void _CPU_Context_save_fp( Context_Control_fp **fp_context_ptr );
void _CPU_Context_restore_fp( Context_Control_fp **fp_context_ptr );
void _CPU_Context_volatile_clobber( uintptr_t pattern );
void _CPU_Context_validate( uintptr_t pattern );
@@ -500,6 +512,43 @@ typedef enum {
#endif /* defined(ARM_MULTILIB_ARCH_V4) */
/*
 * VFP context as stored by the exception entry code: FPEXC and FPSCR
 * followed by the double-word registers D0 up to D31.  The print routine
 * accesses the D registers as an array starting at register_d0, so the
 * members must stay consecutive and in ascending order.
 */
typedef struct {
uint32_t register_fpexc;
uint32_t register_fpscr;
uint64_t register_d0;
uint64_t register_d1;
uint64_t register_d2;
uint64_t register_d3;
uint64_t register_d4;
uint64_t register_d5;
uint64_t register_d6;
uint64_t register_d7;
uint64_t register_d8;
uint64_t register_d9;
uint64_t register_d10;
uint64_t register_d11;
uint64_t register_d12;
uint64_t register_d13;
uint64_t register_d14;
uint64_t register_d15;
uint64_t register_d16;
uint64_t register_d17;
uint64_t register_d18;
uint64_t register_d19;
uint64_t register_d20;
uint64_t register_d21;
uint64_t register_d22;
uint64_t register_d23;
uint64_t register_d24;
uint64_t register_d25;
uint64_t register_d26;
uint64_t register_d27;
uint64_t register_d28;
uint64_t register_d29;
uint64_t register_d30;
uint64_t register_d31;
} ARM_VFP_context;
typedef struct {
uint32_t register_r0;
uint32_t register_r1;
@@ -524,6 +573,7 @@ typedef struct {
uint32_t register_xpsr;
uint32_t vector;
#endif
/*
 * Points to the saved VFP context.  NOTE(review): presumably NULL in case no
 * VFP context was saved, the frame print routine checks for NULL -- confirm.
 */
const ARM_VFP_context *vfp_context;
} CPU_Exception_frame;
typedef CPU_Exception_frame CPU_Interrupt_frame;