forked from Imagelibrary/rtems
This bootloader is only used by the motorola_powerpc BSP. This patch is a part of the BSP source reorganization. Update #3285.
4554 lines
111 KiB
ArmAsm
4554 lines
111 KiB
ArmAsm
/*
|
|
* em86real.S
|
|
*
|
|
* Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
|
|
*
|
|
* Modified to compile in RTEMS development environment
|
|
* by Eric Valette
|
|
*
|
|
* Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
|
|
*
|
|
* The license and distribution terms for this file may be
|
|
* found in the file LICENSE in this distribution or at
|
|
* http://www.rtems.org/license/LICENSE.
|
|
*/
|
|
|
|
/* If the symbol __BOOT__ is defined, a slightly different version is
|
|
* generated to be compiled with the -m relocatable option
|
|
*/
|
|
|
|
#ifdef __BOOT__
|
|
#include "bootldr.h"
|
|
/* It is impossible to gather statistics in the boot version */
|
|
#undef EIP_STATS
|
|
#endif
|
|
|
|
/*
|
|
*
|
|
* Given the size of this code, it deserves a few comments on how it works,
|
|
* and why it was implemented the way it is.
|
|
*
|
|
* The goal is to have a real mode i486SX emulator to initialize hardware,
|
|
* mostly graphics boards, by interpreting ROM BIOSes. The choice of a 486SX
|
|
* is logical since this is the lowest processor that PCI ROM BIOSes must run
|
|
* on.
|
|
*
|
|
* The goal of this emulator is not performance, but a small enough memory
|
|
* footprint to include it in a bootloader.
|
|
*
|
|
* It is actually likely to be comparable to a 25MHz 386DX on a 200MHz 603e !
|
|
* This is not as serious as it seems since most of the BIOS code performs
|
|
* a lot of accesses to I/O and non-cacheable memory spaces. For such
|
|
* instructions, the execution time is often dominated by bus accesses.
|
|
* Statistics of the code also show that it spends a large fraction of
|
|
* the time in loops waiting for vertical retrace or programs one of the
|
|
* timers and waits for the count to go down to zero. This type of loop
|
|
* runs emulated at the same speed as on 5 GHz Pentium IV++ ;)
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* Known bugs or differences with a real 486SX (real mode):
|
|
* - segment limits are not enforced (too costly)
|
|
* - xchg instructions with memory are not locked
|
|
* - lock prefixes are not implemented at all
|
|
* - long divides implemented but perhaps still buggy
|
|
* - miscellaneous system instructions not implemented
|
|
* (some probably cannot be implemented)
|
|
* - neither control nor debug registers are implemented for the time being
|
|
* (debug registers are impossible to implement at a reasonable cost)
|
|
*/
|
|
|
|
/* Code options, put them on the compiler command line */
|
|
/* #define EIP_STATS */ /* EIP based profiling */
|
|
/* #undef EIP_STATS */
|
|
|
|
/*
|
|
* Implementation notes:
|
|
*
|
|
* A) flags emulation.
|
|
*
|
|
* The most important decisions when it comes to obtaining a reasonable speed
|
|
* are related to how the EFLAGS register is emulated.
|
|
*
|
|
* Note: the code to set up flags is complex, but it is only seldom
|
|
* executed since cmp and test instructions use much faster flag evaluation
|
|
* paths. For example the overflow flag is almost only needed for pushf and
|
|
* int. Comparison results only involve (SF^OF) or (SF^OF)+ZF and the
|
|
* implementation is fast in this case.
|
|
*
|
|
* Rarely used flags: AC, NT and IOPL are kept in a memory EFLAGS image.
|
|
* All other flags are either kept explicitly in PPC cr (DF, IF, and TF) or
|
|
* lazily evaluated from the state of 4 registers called flags, result, op1,
|
|
* op2, and sometimes the cr itself. The emulation has been designed for
|
|
* minimal overhead for the common case where the flags are never used. With
|
|
* few exceptions, all instructions that set flags leave the result of the
|
|
* computation in a register called result, and operands are taken from op1
|
|
* and op2 registers. However a few instructions like cmp, test and bit tests
|
|
* (bt/btc/btr/bts/bsf/bsr) explicitly set cr bits to short circuit
|
|
* condition code evaluation of conditional instructions.
|
|
*
|
|
* As a very brief summary:
|
|
*
|
|
* - the result of the last flag setting operation is often either in the
|
|
* result register or in op2 after increment or decrement instructions
|
|
* because result and op1 may be needed to compute the carry.
|
|
*
|
|
* - compare instructions leave the result of the unsigned comparison
|
|
* in cr4 and of signed comparison in cr6. This means that:
|
|
* - cr4[0]=CF (short circuit for jc/jnc)
|
|
* - cr4[1]=~(CF+ZF) (short circuit for ja/jna)
|
|
* - cr6[0]=(OF^SF) (short circuit for jl/jnl)
|
|
* - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng)
|
|
* - cr6[2]=ZF (short circuit for jz/jnz)
|
|
*
|
|
* - test instructions set flags in cr6 and clear overflow. This means that:
|
|
* - cr6[0]=SF=(SF^OF) (short circuit for jl/jnl/js/jns)
|
|
* - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng)
|
|
* - cr6[2]=ZF (short circuit for jz/jnz)
|
|
*
|
|
* All flags may be lazily evaluated from several values kept in registers:
|
|
*
|
|
* Flag: Depends upon:
|
|
* OF result, op1, op2, flags[INCDEC_FIELD,SUBTRACTING,OF_STATE_MASK]
|
|
* SF result, op2, flags[INCDEC_FIELD,RES_SIZE]
|
|
* ZF result, op2, cr6[2], flags[INCDEC_FIELD,RES_SIZE,ZF_PROTECT]
|
|
* AF op1, op2, flags[INCDEC_FIELD,SUBTRACTING,CF_IN]
|
|
* PF result, op2, flags[INCDEC_FIELD]
|
|
* CF result, op1, flags[CF_STATE_MASK, CF_IN]
|
|
*
|
|
* The order of the fields in the flags register has been chosen so that a
|
|
* single rlwimi is necessary for common instructions that do not affect all
|
|
* flags. (See the code for inc/dec emulation).
|
|
*
|
|
*
|
|
* B) opcodes and prefixes.
|
|
*
|
|
* The register called opcode holds in its low order 8 bits the opcode
|
|
* (second byte if the first byte is 0x0f). More precisely it holds the
|
|
* last byte fetched before the modrm byte or the immediate operand(s)
|
|
* of the instruction, if any. High order 24 bits are zero unless the
|
|
* instruction has prefixes. These higher order bits have the following
|
|
* meaning:
|
|
* 0x80000000 segment override prefix
|
|
* 0x00001000 repnz prefix (0xf2)
|
|
* 0x00000800 repz prefix (0xf3)
|
|
* 0x00000400 address size prefix (0x67)
|
|
* 0x00000200 operand size prefix (0x66)
|
|
* (bit 0x1000 and 0x800 cannot be set simultaneously)
|
|
*
|
|
* Therefore if there is a segment override the value will be very
|
|
* negative (between 0x80000000 and 0x800016ff), if there is no segment
|
|
* override, the value will be between 0 and 0x16ff. The reason for
|
|
* this choice will be understood in the next part.
|
|
*
|
|
* C) addressing mode description tables.
|
|
*
|
|
* the encoding of the modrm bytes (especially in 16 bit mode) is quite
|
|
* complex. Hence a table, indexed by the five useful bits of the modrm
|
|
* byte is used to simplify decoding. Here is a description:
|
|
*
|
|
* bit mask meaning
|
|
* 0x80000000 use ss as default segment register
|
|
* 0x00004000 means that this addressing mode needs a base register
|
|
* (set for all entries except sib and displacement-only)
|
|
* 0x00002000 set if preceding is not set
|
|
* 0x00001000 set if an sib follows
|
|
* 0x00000700 base register to use (16 and 32 bit)
|
|
* 0x00000080 set in 32 bit addressing mode table, cleared in 16 bit
|
|
* (so extsb mask,entry; ori mask,mask,0xffff gives a mask)
|
|
* 0x00000070 kludge field, possible values are
|
|
* 0: 16 bit addressing mode without index
|
|
* 10: 32 bit addressing mode
|
|
* 60: 16 bit addressing mode with %si as index
|
|
* 70: 16 bit addressing mode with %di as index
|
|
*
|
|
* This convention leads to the following special values used to check for
|
|
* sib present and displacement-only, which happen to be the three lowest
|
|
* values in the table (unsigned):
|
|
* 0x00003090 sib follows (implies it is a 32 bit mode)
|
|
* 0x00002090 32 bit displacement-only
|
|
* 0x00002000 16 bit displacement-only
|
|
*
|
|
* This means that all entries are either very negative in the 0x80002000
|
|
* range if the segment defaults to ss or higher than 0x2000 if it defaults
|
|
* to ds. Combined with the value in opcode this gives the following table:
|
|
* opcode entry entry>opcode ? segment to use
|
|
* positive positive yes ds (default)
|
|
* negative positive yes overridden by prefix
|
|
* positive negative no ss
|
|
* negative negative yes overridden by prefix
|
|
*
|
|
* Hence a simple comparison allows to check for the need to override
|
|
* the current base with ss, i.e., when ss is the default base and the
|
|
* instruction has no override prefix.
|
|
*
|
|
* D) BUGS
|
|
*
|
|
* This software is obviously bug-free :-). Nevertheless, if you encounter
|
|
* an interesting feature, mail me a note, if possible with a detailed
|
|
* instruction example showing where and how it fails.
|
|
*
|
|
*/
|
|
|
|
/* Now the details of flag evaluation with the necessary macros */
|
|
|
|
/* Alignment check is toggleable so the system believes it is a 486, but
|
|
CPUID is not to avoid unnecessary complexities. However, alignment
|
|
is actually never checked (real mode is CPL 0 anyway). */
|
|
#define AC86 13 /* Can only be toggled */
|
|
#define VM86 14 /* Not used for now */
|
|
#define RF86 15 /* Not emulated precisely */
|
|
/* Actually NT and IOPL are kept in memory */
|
|
#define NT86 17
|
|
#define IOPL86 18 /* Actually 18 and 19 */
|
|
#define OF86 20
|
|
#define DF86 21
|
|
#define IF86 22
|
|
#define TF86 23
|
|
#define SF86 24
|
|
#define ZF86 25
|
|
#define AF86 27
|
|
#define PF86 29
|
|
#define CF86 31
|
|
|
|
/* Where the less important flags are placed in PPC cr */
|
|
#define RF 20 /* Suppress trap flag: cr5[0] */
|
|
#define DF 21 /* Direction flag: cr5[1] */
|
|
#define IF 22 /* Interrupt flag: cr5[2] */
|
|
#define TF 23 /* Single step flag: cr5[3] */
|
|
|
|
/* Now the flags which are frequently used */
|
|
/*
|
|
* CF_IN is a copy of the input carry with PPC polarity,
|
|
* it is cleared for add, set for sub and cmp,
|
|
* equal to the x86 carry for adc and to its complement for sbb.
|
|
* it is used to evaluate AF and CF.
|
|
*/
|
|
#define CF_IN 0x80000000
|
|
|
|
/* #define GET_CF_IN(dst) rlwinm dst,flags,1,0x01 */
|
|
|
|
/* CF_IN_CR set in flags means that cr4[0] is a copy of carry bit */
|
|
#define CF_IN_CR 0x40000000
|
|
|
|
#define EVAL_CF andis. r3,flags,(CF_IN_CR)>>16; beql- _eval_cf
|
|
|
|
/*
|
|
* CF_STATE tells how to compute the carry bit.
|
|
* NOTRESULT16 and NOTRESULT8 are never set explicitly,
|
|
* but they may happen after a cmc instruction.
|
|
*/
|
|
#define CF 16 /* cr4[0] */
|
|
#define CF_LOCATION 0x30000000
|
|
#define CF_ZERO 0x00000000
|
|
#define CF_EXPLICIT 0x00000000
|
|
#define CF_COMPLEMENT 0x08000000 /* Indeed a polarity bit */
|
|
#define CF_STATE_MASK (CF_LOCATION|CF_COMPLEMENT)
|
|
#define CF_VALUE 0x08000000
|
|
#define CF_SET 0x08000000
|
|
#define CF_RES32 0x10000000
|
|
#define CF_NOTRES32 0x18000000
|
|
#define CF_RES16 0x20000000
|
|
#define CF_NOTRES16 0x28000000
|
|
#define CF_RES8 0x30000000
|
|
#define CF_NOTRES8 0x38000000
|
|
|
|
#define CF_ADDL CF_RES32
|
|
#define CF_SUBL CF_NOTRES32
|
|
#define CF_ADDW CF_RES16
|
|
#define CF_SUBW CF_RES16
|
|
#define CF_ADDB CF_RES8
|
|
#define CF_SUBB CF_RES8
|
|
|
|
#define CF_ROTCNT(dst) rlwinm dst,flags,7,0x18
|
|
#define CF_POL(dst,pos) rlwinm dst,flags,(36-pos)%32,pos,pos
|
|
#define CF_POL_INSERT(dst,pos) \
|
|
rlwimi dst,flags,(36-pos)%32,pos,pos
|
|
#define RES2CF(dst) rlwinm dst,result,8,7,15
|
|
|
|
/*
|
|
* OF_STATE tells how to compute the overflow bit. When the low order bit
|
|
* is set (OF_EXPLICIT), it means that OF is the exclusive or of the
|
|
* two other bits. For the reason of this choice, see rotate instructions.
|
|
*/
|
|
#define OF 1 /* Only after EVAL_OF */
|
|
#define OF_STATE_MASK 0x07000000
|
|
#define OF_INCDEC 0x00000000
|
|
#define OF_EXPLICIT 0x01000000
|
|
#define OF_ZERO 0x01000000
|
|
#define OF_VALUE 0x04000000
|
|
#define OF_SET 0x04000000
|
|
#define OF_ONE 0x05000000
|
|
#define OF_XOR 0x06000000
|
|
#define OF_ARITHL 0x06000000
|
|
#define OF_ARITHW 0x02000000
|
|
#define OF_ARITHB 0x04000000
|
|
|
|
#define EVAL_OF rlwinm. r3,flags,6,0,1; bngl+ _eval_of; andis. r3,flags,OF_VALUE>>16
|
|
|
|
/* See _eval_of to see how this can be used */
|
|
#define OF_ROTCNT(dst) rlwinm dst,flags,10,0x1c
|
|
|
|
/*
|
|
* SIGNED_IN_CR means that cr6 is set as after a signed compare:
|
|
* - cr6[0] is SF^OF for jl/jnl/setl/setnl...
|
|
* - cr6[1] is ~((SF^OF)+ZF) for jg/jng/setg/setng...
|
|
* - cr6[2] is ZF (ZF_IN_CR is always set if this bit is set)
|
|
*/
|
|
#define SLT 24 /* cr6[0], signed less than */
|
|
#define SGT 25 /* cr6[1], signed greater than */
|
|
#define SIGNED_IN_CR 0x00800000
|
|
|
|
#define EVAL_SIGNED andis. r3,flags,SIGNED_IN_CR>>16; beql- _eval_signed
|
|
|
|
/*
|
|
* Above in CR means that cr4 is set as after an unsigned compare:
|
|
* - cr4[0] is CF (CF_IN_CR is also set)
|
|
* - cr4[1] is ~(CF+ZF) (ZF_IN_CR is also set)
|
|
*/
|
|
#define ABOVE 17 /* cr4[1] */
|
|
#define ABOVE_IN_CR 0x00400000
|
|
|
|
#define EVAL_ABOVE andis. r3,flags,ABOVE_IN_CR>>16; beql- _eval_above
|
|
|
|
/* SF_IN_CR means cr6[0] is a copy of SF. It implies ZF_IN_CR is also set */
|
|
#define SF 24 /* cr6[0] */
|
|
#define SF_IN_CR 0x00200000
|
|
|
|
#define EVAL_SF andis. r3,flags,SF_IN_CR>>16; beql- _eval_sf_zf
|
|
|
|
/* ZF_IN_CR means cr6[2] is a copy of ZF. */
|
|
#define ZF 26
|
|
#define ZF_IN_CR 0x00100000
|
|
|
|
#define EVAL_ZF andis. r3,flags,ZF_IN_CR>>16; beql- _eval_sf_zf
|
|
#define ZF2ZF86(s,d) rlwimi d,s,ZF-ZF86,ZF86,ZF86
|
|
#define ZF862ZF(reg) rlwimi reg,reg,32+ZF86-ZF,ZF,ZF
|
|
|
|
/*
|
|
* ZF_PROTECT means cr6[2] is the only valid value for ZF. This is necessary
|
|
* because some infrequent instructions may leave SF and ZF in an apparently
|
|
* inconsistent state (both set): sahf, popf and the few (not implemented)
|
|
* instructions that only affect ZF.
|
|
*/
|
|
#define ZF_PROTECT 0x00080000
|
|
|
|
/* The parity is always evaluated when it is needed */
|
|
#define PF 0 /* Only after EVAL_PF */
|
|
#define EVAL_PF bl _eval_pf
|
|
|
|
/* This field gives the shift amount to use to evaluate SF
|
|
and ZF when ZF_PROTECT is not set */
|
|
#define RES_SIZE_MASK 0x00060000
|
|
#define RESL 0x00000000
|
|
#define RESW 0x00040000
|
|
#define RESB 0x00060000
|
|
|
|
#define RES_SHIFT(dst) rlwinm dst,flags,18,0x18
|
|
|
|
/* SUBTRACTING is set if the last flag setting instruction was sub/sbb/cmp,
|
|
used to evaluate OF and AF */
|
|
#define SUBTRACTING 0x00010000
|
|
|
|
#define GET_ADDSUB(dst) rlwinm dst,flags,16,0x01
|
|
|
|
/* rotate (rcl/rcr/rol/ror) affect CF and OF but not other flags */
|
|
#define ROTATE_MASK (CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR|OF_STATE_MASK|SIGNED_IN_CR)
|
|
#define ROTATE_FLAGS rlwimi flags,one,24,ROTATE_MASK
|
|
|
|
/*
|
|
* INCDEC_FIELD has at most one bit set when the last flag setting instruction
|
|
* was either inc or dec (which do not affect the carry). When one of these
|
|
* bits is set, it affects the way OF, SF, ZF, AF, and PF are evaluated.
|
|
*/
|
|
#define INCDEC_FIELD 0x0000ff00
|
|
|
|
#define DECB_SHIFT 8
|
|
#define INCB_SHIFT 9
|
|
#define DECW_SHIFT 10
|
|
#define INCW_SHIFT 11
|
|
#define DECL_SHIFT 14
|
|
#define INCL_SHIFT 15
|
|
|
|
#define INCDEC_MASK (OF_STATE_MASK|SIGNED_IN_CR|ABOVE_IN_CR|SF_IN_CR|\
|
|
ZF_IN_CR|ZF_PROTECT|RES_SIZE_MASK|SUBTRACTING|\
|
|
INCDEC_FIELD)
|
|
/* Operations to perform to tell where the flags are after inc or dec */
|
|
/*
 * INC_FLAGS(BWL) and DEC_FLAGS(BWL): BWL is B, W or L and selects one of
 * the INCx_SHIFT or DECx_SHIFT constants. The rlwimi rotates the register
 * named one left by that amount and inserts it into flags under
 * INCDEC_MASK: a single bit of INCDEC_FIELD ends up set and every other
 * bit covered by INCDEC_MASK is cleared. Bits outside the mask (the carry
 * related fields) are kept. This relies on the register named one
 * holding the value 1.
 */
#define INC_FLAGS(BWL) rlwimi flags,one,INC##BWL##_SHIFT,INCDEC_MASK
|
|
#define DEC_FLAGS(BWL) rlwimi flags,one,DEC##BWL##_SHIFT,INCDEC_MASK
|
|
|
|
/* How the flags are set after arithmetic operations */
|
|
#define FLAGS_ADD(BWL) (CF_ADD##BWL|OF_ARITH##BWL|RES##BWL)
|
|
#define FLAGS_SBB(BWL) (CF_SUB##BWL|OF_ARITH##BWL|RES##BWL|SUBTRACTING)
|
|
/* Flags after sub: as after sbb, with the input carry in PPC polarity set.
   Parenthesized so the value groups correctly inside a larger expression. */
#define FLAGS_SUB(BWL) (FLAGS_SBB(BWL)|CF_IN)
|
|
/* Flags after cmp: as after sub, plus the markers telling that ZF, CF and
   the signed/unsigned comparison results are available in cr.
   Parenthesized so the value groups correctly inside a larger expression. */
#define FLAGS_CMP(BWL) (FLAGS_SUB(BWL)|ZF_IN_CR|CF_IN_CR|SIGNED_IN_CR|ABOVE_IN_CR)
|
|
|
|
/* How the flags are set after logical operations */
|
|
#define FLAGS_LOG(BWL) (CF_ZERO|OF_ZERO|RES##BWL)
|
|
/* Flags after test: as after a logical operation, plus the markers telling
   that ZF, SF and the signed comparison result are available in cr6.
   Parenthesized so the value groups correctly inside a larger expression. */
#define FLAGS_TEST(BWL) (FLAGS_LOG(BWL)|ZF_IN_CR|SIGNED_IN_CR|SF_IN_CR)
|
|
|
|
/* How the flags are set after bt/btc/btr/bts. */
|
|
/* Parenthesized so the value groups correctly inside a larger expression. */
#define FLAGS_BTEST (CF_IN_CR|CF_ADDL|OF_ZERO|RESL)
|
|
|
|
/* How the flags are set after bsf/bsr. */
|
|
/* WL is W or L. Parenthesized so the value groups correctly inside a
   larger expression. */
#define FLAGS_BSRCH(WL) (CF_ZERO|OF_ZERO|RES##WL|ZF_IN_CR)
|
|
|
|
/* How the flags are set after logical right shifts */
|
|
#define FLAGS_SHR(BWL) (CF_EXPLICIT|OF_ARITH##BWL|RES##BWL)
|
|
|
|
/* How the flags are set after double length shifts */
|
|
#define FLAGS_DBLSH(WL) (CF_EXPLICIT|OF_ARITH##WL|RES##WL)
|
|
|
|
/* How the flags are set after multiplies */
|
|
#define FLAGS_MUL (CF_EXPLICIT|OF_EXPLICIT)
|
|
|
|
/*
 * Both macros only take the upper halfword of their argument: it is
 * shifted right by 16 and used as the immediate of lis/addis.
 * SET_FLAGS loads the whole flags register, so the low halfword (which
 * contains INCDEC_FIELD) becomes zero; ADD_FLAGS adds the shifted value
 * to the current contents of flags.
 */
#define SET_FLAGS(fl) lis flags,(fl)>>16
|
|
#define ADD_FLAGS(fl) addis flags,flags,(fl)>>16
|
|
|
|
/*
|
|
* We are always off by one when compared with Intel's eip, this shortens
|
|
* code by allowing to load next byte with lbzu x,1(eip). The register
|
|
* called eip actually contains csbase+eip, and thus should be called lip
|
|
* for linear ip.
|
|
*/
|
|
|
|
/*
|
|
* Reason codes passed to the C part of the emulator, this includes all
|
|
* instructions which may change the current code segment. These definitions
|
|
* will soon go into a separate include file. Codes 0 to 255 correspond
|
|
* directly to the interrupt/trap that has to be generated.
|
|
*/
|
|
|
|
#define code_divide_err 0
|
|
#define code_trap 1
|
|
#define code_int3 3
|
|
#define code_into 4
|
|
#define code_bound 5
|
|
#define code_ud 6
|
|
#define code_dna 7 /* FPU not available */
|
|
|
|
#define code_iretw 256 /* Interrupt returns */
|
|
#define code_iretl 257
|
|
#define code_lcallw 258 /* Far calls and jumps */
|
|
#define code_lcalll 259
|
|
#define code_ljmpw 260
|
|
#define code_ljmpl 261
|
|
#define code_lretw 262 /* Far returns */
|
|
#define code_lretl 263
|
|
#define code_softint 264 /* int $xx */
|
|
#define code_lock 265 /* Lock prefix */
|
|
/* Codes 1024 to 2047 are used for I/O port access instructions:
|
|
- The three LSB define the port size (1, 2 or 4)
|
|
- bit of weight 512 means out if set, in if clear
|
|
- bit of weight 256 means ins/outs if set, in/out if clear
|
|
- bit of weight 128 means use 32 bit addresses if set, 16 bit if clear
|
|
(only used for ins/outs instructions, always clear for in/out)
|
|
*/
|
|
#define code_inb 1024+1
|
|
#define code_inw 1024+2
|
|
#define code_inl 1024+4
|
|
#define code_outb 1024+512+1
|
|
#define code_outw 1024+512+2
|
|
#define code_outl 1024+512+4
|
|
#define code_insb_a16 1024+256+1
|
|
#define code_insw_a16 1024+256+2
|
|
#define code_insl_a16 1024+256+4
|
|
#define code_outsb_a16 1024+512+256+1
|
|
#define code_outsw_a16 1024+512+256+2
|
|
#define code_outsl_a16 1024+512+256+4
|
|
#define code_insb_a32 1024+256+128+1
|
|
#define code_insw_a32 1024+256+128+2
|
|
#define code_insl_a32 1024+256+128+4
|
|
#define code_outsb_a32 1024+512+256+128+1
|
|
#define code_outsw_a32 1024+512+256+128+2
|
|
#define code_outsl_a32 1024+512+256+128+4
|
|
|
|
#define state 31
|
|
/* r31 (state) is a pointer to a structure describing the emulated x86
|
|
processor, its layout is the following:
|
|
|
|
first the general purpose registers, they are in little endian byte order
|
|
|
|
offset name
|
|
|
|
0 eax/ax/al
|
|
1 ah
|
|
4 ecx/cx/cl
|
|
5 ch
|
|
8 edx/dx/dl
|
|
9 dh
|
|
12 ebx/bx/bl
|
|
13 bh
|
|
16 esp/sp
|
|
20 ebp/bp
|
|
24 esi/si
|
|
28 edi/di
|
|
*/
|
|
|
|
#define AL 0
|
|
#define AX 0
|
|
#define EAX 0
|
|
#define AH 1
|
|
#define CL 4
|
|
#define CX 4
|
|
#define ECX 4
|
|
#define DX 8
|
|
#define EDX 8
|
|
#define BX 12
|
|
#define EBX 12
|
|
#define SP 16
|
|
#define ESP 16
|
|
#define BP 20
|
|
#define EBP 20
|
|
#define SI 24
|
|
#define ESI 24
|
|
#define DI 28
|
|
#define EDI 28
|
|
|
|
/*
|
|
then the rest of the machine state, big endian !
|
|
|
|
offset name
|
|
|
|
32 essel segment register selectors (values)
|
|
36 cssel
|
|
40 sssel
|
|
44 dssel
|
|
48 fssel
|
|
52 gssel
|
|
56 eipimg true eip (register named eip is csbase+eip)
|
|
60 eflags eip and eflags only valid when C code running !
|
|
64 esbase segment registers bases
|
|
68 csbase
|
|
72 ssbase
|
|
76 dsbase
|
|
80 fsbase
|
|
84 gsbase
|
|
88 iobase For I/O instructions, I/O space virtual base
|
|
92 ioperm I/O permission bitmap pointer
|
|
96 reason Reason code when calling external emulator
|
|
100 nexteip eip past instruction for external emulator
|
|
104 parm1 parameter for external emulator
|
|
108 parm2 parameter for external emulator
|
|
112 _opcode current opcode register for external emulator
|
|
116 _base segment register base for external emulator
|
|
120 _offset instruction operand offset
|
|
More internal state was dumped here for debugging in first versions
|
|
|
|
128 vbase where the 1Mb memory is mapped
|
|
132 cntimg instruction counter
|
|
136 scratch
|
|
192 eipstat array of 32k unsigned long pairs for eip stats
|
|
*/
|
|
|
|
#define essel 32
|
|
#define cssel 36
|
|
#define sssel 40
|
|
#define dssel 44
|
|
#define fssel 48
|
|
#define gssel 52
|
|
#define eipimg 56
|
|
#define eflags 60
|
|
#define esbase 64
|
|
#define csbase 68
|
|
#define ssbase 72
|
|
#define dsbase 76
|
|
#define fsbase 80
|
|
#define gsbase 84
|
|
#define iobase 88
|
|
#define ioperm 92
|
|
#define reason 96
|
|
#define nexteip 100
|
|
#define parm1 104
|
|
#define parm2 108
|
|
#define _opcode 112
|
|
#define _base 116
|
|
#define _offset 120
|
|
#define vbase 128
|
|
#define cntimg 132
|
|
#ifdef EIP_STATS
|
|
#define eipstat 192
|
|
#endif
|
|
/* Global registers */
|
|
|
|
/* Some segment register bases are permanently kept in registers since they
|
|
are often used: these are csb, esb and ssb because they are
|
|
required for jumps, string instructions, and pushes/pops/calls/rets.
|
|
dsbase is not kept in a register but loaded from memory to allow somewhat
|
|
more parallelism in the main emulation loop.
|
|
*/
|
|
|
|
#define one 30 /* Constant one, so pervasive */
|
|
#define ssb 29
|
|
#define csb 28
|
|
#define esb 27
|
|
#define eip 26 /* That one is indeed csbase+(e)ip-1 */
|
|
#define result 25 /* For the use of result, op1, op2 */
|
|
#define op1 24 /* see the section on flag emulation */
|
|
#define op2 23
|
|
#define opbase 22 /* default opcode table */
|
|
#define flags 21 /* See earlier description */
|
|
#define opcode 20 /* Opcode */
|
|
#define opreg 19 /* Opcode extension/register number */
|
|
/* base is reloaded with the base of the ds segment at the beginning of
|
|
every instruction, it is modified by segment override prefixes, when
|
|
the default base segment is ss, or when the modrm byte specifies a
|
|
register operand */
|
|
#define base 18 /* Instruction's operand segment base */
|
|
#define offset 17 /* Instruction's memory operand offset */
|
|
/* used to address a table telling how to decode the addressing mode
|
|
specified by the modrm byte */
|
|
#define adbase 16 /* addressing mode table */
|
|
/* Following registers are used only as dedicated temporaries during decoding,
|
|
they are free for use during emulation */
|
|
/*
|
|
* ceip (current eip) is only in use when we call the external emulator for
|
|
* instructions that fault. Note that it is forbidden to change flags before
|
|
* the check for the fault happens (divide by zero...) ! ceip is also used
|
|
* when measuring timing.
|
|
*/
|
|
#define ceip 15
|
|
|
|
/* A register used to measure timing information (when enabled) */
|
|
#ifdef EIP_STATS
|
|
#define tstamp 14
|
|
#endif
|
|
|
|
#define count 12 /* Instruction counter. */
|
|
|
|
#define r0 0
|
|
#define r1 1 /* PPC Stack pointer. */
|
|
#define r3 3
|
|
#define r4 4
|
|
#define r5 5
|
|
#define r6 6
|
|
#define r7 7
|
|
|
|
/* Macros to read code stream */
|
|
/*
 * NEXTBYTE advances eip by one and loads the byte it then points to
 * (load with update). NEXTWORD/NEXTDWORD load a byte-reversed halfword or
 * word from eip + one (the register named one holds 1) and then advance
 * eip by the operand size, so eip is left on the last byte consumed.
 */
#define NEXTBYTE(dest) lbzu dest,1(eip)
|
|
#define NEXTWORD(dest) lhbrx dest,eip,one; la eip,2(eip)
|
|
#define NEXTDWORD(dest) lwbrx dest,eip,one; la eip,4(eip)
|
|
/* NEXT fetches a new opcode byte at the label nop of the main loop. */
#define NEXT b nop
|
|
/* GOTNEXT enters the main loop at gotopcode: the opcode byte must already
   be in the register named opcode. */
#define GOTNEXT b gotopcode
|
|
|
|
#ifdef __BOOT__
|
|
START_GOT
|
|
GOT_ENTRY(_jtables)
|
|
GOT_ENTRY(jtab_www)
|
|
GOT_ENTRY(adtable)
|
|
END_GOT
|
|
#else
|
|
.text
|
|
#endif
|
|
.align 2
|
|
.global em86_enter
|
|
.type em86_enter,@function
|
|
/*
 * em86_enter: entry point of the emulator.
 * In: r3 = pointer to the emulated processor state structure; it is
 *     copied to the register named state.
 * Builds a 96 byte stack frame and saves r14-r31 at 24(r1), CR at 20(r1)
 * and LR at 100(r1), i.e. 4 bytes above the caller's stack pointer.
 * Then loads the table pointers opbase and adbase and the constant
 * register one, and falls through into restart.
 */
em86_enter: stwu r1,-96(r1) # allocate stack
|
|
mflr r0
|
|
stmw 14,24(r1)
|
|
mfcr r4
|
|
stw r0,100(r1)
|
|
mr state,r3
|
|
stw r4,20(r1)
|
|
#ifdef __BOOT__
|
|
/* We need this since r30 is the default GOT pointer */
|
|
#define r30 30
|
|
/* GET_GOT and GOT() are not defined in this file (presumably they come
   from bootldr.h - confirm). */
GET_GOT
|
|
/* The relocation of these tables is explicit, this could be done
|
|
* automatically with fixups but would add more than 8kb in the fixup tables.
|
|
*/
|
|
/*
 * r3 = run-time address of _jtables. r4 = r3 minus the word stored at
 * _endjtables (presumably the link-time address of _jtables - confirm
 * against the table definition). A zero difference skips the loop;
 * otherwise the difference is added to every word from _jtables up to
 * and including the word at _endjtables, hence the +1 in the count.
 */
lwz r3,GOT(_jtables)
|
|
lwz r4,_endjtables-_jtables(r3)
|
|
sub. r4,r3,r4
|
|
beq+ 1f
|
|
li r0,((_endjtables-_jtables)>>2)+1
|
|
addi r3,r3,-4
|
|
mtctr r0
|
|
0: lwzu r5,4(r3)
|
|
add r5,r5,r4
|
|
stw r5,0(r3)
|
|
bdnz 0b
|
|
1: lwz adbase,GOT(adtable)
|
|
lwz opbase,GOT(jtab_www)
|
|
/* Now r30 is only used as constant 1 */
|
|
#undef r30
|
|
li one,1 # pervasive constant
|
|
#else
|
|
/* Non boot version: the table addresses are built from their high
   adjusted (@ha) and low (@l) halves. */
lis opbase,jtab_www@ha
|
|
lis adbase,adtable@ha
|
|
li one,1 # pervasive constant
|
|
addi opbase,opbase,jtab_www@l
|
|
addi adbase,adbase,adtable@l
|
|
#ifdef EIP_STATS
|
|
/* Start the statistics with ceip = 0 and the current time base value. */
li ceip,0
|
|
mftb tstamp
|
|
#endif
|
|
#endif
|
|
/* We branch back here when calling an external function tells us to resume */
|
|
/*
 * restart: (re)load the emulator registers from the state structure.
 * The eflags image is split up as follows:
 *  - ZF and RF are moved to their cr bit positions inside r3, then cr5 and
 *    cr6 are loaded from r3 (mtcrf 0x06). DF, IF, TF and SF need no move
 *    since their x86 image positions equal their cr positions.
 *  - OF and CF are inserted into flags as explicit values on top of
 *    OF_EXPLICIT|ZF_IN_CR|ZF_PROTECT|SF_IN_CR.
 *  - result, op1 and op2 are loaded with values derived from the SF/PF and
 *    AF bits of the image (see the per line remarks).
 * csb, ssb, esb and count are reloaded from memory. eip is loaded from
 * eipimg and turned into csb+eip by the lbzux which also fetches the
 * first opcode byte, then the main loop is entered at gotopcode.
 */
restart: lwz r3,eflags(state)
|
|
lis flags,(OF_EXPLICIT|ZF_IN_CR|ZF_PROTECT|SF_IN_CR)>>16
|
|
lwz csb,csbase(state)
|
|
extsb result,r3 # SF/PF
|
|
rlwinm op1,r3,31,0x08 # AF
|
|
lwz eip,eipimg(state)
|
|
ZF862ZF(r3) # cr6
|
|
addi op2,op1,0 # AF
|
|
lwz ssb,ssbase(state)
|
|
rlwimi flags,r3,15,OF_VALUE # OF
|
|
rlwimi r3,r3,32+RF86-RF,RF,RF # RF
|
|
lwz esb,esbase(state)
|
|
ori result,result,0xfb # PF
|
|
mtcrf 0x06,r3 # RF/DF/IF/TF/SF/ZF
|
|
lbzux opcode,eip,csb
|
|
rlwimi flags,r3,27,CF_VALUE # CF
|
|
xori result,result,0xff # PF
|
|
lwz count,cntimg(state)
|
|
GOTNEXT # start the emulator
|
|
|
|
/* Now return */
|
|
/*
 * exit: undo the frame built by em86_enter: restore LR from 100(r1), CR
 * from 20(r1) and r14-r31 from 24(r1), pop the 96 byte frame and return.
 * r3 is not modified here, so when exit is reached from the em86_trap
 * call site the caller of em86_enter sees the value em86_trap returned.
 */
exit: lwz r0,100(r1)
|
|
lwz r4,20(r1)
|
|
mtlr r0
|
|
lmw 14,24(r1)
|
|
mtcr r4
|
|
addi r1,r1,96
|
|
blr
|
|
|
|
/*
 * trap: reached from gotopcode when TF is set. RF is copied to cr bit 0
 * and cleared; if it was set the trap is suppressed for this instruction
 * and execution continues at resume. Otherwise ceip is set to eip-csb and
 * the emulator leaves through complex with the reason code_trap.
 */
trap: crmove 0,RF
|
|
crclr RF
|
|
bt- 0,resume
|
|
sub ceip,eip,csb
|
|
li r3,code_trap
|
|
/*
 * complex: hand over to the C part of the emulator.
 * In: r3 = reason code, r4 and r5 = values stored as parm1 and parm2,
 *     ceip = value stored as the eip image.
 * eip is converted to eip+1-csb and stored as nexteip. The value left in
 * r3 by _eval_flags (defined outside this view) is stored as the eflags
 * image. opcode, base, offset and count are saved as well. em86_trap is
 * then called with r3 = state: a non-zero return value leaves the
 * emulator through exit, zero reloads the state at restart.
 */
complex: addi eip,eip,1
|
|
stw r3,reason(state)
|
|
sub eip,eip,csb
|
|
/*
 * Dump of op1, op2, result and flags at the fixed offsets 240-252 of the
 * state structure. NOTE(review): these offsets have no name in the layout
 * description and lie inside the eipstat array (base 192) when EIP_STATS
 * is defined - confirm that they cannot clobber the statistics.
 */
stw op1,240(state)
|
|
stw op2,244(state)
|
|
stw result,248(state)
|
|
stw flags,252(state)
|
|
stw r4,parm1(state)
|
|
stw r5,parm2(state)
|
|
stw opcode,_opcode(state)
|
|
bl _eval_flags
|
|
stw base,_base(state)
|
|
stw eip,nexteip(state)
|
|
stw r3,eflags(state)
|
|
mr r3,state
|
|
stw offset,_offset(state)
|
|
stw ceip,eipimg(state)
|
|
stw count,cntimg(state)
|
|
bl em86_trap
|
|
cmpwi r3,0
|
|
bne exit
|
|
b restart
|
|
|
|
/* Main loop */
|
|
/*
|
|
* The two LSB of each entry in the main table mean the following:
|
|
* 00: indirect opcode: modrm follows and the three middle bits are an
|
|
* opcode extension. The entry points to another jump table.
|
|
* 01: direct instruction, branch directly to the routine.
|
|
* 10: modrm specifies byte size memory and register operands.
|
|
* 11: modrm specifies word/long memory and register operands.
|
|
*
|
|
* The modrm byte, if present, is always loaded in r7.
|
|
*
|
|
* Note: most "mr x,y" instructions have been replaced by "addi x,y,0" since
|
|
* the latter can be executed in the second integer unit on 603e.
|
|
*/
|
|
|
|
/*
|
|
* This code is a very good example of absolutely unmaintainable code.
|
|
* It was actually much easier to write than it is to understand !
|
|
* If my computations are right, the maximum path length from fetching
|
|
* the opcode to exiting to the actual instruction execution is
|
|
* 46 instructions (for non-prefixed, single byte opcode instructions).
|
|
*
|
|
*/
|
|
/*
 * Main dispatch loop.
 *  nop:       fetch the next opcode byte.
 *  gotopcode: r3 = opcode*4 (index in the table at opbase); go to trap if
 *             TF is set.
 *  resume:    r4 = table entry, also copied to ctr. The slwi. moves the
 *             two LSB of the entry to the top of r0 and sets cr0:
 *             positive = 01 (direct instruction, taken by bgtctr),
 *             zero = 00 (extended opcode), negative = 10 or 11 (modrm
 *             with a register operand). ceip is set to eip-csb and base
 *             is loaded with the ds base.
 */
.align 5
|
|
#ifdef EIP_STATS
|
|
/*
 * Statistics version: r5 is pointed at the second word of the pair
 * selected by the low 15 bits of ceip (8 bytes per pair, array base
 * eipstat). ceip still holds the value set for the previous dispatch at
 * this point. The first word of the pair is incremented and the time
 * base difference between the previous and the current dispatch is added
 * to the second word. Note that this version does not increment count.
 */
nop: NEXTBYTE(opcode)
|
|
gotopcode: slwi r3,opcode,2
|
|
bt- TF,trap
|
|
resume: lwzx r4,opbase,r3
|
|
addi r5,state,eipstat+4
|
|
clrlslwi r6,ceip,17,3
|
|
mtctr r4
|
|
lwzux r7,r5,r6
|
|
slwi. r0,r4,30 # two lsb of table entry
|
|
sub r7,r7,tstamp
|
|
lwz r6,-4(r5)
|
|
mftb tstamp
|
|
addi r6,r6,1
|
|
sub ceip,eip,csb
|
|
stw r6,-4(r5)
|
|
add r7,r7,tstamp
|
|
lwz base,dsbase(state)
|
|
stw r7,0(r5)
|
|
#else
|
|
/* Normal version: same dispatch, plus the instruction counter. */
nop: NEXTBYTE(opcode)
|
|
gotopcode: slwi r3,opcode,2
|
|
bt- TF,trap
|
|
resume: lwzx r4,opbase,r3
|
|
sub ceip,eip,csb
|
|
mtctr r4
|
|
slwi. r0,r4,30 # two lsb of table entry
|
|
lwz base,dsbase(state)
|
|
addi count,count,1
|
|
#endif
|
|
bgtctr- # for instructions without modrm
|
|
|
|
|
|
|
/* modrm byte present */
|
|
/*
 * cr1 records whether modrm is below 192, i.e. whether the mod field is
 * not 3 (memory operand). opreg first receives the middle three bits of
 * modrm multiplied by 4. cr0 still holds the result of the slwi. above:
 * equal means both LSB of the table entry are zero (extended opcode,
 * continued at the local label 8).
 */
NEXTBYTE(r7) # modrm byte
|
|
cmplwi cr1,r7,192
|
|
rlwinm opreg,r7,31,0x1c
|
|
beq- cr0,8f # extended opcode
|
|
/* modrm with middle 3 bits specifying a register (non prefixed) */
|
|
/*
 * r0 = 8 if the LSB of the table entry is set (word/long operands), else
 * 0 (byte operands). r4 = 0x1c0d >> r0 is the register offset mask: 0x1c
 * for word/long, 0x1c0d for byte. opreg also gets modrm bit 5 in its LSB,
 * so after masking it is the offset of the register in the state
 * structure; in the byte case bit 0 selects the high byte (AH = 1,
 * CH = 5, ...). Memory operands (cr1 less than) continue at the local
 * label 9.
 */
rlwinm r0,r4,3,0x8
|
|
li r4,0x1c0d
|
|
rlwimi opreg,r7,27,0x01
|
|
srw r4,r4,r0
|
|
and opreg,opreg,r4
|
|
blt cr1,9f
|
|
/* modrm with 2 register operands */
|
|
/*
 * The r/m field is converted to a register offset in the same way, with
 * the same mask in r4, and base is pointed at the state structure so
 * that base+offset addresses the register image. Then jump to the
 * handler whose address was loaded in ctr.
 */
1: rlwinm offset,r7,2,0x1c
|
|
addi base,state,0
|
|
rlwimi offset,r7,30,0x01
|
|
and offset,offset,r4
|
|
bctr
|
|
|
|
/* Prefixes: first segment overrides */
|
|
.align 4
|
|
_es: NEXTBYTE(r7); addi base,esb,0
|
|
oris opcode,opcode,0x8000; b 2f
|
|
_cs: NEXTBYTE(r7); addi base,csb,0
|
|
oris opcode,opcode,0x8000; b 2f
|
|
_fs: NEXTBYTE(r7); lwz base,fsbase(state)
|
|
oris opcode,opcode,0x8000; b 2f
|
|
_gs: NEXTBYTE(r7); lwz base,gsbase(state)
|
|
oris opcode,opcode,0x8000; b 2f
|
|
_ss: NEXTBYTE(r7); addi base,ssb,0
|
|
oris opcode,opcode,0x8000; b 2f
|
|
_ds: NEXTBYTE(r7)
|
|
oris opcode,opcode,0x8000; b 2f
|
|
|
|
/* Lock (unimplemented) and repeat prefixes */
|
|
_lock: li r3,code_lock; b complex
|
|
_repnz: NEXTBYTE(r7); rlwimi opcode,one,12,0x1800; b 2f
|
|
_repz: NEXTBYTE(r7); rlwimi opcode,one,11,0x1800; b 2f
|
|
|
|
/* Operand and address size prefixes */
|
|
.align 4
|
|
_opsize: NEXTBYTE(r7); ori opcode,opcode,0x200
|
|
rlwinm r3,opcode,2,0x1ffc; b 2f
|
|
_adsize: NEXTBYTE(r7); ori opcode,opcode,0x400
|
|
rlwinm r3,opcode,2,0x1ffc; b 2f
|
|
|
|
_twobytes: NEXTBYTE(r7); addi r3,r3,0x400
|
|
2: rlwimi r3,r7,2,0x3fc
|
|
lwzx r4,opbase,r3
|
|
rlwimi opcode,r7,0,0xff
|
|
mtctr r4
|
|
slwi. r0,r4,30
|
|
bgtctr- # direct instruction
|
|
/* modrm byte in a prefixed instruction */
|
|
NEXTBYTE(r7) # modrm byte
|
|
cmpwi cr1,r7,192
|
|
rlwinm opreg,r7,31,0x1c
|
|
beq- 6f
|
|
/* modrm with middle 3 bits specifying a register (prefixed) */
|
|
rlwinm r0,r4,3,0x8
|
|
li r4,0x1c0d
|
|
rlwimi opreg,r7,27,0x01
|
|
srw r4,r4,r0
|
|
and opreg,opreg,r4
|
|
bnl cr1,1b # 2 register operands
|
|
/* modrm specifying memory with prefix */
|
|
3: rlwinm r3,r3,27,0xff80
|
|
rlwimi adbase,r7,2,0x1c
|
|
extsh r3,r3
|
|
rlwimi r3,r7,31,0x60
|
|
lwzx r4,r3,adbase
|
|
cmpwi cr1,r4,0x3090
|
|
bnl+ cr1,10f
|
|
/* displacement only addressing modes */
|
|
4: cmpwi r4,0x2000
|
|
bne 5f
|
|
NEXTWORD(offset)
|
|
bctr
|
|
5: NEXTDWORD(offset)
|
|
bctr
|
|
/* modrm with opcode extension (prefixed) */
|
|
6: lwzx r4,r4,opreg
|
|
mtctr r4
|
|
blt cr1,3b
|
|
/* modrm with opcode extension and register operand */
|
|
7: rlwinm offset,r7,2,0x1c
|
|
addi base,state,0
|
|
rlwinm r0,r4,3,0x8
|
|
li r4,0x1c0d
|
|
rlwimi offset,r7,30,0x01
|
|
srw r4,r4,r0
|
|
and offset,offset,r4
|
|
bctr
|
|
/* modrm with opcode extension (non prefixed) */
|
|
8: lwzx r4,r4,opreg
|
|
mtctr r4
|
|
/* FIXME ? We continue fetching even if the opcode extension is undefined.
|
|
* It shouldn't do any harm on real mode emulation anyway, and for ROM
|
|
* BIOS emulation, we are supposed to read valid code.
|
|
*/
|
|
bnl cr1,7b
|
|
/* modrm specifying memory without prefix */
|
|
9: rlwimi adbase,r7,2,0x1c # memory addressing mode computation
|
|
rlwinm r3,r7,31,0x60
|
|
lwzx r4,r3,adbase
|
|
cmplwi cr1,r4,0x3090
|
|
blt- cr1,4b # displacement only addressing mode
|
|
10: rlwinm. r0,r7,24,0,1 # three cases distinguished
|
|
beq- cr1,15f # an sib follows
|
|
rlwinm r3,r4,30,0x1c # 16bit/32bit/%si index/%di index
|
|
cmpwi cr1,r3,8 # set cr1 as early as possible
|
|
rlwinm r6,r4,26,0x1c # base register
|
|
lwbrx offset,state,r6 # load the base register
|
|
beq cr0,14f # no displacement
|
|
cmpw cr2,r4,opcode # check for ss as default base
|
|
bgt cr0,12f # byte offset
|
|
beq cr1,11f # 32 bit displacement
|
|
NEXTWORD(r5) # 16 bit displacement
|
|
bgt cr1,13f # d16(base,index)
|
|
/* d16(base) */
|
|
add offset,offset,r5
|
|
clrlwi offset,offset,16
|
|
bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* d32(base) */
|
|
11: NEXTDWORD(r5)
|
|
add offset,offset,r5
|
|
bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* 8 bit displacement */
|
|
12: NEXTBYTE(r5)
|
|
extsb r5,r5
|
|
bgt cr1,13f
|
|
/* d8(base) */
|
|
extsb r6,r4
|
|
add offset,offset,r5
|
|
ori r6,r6,0xffff
|
|
and offset,offset,r6
|
|
bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* d8(base,index) and d16(base,index) share this code ! */
|
|
13: lhbrx r3,state,r3
|
|
add offset,offset,r5
|
|
add offset,offset,r3
|
|
clrlwi offset,offset,16
|
|
bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* no displacement: only indexed modes may use ss as default base */
|
|
/*
 * No displacement. cr1 holds the comparison of the addressing-mode
 * selector with 8 (set by the caller of this label): equal means a 32 bit
 * register indirect, lower a 16 bit register indirect, greater a 16 bit
 * (base,index) mode whose index register offset is in r3.
 * Fix: the (base,index) sum was masked with "clrlwi offset,offset,r3",
 * i.e. with a register name used as a bit count; the offset must wrap to
 * 16 bits exactly like the d8/d16(base,index) path does.
 */
14:	beqctr cr1			# 32 bit register indirect
	clrlwi offset,offset,16
	bltctr cr1			# 16 bit register indirect
/* (base,index) */
	lhbrx r3,state,r3		# 16 bit [{bp,bx}+{si,di}]
	cmpw cr2,r4,opcode		# check for ss as default base
	add offset,offset,r3
	clrlwi offset,offset,16		# wrap the sum to 16 bits
	bgtctr+ cr2
	addi base,ssb,0
	bctr
|
|
/* sib modes, note that the size of the offset can be known from cr0 */
|
|
15: NEXTBYTE(r7) # get sib
|
|
rlwinm r3,r7,31,0x1c # index
|
|
rlwinm offset,r7,2,0x1c # base
|
|
cmpwi cr1,r3,ESP # has index ?
|
|
bne cr0,18f # base+d8/d32
|
|
cmpwi offset,EBP
|
|
beq 17f # d32(,index,scale)
|
|
xori r4,one,0xcc01 # build 0x0000cc00
|
|
rlwnm r4,r4,offset,0,1 # 0 or 0xc0000000
|
|
lwbrx offset,state,offset
|
|
cmpw cr2,r4,opcode # use ss ?
|
|
beq- cr1,16f # no index
|
|
/* (base,index,scale) */
|
|
lwbrx r3,state,r3
|
|
srwi r6,r7,6
|
|
slw r3,r3,r6
|
|
add offset,offset,r3
|
|
bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* (base), in practice only (%esp) is coded this way */
|
|
16: bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* d32(,index,scale) */
|
|
17: NEXTDWORD(offset)
|
|
beqctr- cr1 # no index: very unlikely
|
|
lwbrx r3,state,r3
|
|
srwi r6,r7,6
|
|
slw r3,r3,r6
|
|
add offset,offset,r3
|
|
bctr
|
|
/* 8 or 32 bit displacement */
|
|
18: xori r4,one,0xcc01 # build 0x0000cc00
|
|
rlwnm r4,r4,offset,0,1 # 0 or 0xc0000000
|
|
lwbrx offset,state,offset
|
|
cmpw cr2,r4,opcode # use ss ?
|
|
bgt cr0,20f # 8 bit offset
|
|
/* 32 bit displacement */
|
|
NEXTDWORD(r5)
|
|
beq- cr1,21f
|
|
/* d(base,index,scale) */
|
|
19: lwbrx r3,state,r3
|
|
add offset,offset,r5
|
|
add offset,offset,r3
|
|
bgtctr cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
/* 8 bit displacement */
|
|
20: NEXTBYTE(r5)
|
|
extsb r5,r5
|
|
bne+ cr1,19b
|
|
/* d(base), in practice base is %esp */
|
|
21: add offset,offset,r5
|
|
bgtctr- cr2
|
|
addi base,ssb,0
|
|
bctr
|
|
|
|
/*
|
|
* Flag evaluation subroutines: they have not been written for performance
|
|
* since they are not often used in practice. The rule of the game was to
|
|
* write them with as few branches as possible.
|
|
* The first routines evaluate either one or 2 (ZF and SF simultaneously)
|
|
* flags and do not use r0 and r7.
|
|
* The more complex routines (_eval_above, _eval_signed and _eval_flags)
|
|
* call the former ones, using r0 as a return address save register and
|
|
* r7 as a safe temporary.
|
|
*/
|
|
|
|
/*
|
|
* _eval_sf_zf evaluates simultaneously SF and ZF unless ZF is already valid
|
|
* and protected because it is possible, although it is exceptional, to have
|
|
* SF and ZF set at the same time after a few instructions which may leave the
|
|
* flags in this apparently inconsistent state: sahf, popf, iret and the few
|
|
* (for now unimplemented) instructions which only affect ZF (lar, lsl, arpl,
|
|
* cmpxchg8b). This also solves the obscure case of ZF set and PF clear.
|
|
* On return: SF=cr6[0], ZF=cr6[2].
|
|
*/
|
|
|
|
_eval_sf_zf: andis. r5,flags,ZF_PROTECT>>16
|
|
rlwinm r3,flags,0,INCDEC_FIELD
|
|
RES_SHIFT(r4)
|
|
cntlzw r3,r3
|
|
slw r4,result,r4
|
|
srwi r5,r3,5 # ? use result : use op1
|
|
rlwinm r3,r3,2,0x18
|
|
oris flags,flags,(SF_IN_CR|SIGNED_IN_CR|ZF_IN_CR)>>16
|
|
neg r5,r5 # mux result/op2
|
|
slw r3,op2,r3
|
|
and r4,r4,r5
|
|
andc r3,r3,r5
|
|
xoris flags,flags,(SIGNED_IN_CR)>>16
|
|
bne- 1f # 12 instructions between set
|
|
or r3,r3,r4 # and test, good for folding
|
|
cmpwi cr6,r3,0
|
|
blr
|
|
1: or. r3,r3,r4
|
|
crmove SF,0
|
|
blr
|
|
|
|
/*
|
|
* _eval_cf may be called at any time, no other flag is affected.
|
|
* On return: CF=cr4[0], r3= CF ? 0x100:0 = CF<<8.
|
|
*/
|
|
_eval_cf: addc r3,flags,flags # CF_IN to xer[ca]
|
|
RES2CF(r4) # get 8 or 16 bit carry
|
|
subfe r3,result,op1 # generate PPC carry for
|
|
CF_ROTCNT(r5) # preceding operation
|
|
addze r3,r4 # put carry into LSB
|
|
CF_POL(r4,23) # polarity & 0x100
|
|
oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
|
|
rlwnm r3,r3,r5,23,23 # shift carry there
|
|
xor r3,r3,r4 # CF <<8
|
|
xoris flags,flags,(ABOVE_IN_CR)>>16
|
|
cmplw cr4,one,r3 # sets cr4[0]
|
|
blr
|
|
|
|
/*
|
|
* eval_of returns the overflow flag in OF_STATE field, which will be
|
|
* either 001 (OF clear) or 101 (OF set); it is only called when the two
|
|
* low order bits of OF_STATE are not 01 (otherwise it will work but
|
|
* it is an elaborate variant of a nop with a few registers destroyed)
|
|
* The code multiplexes several sources in a branchless way, was fun to write.
|
|
*/
|
|
_eval_of: GET_ADDSUB(r4) # 0(add)/1(sub)
|
|
rlwinm r3,flags,0,INCDEC_FIELD
|
|
neg r4,r4 # 0(add)/-1(sub)
|
|
eqv r5,result,op1 # result[]==op1[] (bit by bit)
|
|
cntlzw r3,r3 # inc/dec
|
|
xor r4,r4,op2 # true sign of op2
|
|
oris r5,r5,0x0808 # bits to clear
|
|
clrlwi r6,r3,31 # 0(inc)/1(dec)
|
|
eqv r4,r4,op1 # op1[]==op2[] (bit by bit)
|
|
add r6,op2,r6 # add 1 if dec
|
|
rlwinm r3,r3,2,0x18 # incdec_shift
|
|
andc r4,r4,r5 # arithmetic overflow
|
|
slw r3,r6,r3 # shifted inc/dec result
|
|
addis r3,r3,0x8000 # compare with 0x80000000
|
|
ori r4,r4,0x0808 # bits to set
|
|
cntlzw r3,r3 # 32 if inc/dec overflow
|
|
OF_ROTCNT(r6)
|
|
rlwimi r4,r3,18,0x00800000 # insert inc/dec overflow
|
|
rlwimi flags,one,24,OF_STATE_MASK
|
|
rlwnm r3,r4,r6,8,8 # get field
|
|
rlwimi flags,r3,3,OF_VALUE # insert OF
|
|
blr
|
|
|
|
/*
|
|
* _eval_pf will always be called when needed (complex but infrequent),
|
|
* there are a few quirks for a branchless solution.
|
|
* On return: PF=cr0[0], PF=MSB(r3)
|
|
*/
|
|
_eval_pf: rlwinm r3,flags,0,INCDEC_FIELD
|
|
rotrwi r4,op2,4 # from inc/dec
|
|
rotrwi r5,result,4 # from result
|
|
cntlzw r3,r3 # use result if 32
|
|
xor r4,r4,op2
|
|
xor r5,r5,result
|
|
rlwinm r3,r3,26,0,0 # 32 becomes 0x80000000
|
|
clrlwi r4,r4,28
|
|
lis r6,0x9669 # constant to shift
|
|
clrlwi r5,r5,28
|
|
rlwnm r4,r6,r4,0,0 # parity from inc/dec
|
|
rlwnm r5,r6,r5,0,0 # parity from result
|
|
andc r4,r4,r3 # select which one
|
|
and r5,r5,r3
|
|
add. r3,r4,r5 # and test to simplify
|
|
blr # returns in r3 and cr0 set.
|
|
|
|
/*
|
|
* _eval_af will always be called when needed (complex but infrequent):
|
|
* - if after inc, af is set when 4 low order bits of op1 are 0
|
|
* - if after dec, af is set when 4 low order bits of op1 are 1
|
|
* (or 0 after adding 1 as implemented here)
|
|
* - if after add/sub/adc/sbb/cmp af is set from sum of 4 LSB of op1
|
|
* and 4 LSB of op2 (eventually complemented) plus carry in.
|
|
* - other instructions leave AF undefined so the returned value is irrelevant.
|
|
* Returned value must be masked with 0x10, since all other bits are undefined.
|
|
* This branchless code is perhaps not the most efficient, but quite parallel.
|
|
*/
|
|
_eval_af: rlwinm r3,flags,0,INCDEC_FIELD
|
|
clrlwi r5,op2,28 # 4 LSB of op2
|
|
addc r4,flags,flags # carry_in
|
|
GET_ADDSUB(r6)
|
|
cntlzw r3,r3 # if inc/dec 16..23 else 32
|
|
neg r6,r6 # add/sub
|
|
clrlwi r4,r3,31 # if dec 1 else 0
|
|
xor r5,r5,r6 # conditionally complement
|
|
clrlwi r6,op1,28 # 4 LSB of op1
|
|
add r4,op2,r4 # op2+(dec ? 1 : 0)
|
|
clrlwi r4,r4,28 # 4 LSB of op2+(dec ? 1 : 0)
|
|
adde r5,r6,r5 # op1+cy_in+(op2/~op2)
|
|
cntlzw r4,r4 # 28..31 if not AF, 32 if set
|
|
andc r5,r5,r3 # masked AF from add/sub...
|
|
andc r4,r3,r4 # masked AF from inc/dec
|
|
or r3,r4,r5
|
|
blr
|
|
|
|
/*
|
|
* _eval_above will only be called if ABOVE_IN_CR is not set.
|
|
* On return: ZF=cr6[2], CF=cr4[0], ABOVE=cr4[1]
|
|
*/
|
|
_eval_above: andis. r3,flags,ZF_IN_CR>>16
|
|
mflr r0
|
|
beql+ _eval_sf_zf
|
|
andis. r3,flags,CF_IN_CR>>16
|
|
beql+ _eval_cf
|
|
mtlr r0
|
|
oris flags,flags,ABOVE_IN_CR>>16
|
|
crnor ABOVE,ZF,CF
|
|
blr
|
|
|
|
/* _eval_signed may only be called when signed_in_cr is clear ! */
|
|
_eval_signed: andis. r3,flags,SF_IN_CR>>16
|
|
mflr r0
|
|
beql+ _eval_sf_zf
|
|
/* SF_IN_CR and ZF_IN_CR are set, SIGNED_IN_CR is clear */
|
|
rlwinm. r3,flags,5,0,1
|
|
xoris flags,flags,(SIGNED_IN_CR|SF_IN_CR)>>16
|
|
bngl+ _eval_of
|
|
andis. r3,flags,OF_VALUE>>16
|
|
mtlr r0
|
|
crxor SLT,SF,OF
|
|
crnor SGT,SLT,ZF
|
|
blr
|
|
|
|
_eval_flags: mflr r0
|
|
bl _eval_cf
|
|
li r7,2
|
|
rlwimi r7,r3,24,CF86,CF86 # 2 if CF clear, 3 if set
|
|
bl _eval_pf
|
|
andis. r4,flags,SF_IN_CR>>16
|
|
rlwimi r7,r3,32+PF-PF86,PF86,PF86
|
|
bl _eval_af
|
|
rlwimi r7,r3,0,AF86,AF86
|
|
beql+ _eval_sf_zf
|
|
mfcr r3
|
|
rlwinm. r4,flags,5,0,1
|
|
rlwimi r7,r3,0,DF86,SF86
|
|
ZF2ZF86(r3,r7)
|
|
bngl+ _eval_of
|
|
mtlr r0
|
|
lis r4,0x0004
|
|
lwz r3,eflags(state)
|
|
addi r4,r4,0x7000
|
|
rlwimi r7,flags,17,OF86,OF86
|
|
and r3,r3,r4
|
|
or r3,r3,r7
|
|
blr
|
|
|
|
/* Quite simple for real mode, input in r4, returns in r3. */
|
|
_segment_load: lwz r5,vbase(state)
|
|
rlwinm r3,r4,4,0xffff0 # segment selector * 16
|
|
add r3,r3,r5
|
|
blr
|
|
|
|
/* To allow I/O port virtualization if necessary, code for exception in r3,
|
|
port number in r4 */
|
|
_check_port: lwz r5,ioperm(state)
|
|
rlwinm r6,r4,29,0x1fff # 0 to 8kB
|
|
lis r0,0xffff
|
|
lhbrx r5,r5,r6
|
|
clrlwi r6,r4,29 # modulo 8
|
|
rlwnm r0,r0,r3,0x0f # 1, 3, or 0xf
|
|
slw r0,r0,r6
|
|
and. r0,r0,r5
|
|
bne- complex
|
|
blr
|
|
/*
|
|
* Instructions are in approximate functional order:
|
|
* 1) move, exchange, lea, push/pop, pusha/popa
|
|
* 2) cbw/cwde/cwd/cdq, zero/sign extending moves, in/out
|
|
* 3) arithmetic: add/sub/adc/sbb/cmp/inc/dec/neg
|
|
* 4) logical: and/or/xor/test/not/bt/btc/btr/bts/bsf/bsr
|
|
* 5) jump, call, ret
|
|
* 6) string instructions and xlat
|
|
* 7) rotate/shift/mul/div
|
|
* 8) segment register, far jumps, calls and rets, interrupts
|
|
* 9) miscellaneous (flags, bcd,...)
|
|
*/
|
|
|
|
#define MEM offset,base
|
|
#define REG opreg,state
|
|
#define SELECTORS 32
|
|
#define SELBASES 64
|
|
|
|
/* Immediate moves */
|
|
/*
 * mov imm8 to a byte register. The register number sits in the three low
 * bits of the opcode; opreg is built as ((opcode & 3) << 2) | bit 2 of
 * the opcode, then used as an offset from state.
 */
movb_imm_reg:
	rlwinm opreg,opcode,2,28,29	# (opcode & 3) << 2
	lbz r3,1(eip)			# immediate byte
	rlwimi opreg,opcode,30,31,31	# opcode bit 2 goes to the LSB
	lbzu opcode,2(eip)		# step over opcode+imm8, fetch next
	stbx r3,REG
	GOTNEXT
|
|
|
|
/*
 * mov imm16 to a word register: the two immediate bytes are copied
 * unchanged from the instruction stream into the register slot.
 */
movw_imm_reg:
	lhz r3,1(eip)			# raw 16 bit immediate
	clrlslwi opreg,opcode,29,2	# (opcode & 7) * 4
	lbzu opcode,3(eip)		# step over opcode+imm16, fetch next
	sthx r3,REG
	GOTNEXT
|
|
|
|
/*
 * mov imm32 to a long register: the four immediate bytes are copied
 * unchanged from the instruction stream into the register slot.
 */
movl_imm_reg:
	lwz r3,1(eip)			# raw 32 bit immediate
	clrlslwi opreg,opcode,29,2	# (opcode & 7) * 4
	lbzu opcode,5(eip)		# step over opcode+imm32, fetch next
	stwx r3,REG
	GOTNEXT
|
|
|
|
/*
 * mov imm8 to the decoded modrm operand (MEM). A non zero opreg, i.e. a
 * non zero middle field of the modrm byte, branches to ud.
 */
movb_imm_mem:
	lbz r0,1(eip)			# immediate byte
	cmpwi opreg,0			# only extension 0 is accepted
	lbzu opcode,2(eip)		# advance eip by 2, fetch next opcode
	bne- ud
	stbx r0,MEM
	GOTNEXT
|
|
|
|
/*
 * mov imm16 to the decoded modrm operand (MEM). A non zero opreg
 * branches to ud. The immediate is copied without byte swapping.
 */
movw_imm_mem:
	lhz r0,1(eip)			# raw 16 bit immediate
	cmpwi opreg,0			# only extension 0 is accepted
	lbzu opcode,3(eip)		# advance eip by 3, fetch next opcode
	bne- ud
	sthx r0,MEM
	GOTNEXT
|
|
|
|
/*
 * mov imm32 to the decoded modrm operand (MEM). A non zero opreg
 * branches to ud. The immediate is copied without byte swapping.
 */
movl_imm_mem:
	lwz r0,1(eip)			# raw 32 bit immediate
	cmpwi opreg,0			# only extension 0 is accepted
	lbzu opcode,5(eip)		# advance eip by 5, fetch next opcode
	bne- ud
	stwx r0,MEM
	GOTNEXT
|
|
|
|
/* The special short form moves between memory and al/ax/eax */
|
|
movb_al_a32: lwbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,5(eip)
|
|
stbx r0,MEM; GOTNEXT
|
|
|
|
movb_al_a16: lhbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,3(eip)
|
|
stbx r0,MEM; GOTNEXT
|
|
|
|
movw_ax_a32: lwbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,5(eip)
|
|
sthx r0,MEM; GOTNEXT
|
|
|
|
movw_ax_a16: lhbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,3(eip)
|
|
sthx r0,MEM; GOTNEXT
|
|
|
|
movl_eax_a32: lwbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,5(eip)
|
|
stwx r0,MEM; GOTNEXT
|
|
|
|
movl_eax_a16: lhbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,3(eip)
|
|
stwx r0,MEM; GOTNEXT
|
|
|
|
movb_a32_al: lwbrx offset,eip,one; lbzu opcode,5(eip); lbzx r0,MEM
|
|
stb r0,AL(state); GOTNEXT
|
|
|
|
movb_a16_al: lhbrx offset,eip,one; lbzu opcode,3(eip); lbzx r0,MEM
|
|
stb r0,AL(state); GOTNEXT
|
|
|
|
movw_a32_ax: lwbrx offset,eip,one; lbzu opcode,5(eip); lhzx r0,MEM
|
|
sth r0,AX(state); GOTNEXT
|
|
|
|
movw_a16_ax: lhbrx offset,eip,one; lbzu opcode,3(eip); lhzx r0,MEM
|
|
sth r0,AX(state); GOTNEXT
|
|
|
|
movl_a32_eax: lwbrx offset,eip,one; lbzu opcode,5(eip); lwzx r0,MEM
|
|
stw r0,EAX(state); GOTNEXT
|
|
|
|
movl_a16_eax: lhbrx offset,eip,one; lbzu opcode,3(eip); lwzx r0,MEM
|
|
stw r0,EAX(state); GOTNEXT
|
|
|
|
/* General purpose move (all are exactly 4 instructions long) */
|
|
.align 4
|
|
movb_reg_mem: lbzx r0,REG
|
|
NEXTBYTE(opcode)
|
|
stbx r0,MEM
|
|
GOTNEXT
|
|
|
|
movw_reg_mem: lhzx r0,REG
|
|
NEXTBYTE(opcode)
|
|
sthx r0,MEM
|
|
GOTNEXT
|
|
|
|
movl_reg_mem: lwzx r0,REG
|
|
NEXTBYTE(opcode)
|
|
stwx r0,MEM
|
|
GOTNEXT
|
|
|
|
movb_mem_reg: lbzx r0,MEM
|
|
NEXTBYTE(opcode)
|
|
stbx r0,REG
|
|
GOTNEXT
|
|
|
|
movw_mem_reg: lhzx r0,MEM
|
|
NEXTBYTE(opcode)
|
|
sthx r0,REG
|
|
GOTNEXT
|
|
|
|
movl_mem_reg: lwzx r0,MEM
|
|
NEXTBYTE(opcode)
|
|
stwx r0,REG
|
|
GOTNEXT
|
|
|
|
/* short form exchange ax/eax with register */
|
|
xchgw_ax_reg: clrlslwi opreg,opcode,29,2
|
|
lhz r3,AX(state)
|
|
lhzx r4,REG
|
|
sthx r3,REG
|
|
sth r4,AX(state)
|
|
NEXT
|
|
|
|
xchgl_eax_reg: clrlslwi opreg,opcode,29,2
|
|
lwz r3,EAX(state)
|
|
lwzx r4,REG
|
|
stwx r3,REG
|
|
stw r4,EAX(state)
|
|
NEXT
|
|
|
|
/* General exchange (unlocked!) */
|
|
xchgb_reg_mem: lbzx r3,MEM
|
|
lbzx r4,REG
|
|
NEXTBYTE(opcode)
|
|
stbx r3,REG
|
|
stbx r4,MEM
|
|
GOTNEXT
|
|
|
|
xchgw_reg_mem: lhzx r3,MEM
|
|
lhzx r4,REG
|
|
sthx r3,REG
|
|
sthx r4,MEM
|
|
NEXT
|
|
|
|
xchgl_reg_mem: lwzx r3,MEM
|
|
lwzx r4,REG
|
|
stwx r3,REG
|
|
stwx r4,MEM
|
|
NEXT
|
|
|
|
/* lea, one of the simplest instructions */
|
|
leaw: cmpw base,state
|
|
beq- ud
|
|
sthbrx offset,REG
|
|
NEXT
|
|
|
|
leal: cmpw base,state
|
|
beq- ud
|
|
stwbrx offset,REG
|
|
NEXT
|
|
|
|
/* Short form pushes and pops */
|
|
pushw_sp_reg: li r3,SP
|
|
lhbrx r4,state,r3
|
|
clrlslwi opreg,opcode,29,2
|
|
lhzx r0,REG
|
|
addi r4,r4,-2
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
sthx r0,ssb,r4
|
|
NEXT
|
|
|
|
pushl_sp_reg: li r3,SP
|
|
lhbrx r4,state,r3
|
|
clrlslwi opreg,opcode,29,2
|
|
lwzx r0,REG
|
|
addi r4,r4,-4
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
stwx r0,ssb,r4
|
|
NEXT
|
|
|
|
popw_sp_reg: li r3,SP
|
|
lhbrx r4,state,r3
|
|
clrlslwi opreg,opcode,29,2
|
|
lhzx r0,ssb,r4
|
|
addi r4,r4,2 # order is important in case of pop sp
|
|
sthbrx r4,state,r3
|
|
sthx r0,REG
|
|
NEXT
|
|
|
|
popl_sp_reg: li r3,SP
|
|
lhbrx r4,state,r3
|
|
clrlslwi opreg,opcode,29,2
|
|
lwzx r0,ssb,r4
|
|
addi r4,r4,4
|
|
sthbrx r4,state,r3
|
|
stwx r0,REG
|
|
NEXT
|
|
|
|
/* Push immediate */
|
|
pushw_sp_imm: li r3,SP
|
|
lhbrx r4,state,r3
|
|
lhz r0,1(eip)
|
|
addi r4,r4,-2
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
lbzu opcode,3(eip)
|
|
sthx r0,ssb,r4
|
|
GOTNEXT
|
|
|
|
pushl_sp_imm: li r3,SP
|
|
lhbrx r4,state,r3
|
|
lwz r0,1(eip)
|
|
addi r4,r4,-4
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
lbzu opcode,5(eip)
|
|
stwx r0,ssb,r4
|
|
GOTNEXT
|
|
|
|
/*
 * push imm8 sign extended to 16 bits, 16 bit stack pointer.
 * Fix: the immediate was fetched with lhz, so the following extsb sign
 * extended the byte AFTER the immediate, and the value was stored with
 * sthx, i.e. big endian, on a stack that every other push in this file
 * fills in x86 (little endian) byte order. Load only the immediate byte,
 * sign extend it, and store it byte reversed.
 */
pushw_sp_imm8:	li r3,SP
	lhbrx r4,state,r3		# current sp
	lbz r0,1(eip)			# imm8
	addi r4,r4,-2
	sthbrx r4,state,r3		# new sp
	clrlwi r4,r4,16			# wrap within the stack segment
	lbzu opcode,2(eip)		# fetch next opcode
	extsb r0,r0
	sthbrx r0,r4,ssb		# little endian store
	GOTNEXT
|
|
|
|
/*
 * push imm8 sign extended to 32 bits, 16 bit stack pointer.
 * Fix: the immediate was fetched with lhz, so the following extsb sign
 * extended the byte AFTER the immediate, and the value was stored with
 * stwx, i.e. big endian, on a stack that every other push in this file
 * fills in x86 (little endian) byte order. Load only the immediate byte,
 * sign extend it, and store it byte reversed.
 */
pushl_sp_imm8:	li r3,SP
	lhbrx r4,state,r3		# current sp
	lbz r0,1(eip)			# imm8
	addi r4,r4,-4
	sthbrx r4,state,r3		# new sp
	clrlwi r4,r4,16			# wrap within the stack segment
	lbzu opcode,2(eip)		# fetch next opcode
	extsb r0,r0
	stwbrx r0,r4,ssb		# little endian store
	GOTNEXT
|
|
|
|
/* General push/pop */
|
|
pushw_sp: lhbrx r0,MEM
|
|
li r3,SP
|
|
lhbrx r4,state,r3
|
|
addi r4,r4,-2
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
sthbrx r0,r4,ssb
|
|
NEXT
|
|
|
|
pushl_sp: lwbrx r0,MEM
|
|
li r3,SP
|
|
lhbrx r4,state,r3
|
|
addi r4,r4,-4
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
stwbrx r0,r4,ssb
|
|
NEXT
|
|
|
|
/* pop is an exception with 32 bit addressing modes, it is possible
|
|
to calculate wrongly the address when esp is used as base. But 16 bit
|
|
addressing modes are safe */
|
|
|
|
/*
 * pop word into a memory/register operand, 16 bit addressing modes.
 * Fix: "cmpw cr1,opreg,0" is a register-register compare, so opreg was
 * compared with the contents of r0 (a scratch left over from the modrm
 * decoder) instead of the constant 0; use the immediate form.
 */
popw_sp_a16:	cmpwi cr1,opreg,0	# first check the opcode
	li r3,SP
	lhbrx r4,state,r3
	bne- cr1,ud
	lhzx r0,ssb,r4
	addi r4,r4,2
	sthx r0,MEM
	sthbrx r4,state,r3		# updated sp
	NEXT
|
|
|
|
/*
 * pop long into a memory/register operand, 16 bit addressing modes.
 * Fixes:
 *  - "cmpw cr1,opreg,0" compared opreg with register r0 rather than the
 *    constant 0; use the immediate form.
 *  - the stack pointer was advanced by 2 although 4 bytes are popped
 *    (compare popl_sp_reg, which adds 4).
 */
popl_sp_a16:	cmpwi cr1,opreg,0	# first check the opcode
	li r3,SP
	lhbrx r4,state,r3
	bne- cr1,ud
	lwzx r0,ssb,r4
	addi r4,r4,4			# four bytes were popped
	stwx r0,MEM
	sthbrx r4,state,r3		# updated sp
	NEXT
|
|
|
|
/* 32 bit addressing modes for pop not implemented for now. */
|
|
.equ popw_sp_a32,unimpl
|
|
.equ popl_sp_a32,unimpl
|
|
|
|
/* pusha/popa */
|
|
pushaw_sp: li r3,SP
|
|
li r0,8
|
|
lhbrx r4,r3,state
|
|
mtctr r0
|
|
addi r5,state,-4
|
|
1: addi r4,r4,-2
|
|
lhzu r6,4(r5)
|
|
clrlwi r4,r4,16
|
|
sthx r6,ssb,r4
|
|
bdnz 1b
|
|
sthbrx r4,r3,state # new sp
|
|
NEXT
|
|
|
|
pushal_sp: li r3,SP
|
|
li r0,8
|
|
lhbrx r4,r3,state
|
|
mtctr r0
|
|
addi r5,state,-4
|
|
1: addi r4,r4,-4
|
|
lwzu r6,4(r5)
|
|
clrlwi r4,r4,16
|
|
stwx r6,ssb,r4
|
|
bdnz 1b
|
|
sthbrx r4,r3,state # new sp
|
|
NEXT
|
|
|
|
popaw_sp: li r3,SP
|
|
li r0,8
|
|
lhbrx r4,state,r3
|
|
mtctr r0
|
|
addi r5,state,32
|
|
1: lhzx r6,ssb,r4
|
|
addi r4,r4,2
|
|
sthu r6,-4(r5)
|
|
clrlwi r4,r4,16
|
|
bdnz 1b
|
|
sthbrx r4,r3,state # updated sp
|
|
NEXT
|
|
|
|
/*
 * popad with a 16 bit stack pointer: pops edi, esi, ebp, discards the
 * saved esp, then pops ebx, edx, ecx, eax.
 *
 * r5 walks the register file downwards from state+32 and r0 is a shift
 * register tested after each doubling: negative means "pop the next
 * register too", positive non zero means "skip the esp slot", zero means
 * "done".
 *
 * Fixes:
 *  - the esp skip decremented r6 (dead, reloaded by the next lwzx)
 *    instead of the register file pointer r5, so the four remaining
 *    registers were written one slot too high (ebx value into esp, ...).
 *  - the mask 0xef00 kept the loop alive for an eighth pop, storing below
 *    the register file and leaving sp 4 bytes too high; 0xee00 yields
 *    exactly 3 pops, the skip, and 4 more pops.
 */
popal_sp:	li r3,SP
	lis r0,0xee00			# 3 pops, skip esp, 4 pops
	lhbrx r4,state,r3
	addi r5,state,32
1:	add. r0,r0,r0			# shift the mask, sets cr0
	lwzx r6,ssb,r4
	addi r4,r4,4
	stwu r6,-4(r5)
	clrlwi r4,r4,16
	blt 1b				# next register follows directly
	addi r5,r5,-4			# step over the esp slot
	beq 2f				# mask exhausted: all done
	addi r4,r4,4			# discard the saved esp
	clrlwi r4,r4,16
	b 1b
2:	sthbrx r4,state,r3		# updated sp
	NEXT
|
|
|
|
/* Moves with zero or sign extension: first the special cases */
|
|
cbw: lbz r3,AL(state)
|
|
extsb r3,r3
|
|
sthbrx r3,AX,state
|
|
NEXT
|
|
|
|
cwde: lhbrx r3,AX,state
|
|
extsh r3,r3
|
|
stwbrx r3,EAX,state
|
|
NEXT
|
|
|
|
cwd: lbz r3,AH(state)
|
|
extsb r3,r3
|
|
srwi r3,r3,8 # get sign bits
|
|
sth r3,DX(state)
|
|
NEXT
|
|
|
|
cdq: lwbrx r3,EAX,state
|
|
srawi r3,r3,31
|
|
stw r3,EDX(state) # byte order unimportant !
|
|
NEXT
|
|
|
|
/* The move with zero or sign extension are special since the source
|
|
and destination are not the same size. The register describing the destination
|
|
is modified to take this into account. */
|
|
|
|
movsbw: lbzx r3,MEM
|
|
rlwimi opreg,opreg,4,0x10
|
|
extsb r3,r3
|
|
rlwinm opreg,opreg,0,0x1c
|
|
sthbrx r3,REG
|
|
NEXT
|
|
|
|
movsbl: lbzx r3,MEM
|
|
rlwimi opreg,opreg,4,0x10
|
|
extsb r3,r3
|
|
rlwinm opreg,opreg,0,0x1c
|
|
stwbrx r3,REG
|
|
NEXT
|
|
|
|
.equ movsww, movw_mem_reg
|
|
|
|
movswl: lhbrx r3,MEM
|
|
extsh r3,r3
|
|
stwbrx r3,REG
|
|
NEXT
|
|
|
|
movzbw: lbzx r3,MEM
|
|
rlwimi opreg,opreg,4,0x10
|
|
rlwinm opreg,opreg,0,0x1c
|
|
sthbrx r3,REG
|
|
NEXT
|
|
|
|
movzbl: lbzx r3,MEM
|
|
rlwimi opreg,opreg,4,0x10
|
|
rlwinm opreg,opreg,0,0x1c
|
|
stwbrx r3,REG
|
|
NEXT
|
|
|
|
.equ movzww, movw_mem_reg
|
|
|
|
movzwl: lhbrx r3,MEM
|
|
stwbrx r3,REG
|
|
NEXT
|
|
|
|
/* Byte swapping */
|
|
bswap: clrlslwi opreg,opcode,29,2 # extract reg from opcode
|
|
lwbrx r0,REG
|
|
stwx r0,REG
|
|
NEXT
|
|
|
|
/* Input/output */
|
|
inb_port_al: NEXTBYTE(r4)
|
|
b 1f
|
|
inb_dx_al: li r4,DX
|
|
lhbrx r4,r4,state
|
|
1: li r3,code_inb
|
|
bl _check_port
|
|
lwz r3,iobase(state)
|
|
lbzx r5,r4,r3
|
|
eieio
|
|
stb r5,AL(state)
|
|
NEXT
|
|
|
|
inw_port_ax: NEXTBYTE(r4)
|
|
b 1f
|
|
inw_dx_ax: li r4,DX
|
|
lhbrx r4,r4,state
|
|
1: li r3,code_inw
|
|
bl _check_port
|
|
lwz r3,iobase(state)
|
|
lhzx r5,r4,r3
|
|
eieio
|
|
sth r5,AX(state)
|
|
NEXT
|
|
|
|
inl_port_eax: NEXTBYTE(r4)
|
|
b 1f
|
|
inl_dx_eax: li r4,DX
|
|
lhbrx r4,r4,state
|
|
1: li r3,code_inl
|
|
bl _check_port
|
|
lwz r3,iobase(state)
|
|
lwzx r5,r4,r3
|
|
eieio
|
|
stw r5,EAX(state)
|
|
NEXT
|
|
|
|
outb_al_port: NEXTBYTE(r4)
|
|
b 1f
|
|
outb_al_dx: li r4,DX
|
|
lhbrx r4,r4,state
|
|
1: li r3,code_outb
|
|
bl _check_port
|
|
lwz r3,iobase(state)
|
|
lbz r5,AL(state)
|
|
stbx r5,r4,r3
|
|
eieio
|
|
NEXT
|
|
|
|
outw_ax_port: NEXTBYTE(r4)
|
|
b 1f
|
|
outw_ax_dx: li r4,DX
|
|
lhbrx r4,r4,state
|
|
1: li r3,code_outw
|
|
bl _check_port
|
|
lwz r3,iobase(state)
|
|
lhz r5,AX(state)
|
|
sthx r5,r4,r3
|
|
eieio
|
|
NEXT
|
|
|
|
/*
 * out eax to an immediate port (outl_eax_port) or to the port in dx
 * (outl_eax_dx). _check_port receives the exception code in r3 and the
 * port number in r4.
 * Fix: iobase was loaded into r4, destroying the port number, and the
 * store then used r3 (still holding code_outl) as the index. Load iobase
 * into r3 as the byte and word variants do, so the store goes to
 * iobase + port.
 */
outl_eax_port:	NEXTBYTE(r4)
	b 1f
outl_eax_dx:	li r4,DX
	lhbrx r4,r4,state		# port number from dx
1:	li r3,code_outl
	bl _check_port
	lwz r3,iobase(state)
	lwz r5,EAX(state)
	stwx r5,r4,r3			# iobase + port
	eieio
	NEXT
|
|
|
|
/* Macro used for add and sub */
|
|
#define ARITH(op,fl) \
|
|
op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(fl(B)); lbzx op2,REG; \
|
|
op result,op1,op2; \
|
|
stbx result,MEM; NEXT; \
|
|
op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,REG; \
|
|
op result,op1,op2; \
|
|
sthbrx result,MEM; NEXT; \
|
|
op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,REG; \
|
|
op result,op1,op2; \
|
|
stwbrx result,MEM; NEXT; \
|
|
op##b_mem_reg: lbzx op2,MEM; SET_FLAGS(fl(B)); lbzx op1,REG; \
|
|
op result,op1,op2; \
|
|
stbx result,REG; NEXT; \
|
|
op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(fl(W)); lhbrx op1,REG; \
|
|
op result,op1,op2; \
|
|
sthbrx result,REG; NEXT; \
|
|
op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(fl(L)); lwbrx op1,REG; \
|
|
op result,op1,op2; \
|
|
stwbrx result,REG; NEXT; \
|
|
op##b_imm_al: addi base,state,0; li offset,AL; \
|
|
op##b_imm: lbzx op1,MEM; SET_FLAGS(fl(B)); lbz op2,1(eip); \
|
|
op result,op1,op2; \
|
|
lbzu opcode,2(eip); \
|
|
stbx result,MEM; GOTNEXT; \
|
|
op##w_imm_ax: addi base,state,0; li offset,AX; \
|
|
op##w_imm: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,eip,one; \
|
|
op result,op1,op2; \
|
|
lbzu opcode,3(eip); \
|
|
sthbrx result,MEM; GOTNEXT; \
|
|
op##w_imm8: lbz op2,1(eip); SET_FLAGS(fl(W)); lhbrx op1,MEM; \
|
|
extsb op2,op2; clrlwi op2,op2,16; \
|
|
op result,op1,op2; \
|
|
lbzu opcode,2(eip); \
|
|
sthbrx result,MEM; GOTNEXT; \
|
|
op##l_imm_eax: addi base,state,0; li offset,EAX; \
|
|
op##l_imm: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,eip,one; \
|
|
op result,op1,op2; lbzu opcode,5(eip); \
|
|
stwbrx result,MEM; GOTNEXT; \
|
|
op##l_imm8: lbz op2,1(eip); SET_FLAGS(fl(L)); lwbrx op1,MEM; \
|
|
extsb op2,op2; lbzu opcode,2(eip); \
|
|
op result,op1,op2; \
|
|
stwbrx result,MEM; GOTNEXT
|
|
|
|
ARITH(add, FLAGS_ADD)
|
|
ARITH(sub, FLAGS_SUB)
|
|
|
|
#define adc(result, op1, op2) adde result,op1,op2
|
|
#define sbb(result, op1, op2) subfe result,op2,op1
|
|
|
|
/*
 * Macro generating all the operand forms of adc and sbb. carryfor##op
 * moves the x86 carry of the preceding operation into xer[ca] before the
 * adde/subfe selected by op() is executed.
 *
 * Fixes with respect to the previous version:
 *  - *_mem_reg forms: the destination is the register, so op1 must come
 *    from REG and op2 from MEM (as in the ARITH macro); with the operands
 *    swapped sbb computed mem-reg instead of reg-mem.
 *  - l_imm8 form: it never called carryfor##op and used SET_FLAGS, so
 *    the operation ran with a stale xer[ca]; it now follows the same
 *    sequence as the w_imm8 form.
 *
 * NOTE(review): the reg_mem and imm forms load op1 before calling
 * carryfor##op, which itself reads op1 (subfe r3,result,op1) to rebuild
 * the carry of the preceding operation. This looks suspicious after a
 * 32 bit operation; left unchanged here, to be confirmed against the
 * RES2CF/CF_ROTCNT definitions.
 */
#define ARITH_WITH_CARRY(op, fl) \
op##b_reg_mem:	lbzx op1,MEM; bl carryfor##op; lbzx op2,REG; \
		ADD_FLAGS(fl(B)); op(result, op1, op2); \
		stbx result,MEM; NEXT; \
op##w_reg_mem:	lhbrx op1,MEM; bl carryfor##op; lhbrx op2,REG; \
		ADD_FLAGS(fl(W)); op(result, op1, op2); \
		sthbrx result,MEM; NEXT; \
op##l_reg_mem:	lwbrx op1,MEM; bl carryfor##op; lwbrx op2,REG; \
		ADD_FLAGS(fl(L)); op(result, op1, op2); \
		stwbrx result,MEM; NEXT; \
op##b_mem_reg:	lbzx op2,MEM; bl carryfor##op; lbzx op1,REG; \
		ADD_FLAGS(fl(B)); op(result, op1, op2); \
		stbx result,REG; NEXT; \
op##w_mem_reg:	lhbrx op2,MEM; bl carryfor##op; lhbrx op1,REG; \
		ADD_FLAGS(fl(W)); op(result, op1, op2); \
		sthbrx result,REG; NEXT; \
op##l_mem_reg:	lwbrx op2,MEM; bl carryfor##op; lwbrx op1,REG; \
		ADD_FLAGS(fl(L)); op(result, op1, op2); \
		stwbrx result,REG; NEXT; \
op##b_imm_al:	addi base,state,0; li offset,AL; \
op##b_imm:	lbzx op1,MEM; bl carryfor##op; lbz op2,1(eip); \
		ADD_FLAGS(fl(B)); lbzu opcode,2(eip); op(result, op1, op2); \
		stbx result,MEM; GOTNEXT; \
op##w_imm_ax:	addi base,state,0; li offset,AX; \
op##w_imm:	lhbrx op1,MEM; bl carryfor##op; lhbrx op2,eip,one; \
		ADD_FLAGS(fl(W)); lbzu opcode,3(eip); op(result, op1, op2); \
		sthbrx result,MEM; GOTNEXT; \
op##w_imm8:	lbz op2,1(eip); bl carryfor##op; lhbrx op1,MEM; \
		extsb op2,op2; ADD_FLAGS(fl(W)); clrlwi op2,op2,16; \
		lbzu opcode,2(eip); op(result, op1, op2); \
		sthbrx result,MEM; GOTNEXT; \
op##l_imm_eax:	addi base,state,0; li offset,EAX; \
op##l_imm:	lwbrx op1,MEM; bl carryfor##op; lwbrx op2,eip,one; \
		ADD_FLAGS(fl(L)); lbzu opcode,5(eip); op(result, op1, op2); \
		stwbrx result,MEM; GOTNEXT; \
op##l_imm8:	lbz op2,1(eip); bl carryfor##op; lwbrx op1,MEM; \
		extsb op2,op2; ADD_FLAGS(fl(L)); lbzu opcode,2(eip); \
		op(result, op1, op2); \
		stwbrx result,MEM; GOTNEXT
|
|
|
|
carryforadc: addc r3,flags,flags # CF_IN to xer[ca]
|
|
RES2CF(r4) # get 8 or 16 bit carry
|
|
subfe r3,result,op1 # generate PPC carry for
|
|
CF_ROTCNT(r5) # preceding operation
|
|
addze r3,r4 # 32 bit carry in LSB
|
|
CF_POL(r4,23) # polarity
|
|
rlwnm r3,r3,r5,0x100 # shift carry there
|
|
xor flags,r4,r3 # CF86 ? 0x100 : 0
|
|
addic r4,r3,0xffffff00 # set xer[ca]
|
|
rlwinm flags,r3,23,CF_IN
|
|
blr
|
|
|
|
ARITH_WITH_CARRY(adc, FLAGS_ADD)
|
|
|
|
/* for sbb the input carry must be the complement of the x86 carry */
|
|
carryforsbb: addc r3,flags,flags # CF_IN to xer[ca]
|
|
RES2CF(r4) # 8/16 bit carry from result
|
|
subfe r3,result,op1
|
|
CF_ROTCNT(r5)
|
|
addze r3,r4
|
|
CF_POL(r4,23)
|
|
rlwnm r3,r3,r5,0x100
|
|
eqv flags,r4,r3 # CF86 ? 0xfffffeff:0xffffffff
|
|
addic r4,r3,1 # set xer[ca]
|
|
rlwinm flags,r3,23,CF_IN # keep only the carry
|
|
blr
|
|
|
|
ARITH_WITH_CARRY(sbb, FLAGS_SBB)
|
|
|
|
cmpb_reg_mem: lbzx op1,MEM
|
|
SET_FLAGS(FLAGS_CMP(B))
|
|
lbzx op2,REG
|
|
extsb r3,op1
|
|
cmplw cr4,op1,op2
|
|
extsb r4,op2
|
|
sub result,op1,op2
|
|
cmpw cr6,r3,r4
|
|
NEXT
|
|
|
|
cmpw_reg_mem: lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_CMP(W))
|
|
lhbrx op2,REG
|
|
extsh r3,op1
|
|
cmplw cr4,op1,op2
|
|
extsh r4,op2
|
|
sub result,op1,op2
|
|
cmpw cr6,r3,r4
|
|
NEXT
|
|
|
|
cmpl_reg_mem: lwbrx op1,MEM
|
|
SET_FLAGS(FLAGS_CMP(L))
|
|
lwbrx op2,REG
|
|
cmplw cr4,op1,op2
|
|
sub result,op1,op2
|
|
cmpw cr6,op1,op2
|
|
NEXT
|
|
|
|
cmpb_mem_reg: lbzx op2,MEM
|
|
SET_FLAGS(FLAGS_CMP(B))
|
|
lbzx op1,REG
|
|
extsb r4,op2
|
|
cmplw cr4,op1,op2
|
|
extsb r3,op1
|
|
sub result,op1,op2
|
|
cmpw cr6,r3,r4
|
|
NEXT
|
|
|
|
cmpw_mem_reg: lhbrx op2,MEM
|
|
SET_FLAGS(FLAGS_CMP(W))
|
|
lhbrx op1,REG
|
|
extsh r4,op2
|
|
cmplw cr4,op1,op2
|
|
extsh r3,op1
|
|
sub result,op1,op2
|
|
cmpw cr6,r3,r4
|
|
NEXT
|
|
|
|
cmpl_mem_reg: lwbrx op2,MEM
|
|
SET_FLAGS(FLAGS_CMP(L))
|
|
lwbrx op1,REG
|
|
cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
cmplw cr4,op1,op2
|
|
NEXT
|
|
|
|
cmpb_imm_al: addi base,state,0
|
|
li offset,AL
|
|
cmpb_imm: lbzx op1,MEM
|
|
SET_FLAGS(FLAGS_CMP(B))
|
|
lbz op2,1(eip)
|
|
extsb r3,op1
|
|
cmplw cr4,op1,op2
|
|
lbzu opcode,2(eip)
|
|
extsb r4,op2
|
|
sub result,op1,op2
|
|
cmpw cr6,r3,r4
|
|
GOTNEXT
|
|
|
|
cmpw_imm_ax: addi base,state,0
|
|
li offset,AX
|
|
cmpw_imm: lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_CMP(W))
|
|
lhbrx op2,eip,one
|
|
extsh r3,op1
|
|
cmplw cr4,op1,op2
|
|
lbzu opcode,3(eip)
|
|
extsh r4,op2
|
|
sub result,op1,op2
|
|
cmpw cr6,r3,r4
|
|
GOTNEXT
|
|
|
|
cmpw_imm8: lbz op2,1(eip)
|
|
SET_FLAGS(FLAGS_CMP(W))
|
|
lhbrx op1,MEM
|
|
extsb r4,op2
|
|
extsh r3,op1
|
|
lbzu opcode,2(eip)
|
|
clrlwi op2,r4,16
|
|
cmpw cr6,r3,r4
|
|
sub result,op1,op2
|
|
cmplw cr4,op1,op2
|
|
GOTNEXT
|
|
|
|
cmpl_imm_eax: addi base,state,0
|
|
li offset,EAX
|
|
cmpl_imm: lwbrx op1,MEM
|
|
SET_FLAGS(FLAGS_CMP(L))
|
|
lwbrx op2,eip,one
|
|
cmpw cr6,op1,op2
|
|
lbzu opcode,5(eip)
|
|
sub result,op1,op2
|
|
cmplw cr4,op1,op2
|
|
GOTNEXT
|
|
|
|
cmpl_imm8: lbz op2,1(eip)
|
|
SET_FLAGS(FLAGS_CMP(L))
|
|
lwbrx op1,MEM
|
|
extsb op2,op2
|
|
lbzu opcode,2(eip)
|
|
cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
cmplw cr4,op1,op2
|
|
GOTNEXT
|
|
|
|
/* Increment and decrement */
|
|
incb: lbzx op2,MEM
|
|
INC_FLAGS(B)
|
|
addi op2,op2,1
|
|
stbx op2,MEM
|
|
NEXT
|
|
|
|
incw_reg: clrlslwi opreg,opcode,29,2 # extract reg from opcode
|
|
lhbrx op2,REG
|
|
INC_FLAGS(W)
|
|
addi op2,op2,1
|
|
sthbrx op2,REG
|
|
NEXT
|
|
|
|
incw: lhbrx op2,MEM
|
|
INC_FLAGS(W)
|
|
addi op2,op2,1
|
|
sthbrx op2,MEM
|
|
NEXT
|
|
|
|
/*
 * Short form inc of a 32 bit register (register number in the three low
 * bits of the opcode).
 * Fix: the result was written back with sthbrx, updating only the low
 * 16 bits of the register, so a carry into the upper half was lost. Use
 * the 32 bit store, as incl does for its operand.
 */
incl_reg:	clrlslwi opreg,opcode,29,2	# extract reg from opcode
	lwbrx op2,REG
	INC_FLAGS(L)
	addi op2,op2,1
	stwbrx op2,REG			# full 32 bit write back
	NEXT
|
|
|
|
incl: lwbrx op2,MEM
|
|
INC_FLAGS(L)
|
|
addi op2,op2,1
|
|
stwbrx op2,MEM
|
|
NEXT
|
|
|
|
decb: lbzx op2,MEM
|
|
DEC_FLAGS(B)
|
|
addi op2,op2,-1
|
|
stbx op2,MEM
|
|
NEXT
|
|
|
|
decw_reg: clrlslwi opreg,opcode,29,2 # extract reg from opcode
|
|
lhbrx op2,REG
|
|
DEC_FLAGS(W)
|
|
addi op2,op2,-1
|
|
sthbrx op2,REG
|
|
NEXT
|
|
|
|
decw: lhbrx op2,MEM
|
|
DEC_FLAGS(W)
|
|
addi op2,op2,-1
|
|
sthbrx op2,MEM
|
|
NEXT
|
|
|
|
/*
 * Short form dec of a 32 bit register (register number in the three low
 * bits of the opcode).
 * Fix: the result was written back with sthbrx, updating only the low
 * 16 bits of the register, so a borrow from the upper half was lost. Use
 * the 32 bit store, as decl does for its operand.
 */
decl_reg:	clrlslwi opreg,opcode,29,2	# extract reg from opcode
	lwbrx op2,REG
	DEC_FLAGS(L)
	addi op2,op2,-1
	stwbrx op2,REG			# full 32 bit write back
	NEXT
|
|
|
|
decl: lwbrx op2,MEM
|
|
DEC_FLAGS(L)
|
|
addi op2,op2,-1
|
|
stwbrx op2,MEM
|
|
NEXT
|
|
|
|
negb: lbzx op2,MEM
|
|
SET_FLAGS(FLAGS_SUB(B))
|
|
neg result,op2
|
|
li op1,0
|
|
stbx result,MEM
|
|
NEXT
|
|
|
|
/*
 * neg of a 16 bit operand, recorded as the subtraction 0 - op2 for the
 * lazy flag evaluation (op1 = 0, result = -op2).
 * Fix: r0 was stored instead of result, so the operand received an
 * unrelated scratch value; negb and negl both store result.
 */
negw:	lhbrx op2,MEM
	SET_FLAGS(FLAGS_SUB(W))
	neg result,op2
	li op1,0
	sthbrx result,MEM
	NEXT
|
|
|
|
negl: lwbrx op2,MEM
|
|
SET_FLAGS(FLAGS_SUB(L))
|
|
subfic result,op2,0
|
|
li op1,0
|
|
stwbrx result,MEM
|
|
NEXT
|
|
|
|
/* Macro used to generate code for OR/AND/XOR */
|
|
#define LOGICAL(op) \
|
|
op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
|
|
op result,op1,op2; \
|
|
stbx result,MEM; NEXT; \
|
|
op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,REG; \
|
|
op result,op1,op2; \
|
|
sthbrx result,MEM; NEXT; \
|
|
op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,REG; \
|
|
op result,op1,op2; \
|
|
stwbrx result,MEM; NEXT; \
|
|
op##b_mem_reg: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
|
|
op result,op1,op2; \
|
|
stbx result,REG; NEXT; \
|
|
op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,REG; \
|
|
op result,op1,op2; \
|
|
sthbrx result,REG; NEXT; \
|
|
op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,REG; \
|
|
op result,op1,op2; \
|
|
stwbrx result,REG; NEXT; \
|
|
op##b_imm_al: addi base,state,0; li offset,AL; \
|
|
op##b_imm: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbz op2,1(eip); \
|
|
op result,op1,op2; lbzu opcode,2(eip); \
|
|
stbx result,MEM; GOTNEXT; \
|
|
op##w_imm_ax: addi base,state,0; li offset,AX; \
|
|
op##w_imm: lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,eip,one; \
|
|
op result,op1,op2; lbzu opcode,3(eip); \
|
|
sthbrx result,MEM; GOTNEXT; \
|
|
op##w_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,MEM; \
|
|
extsb op2,op2; lbzu opcode,2(eip); \
|
|
op result,op1,op2; \
|
|
sthbrx result,MEM; GOTNEXT; \
|
|
op##l_imm_eax: addi base,state,0; li offset,EAX; \
|
|
op##l_imm: lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,eip,one; \
|
|
op result,op1,op2; lbzu opcode,5(eip); \
|
|
stwbrx result,MEM; GOTNEXT; \
|
|
op##l_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,MEM; \
|
|
extsb op2,op2; lbzu opcode,2(eip); \
|
|
op result,op1,op2; \
|
|
stwbrx result,MEM; GOTNEXT
|
|
|
|
LOGICAL(or)
|
|
|
|
LOGICAL(and)
|
|
|
|
LOGICAL(xor)
|
|
|
|
testb_reg_mem: lbzx op1,MEM
|
|
SET_FLAGS(FLAGS_TEST(B))
|
|
lbzx op2,REG
|
|
and result,op1,op2
|
|
extsb r3,result
|
|
cmpwi cr6,r3,0
|
|
NEXT
|
|
|
|
testw_reg_mem: lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_TEST(W))
|
|
lhbrx op2,REG
|
|
and result,op1,op2
|
|
extsh r3,result
|
|
cmpwi cr6,r3,0
|
|
NEXT
|
|
|
|
/*
 * test r/m32,r32: AND of the two operands, only flags are affected.
 * The operands are loaded into the scratch registers r3/r4, so the AND
 * has to read those registers rather than op1/op2, which still hold
 * values from an earlier instruction.
 */
testl_reg_mem:	lwbrx r3,MEM
		SET_FLAGS(FLAGS_TEST(L))
		lwbrx r4,REG
		and result,r3,r4
		cmpwi cr6,result,0		# signed compare of the result with 0
		NEXT
|
|
|
|
testb_imm_al: addi base,state,0
|
|
li offset,AL
|
|
testb_imm: lbzx op1,MEM
|
|
SET_FLAGS(FLAGS_TEST(B))
|
|
lbz op2,1(eip)
|
|
and result,op1,op2
|
|
lbzu opcode,2(eip)
|
|
extsb r3,result
|
|
cmpwi cr6,r3,0
|
|
GOTNEXT
|
|
|
|
testw_imm_ax: addi base,state,0
|
|
li offset,AX
|
|
testw_imm: lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_TEST(W))
|
|
lhbrx op2,eip,one
|
|
and result,op1,op2
|
|
lbzu opcode,3(eip)
|
|
extsh r3,result
|
|
cmpwi cr6,r3,0
|
|
GOTNEXT
|
|
|
|
/*
 * test eax,imm32 (testl_imm_eax, which selects the EAX slot of the state
 * block and falls through) and test r/m32,imm32 (testl_imm).
 * The operands are loaded into op1/op2, so the AND has to read op1/op2
 * rather than r3/r4, which are not set by this handler.
 */
testl_imm_eax:	addi base,state,0
		li offset,EAX
testl_imm:	lwbrx op1,MEM
		SET_FLAGS(FLAGS_TEST(L))
		lwbrx op2,eip,one		# imm32 following the opcode byte
		and result,op1,op2
		lbzu opcode,5(eip)		# eip += 5 and fetch the next opcode
		cmpwi cr6,result,0
		GOTNEXT
|
|
|
|
/* Not does not affect flags */
|
|
notb: lbzx r3,MEM
|
|
xori r3,r3,255
|
|
stbx r3,MEM
|
|
NEXT
|
|
|
|
notw: lhzx r3,MEM
|
|
xori r3,r3,65535
|
|
sthx r3,MEM
|
|
NEXT
|
|
|
|
notl: lwzx r3,MEM
|
|
not r3,r3
|
|
stwx r3,MEM
|
|
NEXT
|
|
|
|
boundw: lhbrx r4,REG
|
|
li r3,code_bound
|
|
lhbrx r5,MEM
|
|
addi offset,offset,2
|
|
extsh r4,r4
|
|
lhbrx r6,MEM
|
|
extsh r5,r5
|
|
cmpw r4,r5
|
|
extsh r6,r6
|
|
blt- complex
|
|
cmpw r4,r6
|
|
ble+ nop
|
|
b complex
|
|
|
|
boundl: lwbrx r4,REG
|
|
li r3,code_bound
|
|
lwbrx r5,MEM
|
|
addi offset,offset,4
|
|
lwbrx r6,MEM
|
|
cmpw r4,r5
|
|
blt- complex
|
|
cmpw r4,r6
|
|
ble+ nop
|
|
b complex
|
|
|
|
/* Bit test and modify instructions */
|
|
|
|
/* Common routine: bit index in op2, returns memory value in r3, mask in op2,
|
|
and of mask and value in op1. CF flag is set as with 32 bit add when bit is
|
|
non zero since result (which is cleared) will be less than op1, and in cr4,
|
|
all other flags are undefined from Intel doc. Here OF and SF are cleared
|
|
and ZF is set as a side effect of result being cleared. */
|
|
/*
 * 16-bit flavour. When base differs from state (NOTE(review): presumably
 * a memory operand, confirm against the decoder) the signed bit index
 * selects a word relative to the operand address: the word index is
 * bitindex >> 4 and the byte displacement is twice that, as offset is a
 * byte offset. NOTE(review): the immediate forms also pass through the
 * displacement path; Intel documents the immediate index as taken modulo
 * the operand size, which is not enforced here.
 */
_setup_bitw:	cmpw base,state
		SET_FLAGS(FLAGS_BTEST)
		extsh op2,op2			# signed 16-bit bit index
		beq- 1f				# base == state: no displacement
		srawi r4,op2,4			# signed word index
		slwi r4,r4,1			# word index -> byte displacement
		add offset,offset,r4
1:		clrlwi op2,op2,28		# true bit index
		lhbrx r3,MEM
		slw op2,one,op2			# build mask
		li result,0			# implicitly sets CF
		and op1,r3,op2			# if result<op1
		cmplw cr4,result,op1		# sets CF in cr4
		blr
|
|
|
|
/*
 * 32-bit flavour. When base differs from state the bit index selects a
 * longword relative to the operand address: the longword index is
 * bitindex >> 5 and the byte displacement is four times that, as offset
 * is a byte offset. NOTE(review): the callers load the index with lhbrx,
 * so only a zero-extended 16-bit index ever arrives here.
 */
_setup_bitl:	cmpw base,state
		SET_FLAGS(FLAGS_BTEST)
		beq- 1f				# base == state: no displacement
		srawi r4,op2,5			# signed longword index
		slwi r4,r4,2			# longword index -> byte displacement
		add offset,offset,r4
1:		lwbrx r3,MEM
		rotlw op2,one,op2		# build mask
		li result,0
		and op1,r3,op2
		cmplw cr4,result,op1
		blr
|
|
|
|
/* Immediate forms of the bit tests are infrequent since logical instructions are often faster */
|
|
btw_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btw_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitw
|
|
NEXT
|
|
|
|
btl_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btl_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitl
|
|
NEXT
|
|
|
|
btcw_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btcw_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitw
|
|
xor r3,r3,op2
|
|
sthbrx r3,MEM
|
|
NEXT
|
|
|
|
/*
 * btc r/m32: _setup_bitl returns the memory value in r3 and the bit mask
 * in op2 (and clears result for the flag computation). The complemented
 * value is therefore in r3 and that is what must be written back;
 * storing result would zero the whole operand.
 */
btcl_imm:	NEXTBYTE(op2)
		b 1f
btcl_reg_mem:	lhbrx op2,REG
1:		bl _setup_bitl
		xor r3,r3,op2			# complement the selected bit
		stwbrx r3,MEM
		NEXT
|
|
|
|
btrw_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btrw_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitw
|
|
andc r3,r3,op2
|
|
sthbrx r3,MEM
|
|
NEXT
|
|
|
|
btrl_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btrl_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitl
|
|
andc r3,r3,op2
|
|
stwbrx r3,MEM
|
|
NEXT
|
|
|
|
btsw_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btsw_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitw
|
|
or r3,r3,op2
|
|
sthbrx r3,MEM
|
|
NEXT
|
|
|
|
btsl_imm: NEXTBYTE(op2)
|
|
b 1f
|
|
btsl_reg_mem: lhbrx op2,REG
|
|
1: bl _setup_bitl
|
|
or r3,r3,op2
|
|
stwbrx r3,MEM
|
|
NEXT
|
|
|
|
/* Bit string search instructions, only ZF is defined after these, and the
|
|
result value is not defined when the bit field is zero. */
|
|
bsfw: lhbrx result,MEM
|
|
SET_FLAGS(FLAGS_BSRCH(W))
|
|
neg r3,result
|
|
cmpwi cr6,result,0 # sets ZF
|
|
and r3,r3,result # keep only LSB
|
|
cntlzw r3,r3
|
|
subfic r3,r3,31
|
|
sthbrx r3,REG
|
|
NEXT
|
|
|
|
bsfl: lwbrx result,MEM
|
|
SET_FLAGS(FLAGS_BSRCH(L))
|
|
neg r3,result
|
|
cmpwi cr6,result,0 # sets ZF
|
|
and r3,r3,result # keep only LSB
|
|
cntlzw r3,r3
|
|
subfic r3,r3,31
|
|
stwbrx r3,REG
|
|
NEXT
|
|
|
|
bsrw: lhbrx result,MEM
|
|
SET_FLAGS(FLAGS_BSRCH(W))
|
|
cntlzw r3,result
|
|
cmpwi cr6,result,0
|
|
subfic r3,r3,31
|
|
sthbrx r3,REG
|
|
NEXT
|
|
|
|
bsrl: lwbrx result,MEM
|
|
SET_FLAGS(FLAGS_BSRCH(L))
|
|
cntlzw r3,result
|
|
cmpwi cr6,result,0
|
|
subfic r3,r3,31
|
|
stwbrx r3,REG
|
|
NEXT
|
|
|
|
/* Unconditional jumps, first the indirect then the relative ones */
|
|
jmpw: lhbrx eip,MEM
|
|
lbzux opcode,eip,csb
|
|
GOTNEXT
|
|
|
|
jmpl: lwbrx eip,MEM
|
|
lbzux opcode,eip,csb
|
|
GOTNEXT
|
|
|
|
/*
 * Short (8-bit displacement) jump with a 16-bit instruction pointer.
 * eip is turned into an offset relative to csb, advanced past the two
 * byte instruction, displaced, wrapped to 16 bits and finally turned
 * back into a pointer by the update form of the opcode fetch.
 */
sjmp_w:		lbz r3,1(eip)			# displacement byte
		extsb r3,r3			# sign-extend it
		sub eip,eip,csb			# offset relative to csb
		addi eip,eip,2			# EIP after instruction
		add eip,eip,r3
		clrlwi eip,eip,16		# modulo 64k
		lbzux opcode,eip,csb		# eip = csb + offset, fetch opcode there
		GOTNEXT
|
|
|
|
jmp_w: lhbrx r3,eip,one # eip now off by 3
|
|
sub eip,eip,csb
|
|
addi r3,r3,3 # compensate
|
|
add eip,eip,r3
|
|
clrlwi eip,eip,16
|
|
lbzux opcode,eip,csb
|
|
GOTNEXT
|
|
|
|
sjmp_l: lbz r3,1(eip)
|
|
addi eip,eip,2
|
|
extsb r3,r3
|
|
lbzux opcode,eip,r3
|
|
GOTNEXT
|
|
|
|
jmp_l: lwbrx r3,eip,one # Simple
|
|
addi eip,eip,5
|
|
lbzux opcode,eip,r3
|
|
GOTNEXT
|
|
|
|
/* The conditional jumps: although it should not happen,
|
|
byte relative jumps (sjmp) may wrap around in 16 bit mode */
|
|
|
|
#define NOTTAKEN_S lbzu opcode,2(eip); GOTNEXT
|
|
#define NOTTAKEN_W lbzu opcode,3(eip); GOTNEXT
|
|
#define NOTTAKEN_L lbzu opcode,5(eip); GOTNEXT
|
|
|
|
#define CONDJMP(cond, eval, flag) \
|
|
sj##cond##_w: EVAL_##eval; bt flag,sjmp_w; NOTTAKEN_S; \
|
|
j##cond##_w: EVAL_##eval; bt flag,jmp_w; NOTTAKEN_W; \
|
|
sj##cond##_l: EVAL_##eval; bt flag,sjmp_l; NOTTAKEN_S; \
|
|
j##cond##_l: EVAL_##eval; bt flag,jmp_l; NOTTAKEN_L; \
|
|
sjn##cond##_w: EVAL_##eval; bf flag,sjmp_w; NOTTAKEN_S; \
|
|
jn##cond##_w: EVAL_##eval; bf flag,jmp_w; NOTTAKEN_W; \
|
|
sjn##cond##_l: EVAL_##eval; bf flag,sjmp_l; NOTTAKEN_S; \
|
|
jn##cond##_l: EVAL_##eval; bf flag,jmp_l; NOTTAKEN_L
|
|
|
|
CONDJMP(o, OF, OF)
|
|
CONDJMP(c, CF, CF)
|
|
CONDJMP(z, ZF, ZF)
|
|
CONDJMP(a, ABOVE, ABOVE)
|
|
CONDJMP(s, SF, SF)
|
|
CONDJMP(p, PF, PF)
|
|
CONDJMP(g, SIGNED, SGT)
|
|
CONDJMP(l, SIGNED, SLT)
|
|
|
|
jcxz_w: lhz r3,CX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
|
|
jcxz_l: lhz r3,CX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
|
|
jecxz_w: lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
|
|
jecxz_l: lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
|
|
|
|
/* Note that loop is somewhat strange, the data size attribute gives
|
|
the size of eip, and the address size whether the counter is cx or ecx.
|
|
This is the same for jcxz/jecxz. */
|
|
|
|
loopw_w: li opreg,CX
|
|
lhbrx r0,REG
|
|
sub. r0,r0,one
|
|
sthbrx r0,REG
|
|
bne+ sjmp_w
|
|
NOTTAKEN_S
|
|
|
|
loopl_w: li opreg,ECX
|
|
lwbrx r0,REG
|
|
sub. r0,r0,one
|
|
stwbrx r0,REG
|
|
bne+ sjmp_w
|
|
NOTTAKEN_S
|
|
|
|
loopw_l: li opreg,CX
|
|
lhbrx r0,REG
|
|
sub. r0,r0,one
|
|
sthbrx r0,REG
|
|
bne+ sjmp_l
|
|
NOTTAKEN_S
|
|
|
|
loopl_l: li opreg,ECX
|
|
lwbrx r0,REG
|
|
sub. r0,r0,one
|
|
stwbrx r0,REG
|
|
bne+ sjmp_l
|
|
NOTTAKEN_S
|
|
|
|
loopzw_w: li opreg,CX
|
|
lhbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
sthbrx r0,REG
|
|
bf ZF,1f
|
|
bne+ sjmp_w
|
|
1: NOTTAKEN_S
|
|
|
|
/*
 * loopz with ECX as counter and a 16-bit instruction pointer.
 * ECX is loaded into r0, so r0 is the register that has to be
 * decremented and stored back (as in the other seven loopz/loopnz
 * variants); the jump is taken when ZF is set and the new count is
 * not zero.
 */
loopzl_w:	li opreg,ECX
		lwbrx r0,REG
		EVAL_ZF
		sub. r0,r0,one			# decrement, cr0 = new count vs 0
		stwbrx r0,REG
		bf ZF,1f			# ZF clear: fall out of the loop
		bne+ sjmp_w
1:		NOTTAKEN_S
|
|
|
|
loopzw_l: li opreg,CX
|
|
lhbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
sthbrx r0,REG
|
|
bf ZF,1f
|
|
bne+ sjmp_l
|
|
1: NOTTAKEN_S
|
|
|
|
loopzl_l: li opreg,ECX
|
|
lwbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
stwbrx r0,REG
|
|
bf ZF,1f
|
|
bne+ sjmp_l
|
|
1: NOTTAKEN_S
|
|
|
|
loopnzw_w: li opreg,CX
|
|
lhbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
sthbrx r0,REG
|
|
bt ZF,1f
|
|
bne+ sjmp_w
|
|
1: NOTTAKEN_S
|
|
|
|
loopnzl_w: li opreg,ECX
|
|
lwbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
stwbrx r0,REG
|
|
bt ZF,1f
|
|
bne+ sjmp_w
|
|
1: NOTTAKEN_S
|
|
|
|
loopnzw_l: li opreg,CX
|
|
lhbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
sthbrx r0,REG
|
|
bt ZF,1f
|
|
bne+ sjmp_l
|
|
1: NOTTAKEN_S
|
|
|
|
loopnzl_l: li opreg,ECX
|
|
lwbrx r0,REG
|
|
EVAL_ZF
|
|
sub. r0,r0,one
|
|
stwbrx r0,REG
|
|
bt ZF,1f
|
|
bne+ sjmp_l
|
|
1: NOTTAKEN_S
|
|
|
|
/* Memory indirect calls are rare enough to limit code duplication */
|
|
callw_sp_mem: lhbrx r3,MEM
|
|
sub r4,eip,csb
|
|
addi r4,r4,1 # r4 is now return address
|
|
b 1f
|
|
.equ calll_sp_mem, unimpl
|
|
|
|
callw_sp: lhbrx r3,eip,one
|
|
sub r4,eip,csb
|
|
addi r4,r4,3 # r4 is return address
|
|
add r3,r4,r3
|
|
1: clrlwi eip,r3,16
|
|
li r5,SP
|
|
lhbrx r6,state,r5 # get sp
|
|
addi r6,r6,-2
|
|
lbzux opcode,eip,csb
|
|
sthbrx r6,state,r5 # update sp
|
|
clrlwi r6,r6,16
|
|
sthbrx r4,ssb,r6 # push return address
|
|
GOTNEXT
|
|
.equ calll_sp, unimpl
|
|
|
|
retw_sp_imm: li opreg,SP
|
|
lhbrx r4,REG
|
|
lhbrx r6,eip,one
|
|
addi r5,r4,2
|
|
lhbrx eip,ssb,r4
|
|
lbzux opcode,eip,csb
|
|
add r5,r5,r6
|
|
sthbrx r5,REG
|
|
GOTNEXT
|
|
|
|
.equ retl_sp_imm, unimpl
|
|
|
|
retw_sp: li opreg,SP
|
|
lhbrx r4,REG
|
|
addi r5,r4,2
|
|
lhbrx eip,ssb,r4
|
|
lbzux opcode,eip,csb
|
|
sthbrx r5,REG
|
|
GOTNEXT
|
|
|
|
.equ retl_sp, unimpl
|
|
|
|
/* Enter is a mess, and the description in Intel documents is actually wrong
|
|
* in most revisions (all PPro/PII I have but the old Pentium is Ok) !
|
|
*/
|
|
|
|
enterw_sp: lhbrx r0,eip,one # Stack space to allocate
|
|
li opreg,SP
|
|
lhbrx r3,REG # SP
|
|
li r7,BP
|
|
lbzu r4,3(eip) # nesting level
|
|
addi r3,r3,-2
|
|
lhbrx r5,state,r7 # Original BP
|
|
clrlwi r3,r3,16
|
|
sthbrx r5,ssb,r3 # Push BP
|
|
andi. r4,r4,31 # modulo 32 and test
|
|
mr r6,r3 # Save frame pointer to temp
|
|
beq 3f
|
|
mtctr r4 # iterate level-1 times
|
|
b 2f
|
|
1: addi r5,r5,-2 # copy list of frame pointers
|
|
clrlwi r5,r5,16
|
|
lhzx r4,ssb,r5
|
|
addi r3,r3,-2
|
|
clrlwi r3,r3,16
|
|
sthx r4,ssb,r3
|
|
2: bdnz 1b
|
|
addi r3,r3,-2 # save current frame pointer
|
|
clrlwi r3,r3,16
|
|
sthbrx r6,ssb,r3
|
|
3: sthbrx r6,state,r7 # New BP
|
|
sub r3,r3,r0
|
|
sthbrx r3,REG # Save new stack pointer
|
|
NEXT
|
|
|
|
.equ enterl_sp, unimpl
|
|
|
|
leavew_sp: li opreg,BP
|
|
lhbrx r3,REG # Stack = BP
|
|
addi r4,r3,2 #
|
|
lhzx r3,ssb,r3
|
|
li opreg,SP
|
|
sthbrx r4,REG # New Stack
|
|
sth r3,BP(state) # Popped BP
|
|
NEXT
|
|
|
|
.equ leavel_sp, unimpl
|
|
|
|
/* String instructions: first a generic setup routine, which exits early
|
|
if there is a repeat prefix with a count of 0 */
|
|
#define STRINGSRC base,offset
|
|
#define STRINGDST esb,opreg
|
|
|
|
/*
 * Setup shared by the string instructions with 16-bit addressing.
 * On return: offset = SI, opreg = DI, ctr = iteration count (CX with a
 * repeat prefix, 1 without), r7 = +1 or -1 depending on DF, and cr0 still
 * holds the result of the prefix test (lt=repnz, gt=repz, eq=none).
 * The cmps/scas handlers branch on cr0 after the call and _finish_strw
 * tests cr0.eq, so the test for CX == 0 must not touch cr0: it is done
 * with the count register instead (ctr is loaded with count+1 and bdz
 * takes the extra one off again).
 * With a repeat prefix and CX == 0 control goes directly to nop.
 */
_setup_stringw:	li offset,SI			#
		rlwinm. r3,opcode,19,0,1	# lt=repnz, gt= repz, eq none
		li opreg,DI
		lhbrx offset,state,offset	# load si
		li r3,2				# no repeat: ctr becomes 1 below
		lhbrx opreg,state,opreg		# load di
		beq 1f				# no repeat
		li r3,CX
		lhbrx r3,state,r3		# load CX
		addi r3,r3,1			# CX+1, bdz subtracts 1 again
1:		mtctr r3
		bdz- nop			# ctr=CX or 1, early exit if 0 !
		li r7,1				# stride
		bflr+ DF
		li r7,-1			# change stride sign
		blr
|
|
|
|
/* Ending routine to update all changed registers (goes directly to NEXT) */
|
|
/*
 * Common tail of the 16-bit string instructions: writes offset and
 * opreg back to SI and DI. When cr0.eq is set control leaves through
 * nop and CX is left alone; otherwise the remaining count in ctr is
 * written to CX before going to NEXT.
 */
_finish_strw:	li r4,SI
		sthbrx offset,state,r4		# SI = offset
		li r4,DI
		sthbrx opreg,state,r4		# DI = opreg
		beq nop				# cr0.eq set: CX is not updated
		li r4,CX
		mfctr r3			# remaining count
		sthbrx r3,state,r4		# CX = ctr
		NEXT
|
|
|
|
lodsb_a16: bl _setup_stringw
|
|
1: lbzx r0,STRINGSRC # [rep] lodsb
|
|
add offset,offset,r7
|
|
clrlwi offset,offset,16
|
|
bdnz 1b
|
|
stb r0,AL(state)
|
|
b _finish_strw
|
|
|
|
lodsw_a16: bl _setup_stringw
|
|
slwi r7,r7,1
|
|
1: lhzx r0,STRINGSRC # [rep] lodsw
|
|
add offset,offset,r7
|
|
clrlwi offset,offset,16
|
|
bdnz 1b
|
|
sth r0,AX(state)
|
|
b _finish_strw
|
|
|
|
lodsl_a16: bl _setup_stringw
|
|
slwi r7,r7,2
|
|
1: lwzx r0,STRINGSRC # [rep] lodsl
|
|
add offset,offset,r7
|
|
clrlwi offset,offset,16
|
|
bdnz 1b
|
|
stw r0,EAX(state)
|
|
b _finish_strw
|
|
|
|
stosb_a16: bl _setup_stringw
|
|
lbz r0,AL(state)
|
|
1: stbx r0,STRINGDST # [rep] stosb
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
stosw_a16: bl _setup_stringw
|
|
lhz r0,AX(state)
|
|
slwi r7,r7,1
|
|
1: sthx r0,STRINGDST # [rep] stosw
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
stosl_a16: bl _setup_stringw
|
|
lwz r0,EAX(state)
|
|
slwi r7,r7,2
|
|
1: stwx r0,STRINGDST # [rep] stosl
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
movsb_a16: bl _setup_stringw
|
|
1: lbzx r0,STRINGSRC # [rep] movsb
|
|
add offset,offset,r7
|
|
stbx r0,STRINGDST
|
|
clrlwi offset,offset,16
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
movsw_a16: bl _setup_stringw
|
|
slwi r7,r7,1
|
|
1: lhzx r0,STRINGSRC # [rep] movsw
|
|
add offset,offset,r7
|
|
sthx r0,STRINGDST
|
|
clrlwi offset,offset,16
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
movsl_a16: bl _setup_stringw
|
|
slwi r7,r7,2
|
|
1: lwzx r0,STRINGSRC # [rep] movsl
|
|
add offset,offset,r7
|
|
stwx r0,STRINGDST
|
|
clrlwi offset,offset,16
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
/* At least on a Pentium, repeated string I/O instructions check for
|
|
access port permission even if count is 0 ! So the order of the check is not
|
|
important. */
|
|
insb_a16: li r4,DX
|
|
li r3,code_insb_a16
|
|
lhbrx r4,state,r4
|
|
bl _check_port
|
|
bl _setup_stringw
|
|
lwz base,iobase(state)
|
|
1: lbzx r0,base,r4 # [rep] insb
|
|
eieio
|
|
stbx r0,STRINGDST
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
insw_a16: li r4,DX
|
|
li r3,code_insw_a16
|
|
lhbrx r4,state,r4
|
|
bl _check_port
|
|
bl _setup_stringw
|
|
lwz base,iobase(state)
|
|
slwi r7,r7,1
|
|
1: lhzx r0,base,r4 # [rep] insw
|
|
eieio
|
|
sthx r0,STRINGDST
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
insl_a16: li r4,DX
|
|
li r3,code_insl_a16
|
|
lhbrx r4,state,r4
|
|
bl _check_port
|
|
bl _setup_stringw
|
|
lwz base,iobase(state)
|
|
slwi r7,r7,2
|
|
1: lwzx r0,base,r4 # [rep] insl
|
|
eieio
|
|
stwx r0,STRINGDST
|
|
add opreg,opreg,r7
|
|
clrlwi opreg,opreg,16
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
outsb_a16: li r4,DX
|
|
li r3,code_outsb_a16
|
|
lhbrx r4,state,r4
|
|
bl _check_port
|
|
bl _setup_stringw
|
|
lwz r6,iobase(state)
|
|
1: lbzx r0,STRINGSRC # [rep] outsb
|
|
add offset,offset,r7
|
|
stbx r0,r6,r4
|
|
clrlwi offset,offset,16
|
|
eieio
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
/*
 * [rep] outsw with 16-bit addressing: the port number is read from DX
 * into r4 and handed to _check_port together with the code in r3, then
 * words are copied from the source string to iobase + port. The unused
 * load of DX into r5 found in earlier versions is dropped; the byte and
 * longword variants never had it.
 */
outsw_a16:	li r4,DX
		li r3,code_outsw_a16
		lhbrx r4,state,r4		# port number
		bl _check_port
		bl _setup_stringw
		lwz r6,iobase(state)
		slwi r7,r7,1			# stride is 2 bytes
1:		lhzx r0,STRINGSRC		# [rep] outsw
		add offset,offset,r7
		sthx r0,r6,r4
		clrlwi offset,offset,16		# SI wraps modulo 64k
		eieio				# keep the I/O accesses ordered
		bdnz 1b
		b _finish_strw
|
|
|
|
outsl_a16: li r4,DX
|
|
li r3,code_outsl_a16
|
|
lhbrx r4,state,r4
|
|
bl _check_port
|
|
bl _setup_stringw
|
|
lwz r6,iobase(state)
|
|
slwi r7,r7,2
|
|
1: lwzx r0,STRINGSRC # [rep] outsl
|
|
add offset,offset,r7
|
|
stwx r0,r6,r4
|
|
clrlwi offset,offset,16
|
|
eieio
|
|
bdnz 1b
|
|
b _finish_strw
|
|
|
|
cmpsb_a16: bl _setup_stringw
|
|
SET_FLAGS(FLAGS_CMP(B))
|
|
blt 3f # repnz prefix
|
|
1: lbzx op1,STRINGSRC # [repz] cmpsb
|
|
add offset,offset,r7
|
|
lbzx op2,STRINGDST
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi offset,offset,16
|
|
clrlwi opreg,opreg,16
|
|
bdnzt CF+2,1b
|
|
2: extsb r3,op1
|
|
extsb r4,op2
|
|
cmpw cr6,r3,r4
|
|
sub result,op1,op2
|
|
b _finish_strw
|
|
|
|
3: lbzx op1,STRINGSRC # repnz cmpsb
|
|
add offset,offset,r7
|
|
lbzx op2,STRINGDST
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi offset,offset,16
|
|
clrlwi opreg,opreg,16
|
|
bdnzf CF+2,3b
|
|
b 2b
|
|
|
|
cmpsw_a16: bl _setup_stringw
|
|
SET_FLAGS(FLAGS_CMP(W))
|
|
slwi r7,r7,1
|
|
blt 3f # repnz prefix
|
|
1: lhbrx op1,STRINGSRC # [repz] cmpsb
|
|
add offset,offset,r7
|
|
lhbrx op2,STRINGDST
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi offset,offset,16
|
|
clrlwi opreg,opreg,16
|
|
bdnzt CF+2,1b
|
|
2: extsh r3,op1
|
|
extsh r4,op2
|
|
cmpw cr6,r3,r4
|
|
sub result,op1,op2
|
|
b _finish_strw
|
|
|
|
3: lhbrx op1,STRINGSRC # repnz cmpsw
|
|
add offset,offset,r7
|
|
lhbrx op2,STRINGDST
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi offset,offset,16
|
|
clrlwi opreg,opreg,16
|
|
bdnzf CF+2,3b
|
|
b 2b
|
|
|
|
cmpsl_a16: bl _setup_stringw
|
|
SET_FLAGS(FLAGS_CMP(L))
|
|
slwi r7,r7,2
|
|
blt 3f # repnz prefix
|
|
1: lwbrx op1,STRINGSRC # [repz] cmpsl
|
|
add offset,offset,r7
|
|
lwbrx op2,STRINGDST
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi offset,offset,16
|
|
clrlwi opreg,opreg,16
|
|
bdnzt CF+2,1b
|
|
2: cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
b _finish_strw
|
|
|
|
3: lwbrx op1,STRINGSRC # repnz cmpsl
|
|
add offset,offset,r7
|
|
lwbrx op2,STRINGDST
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi offset,offset,16
|
|
clrlwi opreg,opreg,16
|
|
bdnzf CF+2,3b
|
|
b 2b
|
|
|
|
scasb_a16: bl _setup_stringw
|
|
lbzx op1,AL,state # AL
|
|
SET_FLAGS(FLAGS_CMP(B))
|
|
bgt 3f # repz prefix
|
|
1: lbzx op2,STRINGDST # [repnz] scasb
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi opreg,opreg,16
|
|
bdnzf CF+2,1b
|
|
2: extsb r3,op1
|
|
extsb r4,op2
|
|
cmpw cr6,r3,r4
|
|
sub result,op1,op2
|
|
b _finish_strw
|
|
|
|
3: lbzx op2,STRINGDST # repz scasb
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi opreg,opreg,16
|
|
bdnzt CF+2,3b
|
|
b 2b
|
|
|
|
scasw_a16: bl _setup_stringw
|
|
lhbrx op1,AX,state
|
|
SET_FLAGS(FLAGS_CMP(W))
|
|
slwi r7,r7,1
|
|
bgt 3f # repz prefix
|
|
1: lhbrx op2,STRINGDST # [repnz] scasw
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi opreg,opreg,16
|
|
bdnzf CF+2,1b
|
|
2: extsh r3,op1
|
|
extsh r4,op2
|
|
cmpw cr6,r3,r4
|
|
sub result,op1,op2
|
|
b _finish_strw
|
|
|
|
3: lhbrx op2,STRINGDST # repz scasw
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi opreg,opreg,16
|
|
bdnzt CF+2,3b
|
|
b 2b
|
|
|
|
scasl_a16: bl _setup_stringw
|
|
lwbrx op1,EAX,state
|
|
SET_FLAGS(FLAGS_CMP(L))
|
|
slwi r7,r7,2
|
|
bgt 3f # repz prefix
|
|
1: lwbrx op2,STRINGDST # [repnz] scasl
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi opreg,opreg,16
|
|
bdnzf CF+2,1b
|
|
2: cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
b _finish_strw
|
|
|
|
3: lwbrx op2,STRINGDST # repz scasl
|
|
add opreg,opreg,r7
|
|
cmplw cr4,op1,op2
|
|
clrlwi opreg,opreg,16
|
|
bdnzt CF+2,3b
|
|
b 2b
|
|
|
|
.equ lodsb_a32, unimpl
|
|
.equ lodsw_a32, unimpl
|
|
.equ lodsl_a32, unimpl
|
|
.equ stosb_a32, unimpl
|
|
.equ stosw_a32, unimpl
|
|
.equ stosl_a32, unimpl
|
|
.equ movsb_a32, unimpl
|
|
.equ movsw_a32, unimpl
|
|
.equ movsl_a32, unimpl
|
|
.equ insb_a32, unimpl
|
|
.equ insw_a32, unimpl
|
|
.equ insl_a32, unimpl
|
|
.equ outsb_a32, unimpl
|
|
.equ outsw_a32, unimpl
|
|
.equ outsl_a32, unimpl
|
|
.equ cmpsb_a32, unimpl
|
|
.equ cmpsw_a32, unimpl
|
|
.equ cmpsl_a32, unimpl
|
|
.equ scasb_a32, unimpl
|
|
.equ scasw_a32, unimpl
|
|
.equ scasl_a32, unimpl
|
|
|
|
xlatb_a16: li offset,BX
|
|
lbz r3,AL(state)
|
|
lhbrx offset,offset,state
|
|
add r3,r3,base
|
|
lbzx r3,r3,offset
|
|
stb r3,AL(state)
|
|
NEXT
|
|
|
|
.equ xlatb_a32, unimpl
|
|
|
|
/*
|
|
* Shift and rotates: note the oddity that rotates do not affect SF/ZF/AF/PF
|
|
* but shifts do. Also testing has indicated that rotates with a count of zero
|
|
* do not affect any flag. The documentation specifies this for shifts but
|
|
* is more obscure for rotates. The overflow flag setting is only specified
|
|
* when count is 1, otherwise OF is undefined which simplifies emulation.
|
|
*/
|
|
|
|
/*
|
|
* The rotates through carry are among the most difficult instructions,
|
|
* they are implemented as a shift of 2*n+some bits depending on case.
|
|
* First the left rotates through carry.
|
|
*/
|
|
|
|
/* Byte rcl is performed on 18 bits (17 actually used) in a single register */
|
|
rclb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rclb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rclb_1: li r3,1
|
|
1: lbzx r0,MEM
|
|
andi. r3,r3,31 # count%32
|
|
addc r4,flags,flags # CF_IN->xer[ca]
|
|
RES2CF(r6)
|
|
subfe r4,result,op1
|
|
mulli r5,r3,29 # 29=ceil(256/9)
|
|
CF_ROTCNT(r7)
|
|
addze r6,r6
|
|
CF_POL_INSERT(r0,23)
|
|
srwi r5,r5,8 # count/9
|
|
rlwnm r6,r6,r7,0x100
|
|
xor r0,r0,r6 # (23)0:CF:data8
|
|
rlwimi r5,r5,3,26,28 # 9*(count/9)
|
|
rlwimi r0,r0,23,0,7 # CF:(data8):(14)0:CF:data8
|
|
sub r3,r3,r5 # count%9
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
rlwnm r0,r0,r3,0x000001ff # (23)0:NewCF:Result8
|
|
rlwimi flags,r0,19,CF_VALUE
|
|
stbx r0,MEM
|
|
rlwimi flags,r0,18,OF_XOR
|
|
NEXT
|
|
|
|
/* Word rcl is performed on 33 bits: CF:data16:CF:(15 MSB of data16) */
|
|
rclw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rclw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rclw_1: li r3,1
|
|
1: lhbrx r0,MEM
|
|
andi. r3,r3,31 # count=count%32
|
|
addc r4,flags,flags
|
|
RES2CF(r6)
|
|
subfe r4,result,op1
|
|
addi r5,r3,15 # modulo 17: >=32 if >=17
|
|
CF_ROTCNT(r7)
|
|
addze r6,r6
|
|
addi r7,r7,8
|
|
CF_POL_INSERT(r0,15)
|
|
srwi r5,r5,5 # count/17
|
|
rlwnm r6,r6,r7,0x10000
|
|
rlwimi r5,r5,4,27,27 # 17*(count/17)
|
|
xor r0,r0,r6 # (15)0:CF:data16
|
|
sub r3,r3,r5 # count%17
|
|
rlwinm r4,r0,15,0xffff0000 # CF:(15 MSB of data16):(16)0
|
|
slw r0,r0,r3 # New carry and MSBs
|
|
rlwnm r4,r4,r3,16,31 # New LSBs
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
add r0,r0,r4 # result
|
|
rlwimi flags,r0,11,CF_VALUE
|
|
sthbrx r0,MEM
|
|
rlwimi flags,r0,10,OF_XOR
|
|
NEXT
|
|
|
|
/* Longword rcl only needs 64 bits because the maximum rotate count is 31 ! */
|
|
/*
 * Three entry points differing only in where the count comes from:
 * immediate byte, CL, or the constant 1. The incoming carry is rebuilt
 * with the same addc/RES2CF/subfe/addze sequence as in the byte and word
 * rotates through carry; the addc has to add flags to itself so that the
 * top bit of flags ends up in XER[CA] (adding an uninitialised r4 would
 * make the carry unpredictable). RES2CF, CF_ROTCNT, CF_POL_INSERT and
 * ROTATE_FLAGS are macros defined elsewhere in this file.
 */
rcll_imm:	NEXTBYTE(r3)
		b 1f
rcll_cl:	lbz r3,CL(state)
		b 1f
rcll_1:		li r3,1
1:		lwbrx r0,MEM
		andi. r3,r3,31			# count=count%32
		addc r4,flags,flags		# CF_IN->xer[ca]
		RES2CF(r6)
		subfe r4,result,op1
		CF_ROTCNT(r7)
		addze r6,r6
		srwi r4,r0,1			# 0:(31 MSB of data32)
		addi r7,r7,23
		CF_POL_INSERT(r4,0)
		rlwnm r6,r6,r7,0,0
		beq- nop			# no flags changed if count 0
		subfic r5,r3,32
		xor r4,r4,r6
		ROTATE_FLAGS
		slw r0,r0,r3			# New MSBs
		srw r5,r4,r5			# New LSBs
		rlwnm r4,r4,r3,0,0		# New Carry
		add r0,r0,r5			# result
		rlwimi flags,r4,28,CF_VALUE
		rlwimi flags,r0,27,OF_XOR
		stwbrx r0,MEM
		NEXT
|
|
|
|
/* right rotates through carry are even worse because PPC only has a left
|
|
rotate instruction. Somewhat tough when combined with modulo 9, 17, or
|
|
33 operation and the rules of OF and CF flag settings. */
|
|
/* Byte rcr is performed on 17 bits */
|
|
rcrb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rcrb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rcrb_1: li r3,1
|
|
1: lbzx r0,MEM
|
|
andi. r3,r3,31 # count%32
|
|
addc r4,flags,flags # cf_in->xer[ca]
|
|
RES2CF(r6)
|
|
mulli r5,r3,29 # 29=ceil(256/9)
|
|
subfe r4,result,op1
|
|
CF_ROTCNT(r7)
|
|
addze r6,r6
|
|
CF_POL_INSERT(r0,23)
|
|
srwi r5,r5,8 # count/9
|
|
rlwimi r0,r0,9,0x0001fe00 # (15)0:data8:0:data8
|
|
rlwnm r6,r6,r7,0x100
|
|
rlwimi r5,r5,3,26,28 # 9*(count/9)
|
|
xor r0,r0,r6 # (15)0:data8:CF:data8
|
|
sub r3,r3,r5 # count%9
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
srw r0,r0,r3 # (23)junk:NewCF:Result8
|
|
rlwimi flags,r0,19,CF_VALUE|OF_XOR
|
|
stbx r0,MEM
|
|
NEXT
|
|
|
|
/* Word rcr is a 33 bit right shift with a quirk, because the 33rd bit
|
|
is only needed when the rotate count is 16 and rotating left or right
|
|
by 16 a 32 bit quantity is the same ! */
|
|
rcrw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rcrw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rcrw_1: li r3,1
|
|
1: lhbrx r0,MEM
|
|
andi. r3,r3,31 # count%32
|
|
addc r4,flags,flags # cf_in->xer[ca]
|
|
RES2CF(r6)
|
|
subfe r4,result,op1
|
|
addi r5,r3,15 # >=32 if >=17
|
|
CF_ROTCNT(r7)
|
|
addze r6,r6
|
|
addi r7,r7,8
|
|
CF_POL_INSERT(r0,15)
|
|
srwi r5,r5,5 # count/17
|
|
rlwnm r6,r6,r7,0x10000
|
|
rlwinm r7,r0,16,0x01 # MSB of data16
|
|
rlwimi r0,r0,17,0xfffe0000 # (15 MSB of data16):0:data16
|
|
rlwimi r5,r5,4,27,27 # 17*(count/17)
|
|
xor r0,r0,r6 # (15 MSB of data16):CF:data16
|
|
sub r3,r3,r5 # count%17
|
|
beq- nop # no flags changed if count 0
|
|
srw r0,r0,r3 # shift right
|
|
rlwnm r7,r7,r3,0x10000 # just in case count=16
|
|
ROTATE_FLAGS
|
|
add r0,r0,r7 # junk15:NewCF:result16
|
|
rlwimi flags,r0,11,CF_VALUE|OF_XOR
|
|
sthbrx r0,MEM
|
|
NEXT
|
|
|
|
/* Longword rcr need only 64 bits since the rotate count is limited to 31 */
|
|
/*
 * Three entry points differing only in where the count comes from:
 * immediate byte, CL, or the constant 1. Like every other rotate in
 * this file the handler has to invoke ROTATE_FLAGS before inserting the
 * new CF_VALUE and OF_XOR bits into flags; it is placed after the last
 * use of r6, at the same point as in the longword rcl, where r0, r3, r4
 * and r7 are live. RES2CF, CF_ROTCNT, CF_POL_INSERT and ROTATE_FLAGS
 * are macros defined elsewhere in this file.
 */
rcrl_imm:	NEXTBYTE(r3)
		b 1f
rcrl_cl:	lbz r3,CL(state)
		b 1f
rcrl_1:		li r3,1
1:		lwbrx r0,MEM
		andi. r3,r3,31			# count%32
		addc r4,flags,flags		# cf_in->xer[ca]
		RES2CF(r6)
		subfe r4,result,op1
		CF_ROTCNT(r7)
		slwi r4,r0,1			# (31MSB of data32):0
		addze r6,r6
		addi r7,r7,24
		CF_POL_INSERT(r4,31)
		rlwnm r6,r6,r7,0x01
		beq- nop			# no flags changed if count 0
		subfic r7,r3,32
		xor r4,r4,r6
		ROTATE_FLAGS
		srw r0,r0,r3			# Result LSB
		slw r5,r4,r7			# Result MSB
		srw r4,r4,r3			# NewCF in LSB
		add r0,r0,r5			# result
		rlwimi flags,r4,27,CF_VALUE
		stwbrx r0,MEM
		rlwimi flags,r0,27,OF_XOR
		NEXT
|
|
|
|
/* After the rotates through carry, normal rotates are so simple ! */
|
|
rolb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rolb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rolb_1: li r3,1
|
|
1: lbzx r0,MEM
|
|
andi. r4,r3,31 # count%32 == 0 ?
|
|
clrlwi r3,r3,29 # count%8
|
|
rlwimi r0,r0,24,0xff000000 # replicate for shift in
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
rotlw r0,r0,r3
|
|
rlwimi flags,r0,27,CF_VALUE # New CF
|
|
stbx r0,MEM
|
|
rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB)
|
|
NEXT
|
|
|
|
rolw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rolw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rolw_1: li r3,1
|
|
1: lhbrx r0,MEM
|
|
andi. r3,r3,31
|
|
rlwimi r0,r0,16,0,15 # duplicate
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
rotlw r0,r0,r3 # result word duplicated
|
|
rlwimi flags,r0,27,CF_VALUE # New CF
|
|
sthbrx r0,MEM
|
|
rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB)
|
|
NEXT
|
|
|
|
roll_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
roll_cl: lbz r3,CL(state)
|
|
b 1f
|
|
roll_1: li r3,1
|
|
1: lwbrx r0,MEM
|
|
andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
rotlw r0,r0,r3 # result
|
|
rlwimi flags,r0,27,CF_VALUE # New CF
|
|
stwbrx r0,MEM
|
|
rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB)
|
|
NEXT
|
|
|
|
rorb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rorb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rorb_1: li r3,1
|
|
1: lbzx r0,MEM
|
|
andi. r4,r3,31 # count%32 == 0 ?
|
|
clrlwi r3,r3,29 # count%8
|
|
rlwimi r0,r0,8,0x0000ff00 # replicate for shift in
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
srw r0,r0,r3
|
|
rlwimi flags,r0,20,CF_VALUE
|
|
stbx r0,MEM
|
|
rlwimi flags,r0,19,OF_XOR
|
|
NEXT
|
|
|
|
rorw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rorw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rorw_1: li r3,1
|
|
1: lhbrx r0,MEM
|
|
andi. r4,r3,31
|
|
clrlwi r3,r3,28 # count %16
|
|
rlwimi r0,r0,16,0xffff0000 # duplicate
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
srw r0,r0,r3 # junk16:result16
|
|
rlwimi flags,r0,12,CF_VALUE
|
|
sthbrx r0,MEM
|
|
rlwimi flags,r0,11,OF_XOR
|
|
NEXT
|
|
|
|
rorl_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
rorl_cl: lbz r3,CL(state)
|
|
b 1f
|
|
rorl_1: li r3,1
|
|
1: lwbrx r0,MEM
|
|
andi. r4,r3,31
|
|
neg r3,r3
|
|
beq- nop # no flags changed if count 0
|
|
ROTATE_FLAGS
|
|
rotlw r0,r0,r3 # result
|
|
rlwimi flags,r0,28,CF_VALUE
|
|
stwbrx r0,MEM
|
|
rlwimi flags,r0,27,OF_XOR
|
|
NEXT
|
|
|
|
/* Right arithmetic shifts: they clear OF whenever count!=0 */
|
|
#define SAR_FLAGS CF_ZERO|OF_ZERO|RESL
|
|
sarb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
sarb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
sarb_1: li r3,1
|
|
1: lbzx r4,MEM
|
|
andi. r3,r3,31
|
|
addi r5,r3,-1
|
|
extsb r4,r4
|
|
beq- nop # no flags changed if count 0
|
|
SET_FLAGS(SAR_FLAGS)
|
|
sraw result,r4,r3
|
|
srw r5,r4,r5
|
|
stbx result,MEM
|
|
rlwimi flags,r5,27,CF_VALUE
|
|
NEXT
|
|
|
|
sarw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
sarw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
sarw_1: li r3,1
|
|
1: lhbrx r4,MEM
|
|
andi. r3,r3,31
|
|
addi r5,r3,-1
|
|
extsh r4,r4
|
|
beq- nop # no flags changed if count 0
|
|
SET_FLAGS(SAR_FLAGS)
|
|
sraw result,r4,r3
|
|
srw r5,r4,r5
|
|
sthbrx result,MEM
|
|
rlwimi flags,r5,27,CF_VALUE
|
|
NEXT
|
|
|
|
sarl_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
sarl_cl: lbz r3,CL(state)
|
|
b 1f
|
|
sarl_1: li r3,1
|
|
1: lwbrx r4,MEM
|
|
andi. r3,r3,31
|
|
addi r5,r3,-1
|
|
beq- nop # no flags changed if count 0
|
|
SET_FLAGS(SAR_FLAGS)
|
|
sraw result,r4,r3
|
|
srw r5,r4,r5
|
|
stwbrx result,MEM
|
|
rlwimi flags,r5,27,CF_VALUE
|
|
NEXT
|
|
|
|
/* Left shifts are quite easy: they use the flag mechanism of add */
|
|
shlb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shlb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
shlb_1: li r3,1
|
|
1: andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
lbzx op1,MEM
|
|
SET_FLAGS(FLAGS_ADD(B))
|
|
slw result,op1,r3
|
|
addi op2,op1,0 # for OF computation only !
|
|
stbx result,MEM
|
|
NEXT
|
|
|
|
shlw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shlw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
shlw_1: li r3,1
|
|
1: andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_ADD(W))
|
|
slw result,op1,r3
|
|
addi op2,op1,0 # for OF computation only !
|
|
sthbrx result,MEM
|
|
NEXT
|
|
|
|
/* That one may be wrong */
|
|
shll_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shll_cl: lbz r3,CL(state)
|
|
b 1f
|
|
shll_1: li r3,1
|
|
1: andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
lwbrx op1,MEM
|
|
addi r4,r3,-1
|
|
SET_FLAGS(FLAGS_ADD(L))
|
|
slw result,op1,r3
|
|
addi op2,op1,0 # for OF computation only !
|
|
slw op1,op1,r4 # for CF computation
|
|
stwbrx result,MEM
|
|
NEXT
|
|
|
|
/* Right shifts are quite complex, because of funny flag rules ! */
|
|
shrb_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shrb_cl: lbz r3,CL(state)
|
|
b 1f
|
|
shrb_1: li r3,1
|
|
1: andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
lbzx op1,MEM
|
|
addi r4,r3,-1
|
|
SET_FLAGS(FLAGS_SHR(B))
|
|
srw result,op1,r3
|
|
srw r4,op1,r4
|
|
li op2,-1 # for OF computation only !
|
|
stbx result,MEM
|
|
rlwimi flags,r4,27,CF_VALUE # Set CF
|
|
NEXT
|
|
|
|
shrw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shrw_cl: lbz r3,CL(state)
|
|
b 1f
|
|
shrw_1: li r3,1
|
|
1: andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
lhbrx op1,MEM
|
|
addi r4,r3,-1
|
|
SET_FLAGS(FLAGS_SHR(W))
|
|
srw result,op1,r3
|
|
srw r4,op1,r4
|
|
li op2,-1 # for OF computation only !
|
|
sthbrx result,MEM
|
|
rlwimi flags,r4,27,CF_VALUE # Set CF
|
|
NEXT
|
|
|
|
shrl_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shrl_cl: lbz r3,CL(state)
|
|
b 1f
|
|
shrl_1: li r3,1
|
|
1: andi. r3,r3,31
|
|
beq- nop # no flags changed if count 0
|
|
lwbrx op1,MEM
|
|
addi r4,r3,-1
|
|
SET_FLAGS(FLAGS_SHR(L))
|
|
srw result,op1,r3
|
|
srw r4,op1,r4
|
|
li op2,-1 # for OF computation only !
|
|
stwbrx result,MEM
|
|
rlwimi flags,r4,27,CF_VALUE # Set CF
|
|
NEXT
|
|
|
|
/* Double length shifts, shldw uses FLAGS_ADD for simplicity */
|
|
shldw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shldw_cl: lbz r3,CL(state)
|
|
1: andi. r3,r3,31
|
|
beq- nop
|
|
lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_ADD(W))
|
|
lhbrx op2,REG
|
|
rlwimi op1,op2,16,0,15 # op2:op1
|
|
addi op2,op1,0
|
|
rotlw result,op1,r3
|
|
sthbrx result,MEM
|
|
NEXT
|
|
|
|
shldl_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shldl_cl: lbz r3,CL(state)
|
|
1: andi. r3,r3,31
|
|
beq- nop
|
|
lwbrx op1,MEM
|
|
SET_FLAGS(FLAGS_DBLSH(L))
|
|
lwbrx op2,REG
|
|
subfic r4,r3,32
|
|
slw result,op1,r3
|
|
srw r4,op2,r4
|
|
rotlw r3,op1,r3
|
|
or result,result,r4
|
|
addi op2,op1,0
|
|
rlwimi flags,r3,27,CF_VALUE
|
|
stwbrx result,MEM
|
|
NEXT
|
|
|
|
shrdw_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shrdw_cl: lbz r3,CL(state)
|
|
1: andi. r3,r3,31
|
|
beq- nop
|
|
lhbrx op1,MEM
|
|
SET_FLAGS(FLAGS_DBLSH(W))
|
|
lhbrx op2,REG
|
|
addi r4,r3,-1
|
|
rlwimi op1,op2,16,0,15 # op2:op1
|
|
addi op2,op1,0
|
|
srw result,op1,r3
|
|
srw r4,op1,r4
|
|
sthbrx result,MEM
|
|
rlwimi flags,r4,27,CF_VALUE
|
|
NEXT
|
|
|
|
shrdl_imm: NEXTBYTE(r3)
|
|
b 1f
|
|
shrdl_cl: lbz r3,CL(state)
|
|
1: andi. r3,r3,31
|
|
beq- nop
|
|
lwbrx op1,MEM
|
|
SET_FLAGS(FLAGS_DBLSH(L))
|
|
lwbrx op2,REG
|
|
subfic r4,r3,32
|
|
srw result,op1,r3
|
|
addi r3,r3,-1
|
|
slw r4,op2,r4
|
|
srw r3,op1,r3
|
|
or result,result,r4
|
|
addi op2,op1,0
|
|
rlwimi flags,r3,27,CF_VALUE
|
|
stwbrx result,MEM
|
|
NEXT
|
|
|
|
/* One operand multiplies: with result double the operand size, unsigned */
|
|
mulb: lbzx op2,MEM
|
|
lbz op1,AL(state)
|
|
mullw result,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
subfic r3,result,255
|
|
sthbrx result,AX,state
|
|
rlwimi flags,r3,0,CF_VALUE|OF_VALUE
|
|
NEXT
|
|
|
|
mulw: lhbrx op2,MEM
|
|
lhbrx op1,AX,state
|
|
mullw result,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
li r4,DX
|
|
srwi r3,result,16
|
|
sthbrx result,AX,state
|
|
neg r5,r3
|
|
sthbrx r3,r4,state # DX
|
|
rlwimi flags,r5,0,CF_VALUE|OF_VALUE
|
|
NEXT
|
|
|
|
mull: lwbrx op2,MEM
|
|
lwbrx op1,EAX,state
|
|
mullw result,op1,op2
|
|
mulhwu. r3,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
stwbrx result,EAX,state
|
|
li r4,EDX
|
|
stwbrx r3,r4,state
|
|
beq+ nop
|
|
oris flags,flags,(CF_SET|OF_SET)>>16
|
|
NEXT
|
|
|
|
/* One operand multiplies: with result double the operand size, signed */
|
|
imulb: lbzx op2,MEM
|
|
extsb op2,op2
|
|
lbz op1,AL(state)
|
|
extsb op1,op1
|
|
mullw result,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
extsb r3,result
|
|
sthbrx result,AX,state
|
|
cmpw r3,result
|
|
beq+ nop
|
|
oris flags,flags,(CF_SET|OF_SET)>>16
|
|
NEXT
|
|
|
|
imulw: lhbrx op2,MEM
|
|
extsh op2,op2
|
|
lhbrx op1,AX,state
|
|
extsh op1,op1
|
|
mullw result,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
li r3,DX
|
|
extsh r4,result
|
|
srwi r5,result,16
|
|
sthbrx result,AX,state
|
|
cmpw r4,result
|
|
sthbrx r5,r3,state
|
|
beq+ nop
|
|
oris flags,flags,(CF_SET|OF_SET)>>16
|
|
NEXT
|
|
|
|
/*
 * imull: one operand signed multiply, EDX:EAX = EAX * r/m32.
 * The low word of the product is written to EAX and the high word to
 * EDX. CF and OF are set when the high word is not the sign extension
 * of the low word, i.e. when the product does not fit in 32 bits.
 */
imull:		lwbrx op2,MEM
		SET_FLAGS(FLAGS_MUL)
		lwbrx op1,EAX,state
		li r3,EDX
		mulhw r4,op1,op2		# high word, signed
		mullw result,op1,op2		# low word
		stwbrx r4,r3,state		# EDX
		stwbrx result,EAX,state		# EAX, this store was missing
		srawi r3,result,31		# sign extension of low word
		cmpw r3,r4
		beq+ nop
		oris flags,flags,(CF_SET|OF_SET)>>16
		NEXT
|
|
|
|
/* Other multiplies */
|
|
imulw_mem_reg: lhbrx op2,REG
|
|
extsh op2,op2
|
|
b 1f
|
|
|
|
imulw_imm: NEXTWORD(op2)
|
|
extsh op2,op2
|
|
b 1f
|
|
|
|
imulw_imm8: NEXTBYTE(op2)
|
|
extsb op2,op2
|
|
1: lhbrx op1,MEM
|
|
extsh op1,op1
|
|
mullw result,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
extsh r3,result
|
|
sthbrx result,REG
|
|
cmpw r3,result
|
|
beq+ nop
|
|
oris flags,flags,(CF_SET|OF_SET)>>16
|
|
NEXT # SF/ZF/AF/PF undefined !
|
|
|
|
imull_mem_reg: lwbrx op2,REG
|
|
b 1f
|
|
|
|
imull_imm: NEXTDWORD(op2)
|
|
b 1f
|
|
|
|
imull_imm8: NEXTBYTE(op2)
|
|
extsb op2,op2
|
|
1: lwbrx op1,MEM
|
|
mullw result,op1,op2
|
|
SET_FLAGS(FLAGS_MUL)
|
|
mulhw r3,op1,op2
|
|
srawi r4,result,31
|
|
stwbrx result,REG
|
|
cmpw r3,r4
|
|
beq+ nop
|
|
oris flags,flags,(CF_SET|OF_SET)>>16
|
|
NEXT # SF/ZF/AF/PF undefined !
|
|
|
|
/* aad is indeed a multiply */
|
|
aad: NEXTBYTE(r3)
|
|
lbz op1,AH(state)
|
|
lbz op2,AL(state)
|
|
mullw result,op1,r3 # AH*imm
|
|
SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from result
|
|
add result,result,op2 # AH*imm+AL
|
|
slwi r3,result,8
|
|
sth r3,AX(state) # AH=0
|
|
NEXT # OF/AF/CF undefined
|
|
|
|
/* Unsigned divides: we may destroy all flags */
|
|
divb: lhbrx r4,AX,state
|
|
lbzx r3,MEM
|
|
srwi r5,r4,8
|
|
cmplw r5,r3
|
|
bnl- _divide_error
|
|
divwu r5,r4,r3
|
|
mullw r3,r5,r3
|
|
sub r3,r4,r3
|
|
stb r5,AL(state)
|
|
stb r3,AH(state)
|
|
NEXT
|
|
|
|
divw: li opreg,DX
|
|
lhbrx r4,AX,state
|
|
lhbrx r5,REG
|
|
lhbrx r3,MEM
|
|
insrwi r4,r5,16,0
|
|
cmplw r5,r3
|
|
bnl- _divide_error
|
|
divwu r5,r4,r3
|
|
mullw r3,r5,r3
|
|
sub r3,r4,r3
|
|
sthbrx r5,AX,state
|
|
sthbrx r3,REG
|
|
NEXT
|
|
|
|
/*
 * divl: unsigned divide of EDX:EAX by a 32 bit operand, quotient to EAX
 * and remainder to EDX. Branches to _divide_error unless EDX < divisor,
 * which also catches a zero divisor. When EDX is zero a single divwu is
 * enough, otherwise _div_64_32 is called.
 */
divl: li opreg,EDX # REG now designates EDX
|
|
lwbrx r3,MEM
|
|
lwbrx r4,REG
|
|
lwbrx r5,EAX,state
|
|
cmplw r4,r3
|
|
bnl- _divide_error
|
|
cmplwi r4,0
|
|
bne- 1f
|
|
divwu r4,r5,r3
|
|
mullw r3,r4,r3
|
|
stwbrx r4,EAX,state
|
|
sub r3,r5,r3
|
|
stwbrx r3,REG
|
|
NEXT
|
|
/* full implementation of 64:32 unsigned divide, slow but rarely used */
|
|
1: bl _div_64_32
|
|
stwbrx r5,EAX,state
|
|
stwbrx r4,REG
|
|
NEXT
|
|
/*
 * _div_64_32: unsigned 64 by 32 bit divide, one quotient bit per loop.
 * In:  r4:r5 = dividend (r4 is the high word), r3 = divisor.
 * Out: r5 = quotient, r4 = remainder.
 * Clobbers r7, ctr, cr0 and the carry bit; returns through lr.
 * The callers in this file go to _divide_error unless r4 < r3, so the
 * quotient always fits in 32 bits.
 * The algorithm is deliberately simple because it is rarely used.
 */
_div_64_32:	li r7,32
		mtctr r7
1:		cmpwi r4,0			# always subtract in case
		addc r5,r5,r5			# MSB is set
		adde r4,r4,r4			# r4:r5 <<= 1
		blt 2f
		cmplw r4,r3
		blt 3f
2:		sub r4,r4,r3
		addi r5,r5,1			# set the new quotient bit
3:		bdnz 1b
		blr				# was missing: fell into idivb
|
|
|
|
/* Signed divides: we may destroy all flags */
|
|
/*
 * idivb: signed divide of AX by an 8 bit operand, quotient to AL and
 * remainder to AH. A zero divisor, or a quotient which does not fit in
 * a signed byte, goes to _divide_error.
 */
idivb:		lbzx r3,MEM
		lhbrx r4,AX,state
		cmpwi r3,0
		beq- _divide_error
		extsb r3,r3			# divisor is a signed byte
		extsh r4,r4			# dividend is signed 16 bit
		divw r5,r4,r3
		extsb r7,r5
		mullw r3,r5,r3
		cmpw r5,r7			# quotient must fit in 8 bits
		sub r3,r4,r3			# remainder
		bne- _divide_error
		stb r5,AL(state)
		stb r3,AH(state)
		NEXT
|
|
|
|
/*
 * idivw: signed divide of DX:AX by a 16 bit operand, quotient to AX and
 * remainder to DX. A zero divisor, or a quotient which does not fit in
 * a signed 16 bit value, goes to _divide_error.
 */
idivw:		li opreg,DX
		lhbrx r4,AX,state
		lhbrx r5,REG
		lhbrx r3,MEM
		insrwi r4,r5,16,0		# r4 = DX:AX
		cmpwi r3,0
		beq- _divide_error
		extsh r3,r3			# divisor is signed 16 bit
		xoris r7,r4,0x8000		# divw result is undefined for
		orc. r7,r7,r3			# 0x80000000 divided by -1
		beq- _divide_error
		divw r5,r4,r3
		extsh r7,r5
		mullw r3,r5,r3
		cmpw r5,r7			# quotient must fit in 16 bits
		sub r3,r4,r3			# remainder
		bne- _divide_error
		sthbrx r5,AX,state
		sthbrx r3,REG
		NEXT
|
|
|
|
/*
 * idivl: signed divide of EDX:EAX by a 32 bit operand, quotient to EAX
 * and remainder to EDX. A zero divisor or a quotient overflow goes to
 * _divide_error. When EDX is only the sign extension of EAX a single
 * divw is used, otherwise the slow path at local label 1 divides the
 * absolute values with _div_64_32 and applies the signs afterwards.
 */
idivl:		li opreg,EDX
		lwbrx r3,MEM
		lwbrx r5,EAX,state
		cmpwi cr1,r3,0
		lwbrx r4,REG
		srwi r7,r5,31
		beq- cr1,_divide_error		# zero divisor (compare is in cr1)
		add. r7,r7,r4
		bne- 1f				# EDX not sign extension of EAX
		divw r4,r5,r3
		xoris r7,r5,0x8000		# only overflow case is
		orc. r7,r7,r3			# 0x80000000 divided by -1
		mullw r3,r4,r3
		beq- _divide_error
		stwbrx r4,EAX,state
		sub r3,r5,r3			# remainder
		stwbrx r3,REG
		NEXT

/* full 64 by 32 signed divide, checks for overflow might be right now */
1:		srawi r6,r4,31			# absolute value of r4:r5
		srawi r0,r3,31			# absolute value of r3
		xor r5,r5,r6
		xor r3,r3,r0
		subfc r5,r6,r5
		xor r4,r4,r6
		sub r3,r3,r0
		subfe r4,r6,r4
		xor r0,r0,r6			# sign of result
		cmplw r4,r3			# coarse overflow detection
		bnl- _divide_error		# (probably not necessary)
		bl _div_64_32
		xor r5,r5,r0			# apply sign to result
		sub r5,r5,r0
		xor. r7,r0,r5			# wrong sign: overflow
		xor r4,r4,r6			# apply sign to remainder
		blt- _divide_error
		stwbrx r5,EAX,state
		sub r4,r4,r6
		stwbrx r4,REG
		NEXT
|
|
|
|
/* aam is indeed a divide */
|
|
aam: NEXTBYTE(r3)
|
|
lbz r4,AL(state)
|
|
cmpwi r3,0
|
|
beq- _divide_error # zero divide
|
|
divwu op2,r4,r3 # AL/imm8
|
|
SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from AL
|
|
mullw r3,op2,r3 # (AL/imm8)*imm8
|
|
stb op2,AH(state)
|
|
sub result,r4,r3 # AL-imm8*(AL/imm8)
|
|
stb result,AL(state)
|
|
NEXT # OF/AF/CF undefined
|
|
|
|
_divide_error: li r3,code_divide_err
|
|
b complex
|
|
|
|
/* Instructions dealing with segment registers */
|
|
pushw_sp_sr: li r3,SP
|
|
rlwinm opreg,opcode,31,27,29
|
|
addi r5,state,SELECTORS+2
|
|
lhbrx r4,state,r3
|
|
lhzx r0,r5,opreg
|
|
addi r4,r4,-2
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
sthbrx r0,r4,ssb
|
|
NEXT
|
|
|
|
pushl_sp_sr: li r3,SP
|
|
rlwinm opreg,opcode,31,27,29
|
|
addi r5,state,SELECTORS+2
|
|
lhbrx r4,state,r3
|
|
lhzx r0,r5,opreg
|
|
addi r4,r4,-4
|
|
sthbrx r4,state,r3
|
|
clrlwi r4,r4,16
|
|
stwbrx r0,r4,ssb
|
|
NEXT
|
|
|
|
movl_sr_mem: cmpwi opreg,20
|
|
addi opreg,opreg,SELECTORS+2
|
|
cmpw cr1,base,state # Only registers are sensitive
|
|
bgt- ud # to word/longword difference
|
|
lhzx r0,REG
|
|
bne cr1,1f
|
|
stwbrx r0,MEM # Actually a register
|
|
NEXT
|
|
|
|
movw_sr_mem: cmpwi opreg,20 # SREG 0 to 5 only
|
|
addi opreg,opreg,SELECTORS+2
|
|
bgt- ud
|
|
lhzx r0,REG
|
|
1: sthbrx r0,MEM
|
|
NEXT
|
|
|
|
/* Now the instructions that modify the segment registers, note that
|
|
move/pop to ss disable interrupts and traps for one instruction ! */
|
|
/*
 * popl_sp_sr / popw_sp_sr: pop a segment register using the 16 bit
 * stack pointer. r6 holds the amount added to sp (4 or 2); only the
 * low 16 bits of the popped value are used as the selector. The values
 * left in r3 and r4 by _segment_load are stored in the SELBASES and
 * SELECTORS slots of the segment register, then the shadow copies of
 * the es and (for pop ss) ss bases are reloaded.
 */
popl_sp_sr:	li r6,4
		b 1f
popw_sp_sr:	li r6,2
1:		li r7,SP
		rlwinm opreg,opcode,31,27,29	# sreg*4 from the opcode
		lhbrx offset,state,r7
		addi opreg,opreg,SELBASES
		lhbrx r4,ssb,offset		# new selector
		add offset,offset,r6
		bl _segment_load
		sthbrx offset,state,r7		# update sp
		cmpwi opreg,SELBASES+8		# is ss ? (opreg includes SELBASES)
		stwux r3,REG
		stw r4,SELECTORS-SELBASES(opreg)
		lwz esb,esbase(state)
		bne+ nop
		lwz ssb,ssbase(state)		# pop ss
		crmove RF,TF			# prevent traps
		NEXT
|
|
|
|
movw_mem_sr: cmpwi opreg,20
|
|
addi r7,state,SELBASES
|
|
bgt- ud
|
|
cmpwi opreg,4 # CS illegal
|
|
beq- ud
|
|
lhbrx r4,MEM
|
|
bl _segment_load
|
|
stwux r3,r7,opreg
|
|
cmpwi opreg,8
|
|
stw r4,SELECTORS-SELBASES(r7)
|
|
lwz esb,esbase(state)
|
|
bne+ nop
|
|
lwz ssb,ssbase(state)
|
|
crmove RF,TF # prevent traps
|
|
NEXT
|
|
|
|
.equ movl_mem_sr, movw_mem_sr
|
|
|
|
/* The encoding of les/lss/lds/lfs/lgs is strange, opcode is c4/b2/c5/b4/b5
|
|
for es/ss/ds/fs/gs which are sreg 0/2/3/4/5. And obviously there is
|
|
no lcs instruction, it's called a far jump. */
|
|
|
|
/*
 * ldlptrl / ldlptrw: load a far pointer (les/lss/lds/lfs/lgs).
 * The offset part is read from the memory operand and, once the helper
 * at local label 1 has returned, copied unchanged into REG. The helper
 * loads the selector which follows the offset part into the segment
 * register selected by the opcode. r4 is the size of the offset part.
 */
ldlptrl: lwzux r7,MEM
|
|
li r4,4
|
|
bl 1f
|
|
stwx r7,REG
|
|
NEXT
|
|
ldlptrw: lhzux r7,MEM
|
|
li r4,2
|
|
bl 1f
|
|
sthx r7,REG
|
|
NEXT
|
|
|
|
/*
 * Common part of ldlptrl/ldlptrw, called with bl.
 * In: r4 = size in bytes of the offset part (2 or 4), opcode = opcode
 * byte. A register operand (base equal to state) goes to ud.
 * The segment register index times 4 is extracted from the constant
 * 0xc0114800 by a rotate whose count is built from bits of the opcode.
 * The return address is kept in r0 across the call to _segment_load,
 * which presumably preserves r0 (to be confirmed in its definition).
 * The values left in r3 and r4 by _segment_load are stored in the
 * SELBASES and SELECTORS slots of the segment register.
 */
1: cmpw base,state
|
|
lis r3,0xc011 # es/ss/ds/fs/gs
|
|
rlwinm r5,opcode,2,0x0c # 00/08/04/00/04
|
|
mflr r0
|
|
addi r3,r3,0x4800 # r3=0xc0114800
|
|
rlwimi r5,opcode,0,0x10 # 00/18/04/10/14
|
|
lhbrx r4,r4,offset
|
|
rlwnm opcode,r3,r5,0x1c # 00/08/0c/10/14 = sreg*4 !
|
|
beq- ud # Only mem operands allowed !
|
|
bl _segment_load
|
|
addi r5,opcode,SELBASES
|
|
stwux r3,r5,state
|
|
mtlr r0
|
|
stw r4,SELECTORS-SELBASES(r5)
|
|
lwz esb,esbase(state) # keep shadow state in sync
|
|
lwz ssb,ssbase(state)
|
|
blr
|
|
|
|
/* Instructions that may modify the current code segment: the next optimization
|
|
* might be to avoid calling C code when the code segment does not change. But
|
|
* it's probably not worth the effort.
|
|
*/
|
|
/* Far calls, jumps and returns */
|
|
lcall_w: NEXTWORD(r4)
|
|
NEXTWORD(r5)
|
|
li r3,code_lcallw
|
|
b complex
|
|
|
|
lcall_l: NEXTDWORD(r4)
|
|
NEXTWORD(r5)
|
|
li r3,code_lcalll
|
|
b complex
|
|
|
|
lcallw: lhbrx r4,MEM
|
|
addi offset,offset,2
|
|
lhbrx r5,MEM
|
|
li r3,code_lcallw
|
|
b complex
|
|
|
|
lcalll: lwbrx r4,MEM
|
|
addi offset,offset,4
|
|
lhbrx r5,MEM
|
|
li r3,code_lcalll
|
|
b complex
|
|
|
|
ljmp_w: NEXTWORD(r4)
|
|
NEXTWORD(r5)
|
|
li r3,code_ljmpw
|
|
b complex
|
|
|
|
ljmp_l: NEXTDWORD(r4)
|
|
NEXTWORD(r5)
|
|
li r3,code_ljmpl
|
|
b complex
|
|
|
|
ljmpw: lhbrx r4,MEM
|
|
addi offset,offset,2
|
|
lhbrx r5,MEM
|
|
li r3,code_ljmpw
|
|
b complex
|
|
|
|
ljmpl: lwbrx r4,MEM
|
|
addi offset,offset,4
|
|
lhbrx r5,MEM
|
|
li r3,code_ljmpl
|
|
b complex
|
|
|
|
lretw_imm: NEXTWORD(r4)
|
|
b 1f
|
|
lretw: li r4,0
|
|
1: li r3,code_lretw
|
|
b complex
|
|
|
|
lretl_imm: NEXTWORD(r4)
|
|
b 1f
|
|
lretl: li r4,0
|
|
1: li r3,code_lretl
|
|
b complex
|
|
|
|
/* Interrupts */
|
|
int: li r3,code_softint # handled by C code
|
|
NEXTBYTE(r4)
|
|
b complex
|
|
|
|
int3: li r3,code_int3 # handled by C code
|
|
b complex
|
|
|
|
into: EVAL_OF
|
|
bf+ OF,nop
|
|
li r3,code_into
|
|
b complex # handled by C code
|
|
|
|
iretw: li r3,code_iretw # handled by C code
|
|
b complex
|
|
|
|
iretl: li r3,code_iretl
|
|
b complex
|
|
|
|
/* Miscellaneous flag control instructions */
|
|
clc: oris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16
|
|
xoris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16
|
|
NEXT
|
|
|
|
cmc: oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
|
|
xoris flags,flags,(CF_IN_CR|CF_COMPLEMENT|ABOVE_IN_CR)>>16
|
|
NEXT
|
|
|
|
stc: oris flags,flags,\
|
|
(CF_IN_CR|CF_LOCATION|CF_COMPLEMENT|ABOVE_IN_CR)>>16
|
|
xoris flags,flags,(CF_IN_CR|CF_LOCATION|ABOVE_IN_CR)>>16
|
|
NEXT
|
|
|
|
cld: crclr DF
|
|
NEXT
|
|
|
|
std: crset DF
|
|
NEXT
|
|
|
|
cli: crclr IF
|
|
NEXT
|
|
|
|
sti: crset IF
|
|
NEXT
|
|
|
|
lahf: bl _eval_flags
|
|
stb r3,AH(state)
|
|
NEXT
|
|
|
|
sahf: andis. r3,flags,OF_EXPLICIT>>16
|
|
lbz r0,AH(state)
|
|
beql+ _eval_of # save OF just in case
|
|
rlwinm op1,r0,31,0x08 # AF
|
|
rlwinm flags,flags,0,OF_STATE_MASK
|
|
extsb result,r0 # SF/PF
|
|
ZF862ZF(r0)
|
|
oris flags,flags,(ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16
|
|
addi op2,op1,0 # AF
|
|
ori result,result,0x00fb # set all except PF
|
|
mtcrf 0x02,r0 # SF/ZF
|
|
rlwimi flags,r0,27,CF_VALUE # CF
|
|
xori result,result,0x00ff # 00 if PF set, 04 if clear
|
|
NEXT
|
|
|
|
pushfw_sp: bl _eval_flags
|
|
li r4,SP
|
|
lhbrx r5,r4,state
|
|
addi r5,r5,-2
|
|
sthbrx r5,r4,state
|
|
clrlwi r5,r5,16
|
|
sthbrx r3,ssb,r5
|
|
NEXT
|
|
|
|
pushfl_sp: bl _eval_flags
|
|
li r4,SP
|
|
lhbrx r5,r4,state
|
|
addi r5,r5,-4
|
|
sthbrx r5,r4,state
|
|
clrlwi r5,r5,16
|
|
stwbrx r3,ssb,r5
|
|
NEXT
|
|
|
|
popfl_sp: li r4,SP
|
|
lhbrx r5,r4,state
|
|
lwbrx r3,ssb,r5
|
|
addi r5,r5,4
|
|
stw r3,eflags(state)
|
|
sthbrx r5,r4,state
|
|
b 1f
|
|
|
|
popfw_sp: li r4,SP
|
|
lhbrx r5,r4,state
|
|
lhbrx r3,ssb,r5
|
|
addi r5,r5,2
|
|
sth r3,eflags+2(state)
|
|
sthbrx r5,r4,state
|
|
1: rlwinm op1,r3,31,0x08 # AF
|
|
xori result,r3,4 # PF
|
|
ZF862ZF(r3) # cr6
|
|
lis flags,(OF_EXPLICIT|ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16
|
|
addi op2,op1,0 # AF
|
|
rlwinm result,result,0,0x04 # PF
|
|
rlwimi flags,r3,27,CF_VALUE # CF
|
|
mtcrf 0x6,r3 # IF,DF,TF,SF,ZF
|
|
rlwimi result,r3,24,0,0 # SF
|
|
rlwimi flags,r3,15,OF_VALUE # OF
|
|
NEXT
|
|
|
|
/* SETcc is slightly faster for setz/setnz */
|
|
setz: EVAL_ZF
|
|
bt ZF,1f
|
|
0: cmpwi opreg,0
|
|
bne- ud
|
|
stbx opreg,MEM
|
|
NEXT
|
|
|
|
setnz: EVAL_ZF
|
|
bt ZF,0b
|
|
1: cmpwi opreg,0
|
|
bne- ud
|
|
stbx one,MEM
|
|
NEXT
|
|
|
|
#define SETCC(cond, eval, flag) \
|
|
set##cond: EVAL_##eval; bt flag,1b; b 0b; \
|
|
setn##cond: EVAL_##eval; bt flag,0b; b 1b
|
|
|
|
SETCC(c, CF, CF)
|
|
SETCC(a, ABOVE, ABOVE)
|
|
SETCC(s, SF, SF)
|
|
SETCC(g, SIGNED, SGT)
|
|
SETCC(l, SIGNED, SLT)
|
|
SETCC(o, OF, OF)
|
|
SETCC(p, PF, PF)
|
|
|
|
/* No wait for a 486SX */
|
|
.equ wait, nop
|
|
|
|
/* ARPL is not recognized in real mode */
|
|
.equ arpl, ud
|
|
|
|
/* clts and in general control and debug registers are not implemented */
|
|
.equ clts, unimpl
|
|
|
|
/*
 * aaa: ASCII adjust AX after addition. When AF is set or the low nibble
 * of AL is above 9, 0x106 is added to AX (AL+6, AH+1) and CF is set.
 * The high nibble of AL is cleared in both cases.
 */
aaa:		lhbrx r0,AX,state
		bl _eval_af
		rlwinm r3,r3,0,0x10
		SET_FLAGS(FLAGS_ADD(W))
		rlwimi r3,r0,0,0x0f		# AF:AL&0x0f
		li r4,0x106
		addi r3,r3,-10
		srwi r3,r3,16			# carry ? 0 : 0xffff
		andc op1,r4,r3			# carry ? 0x106 : 0
		add result,r0,op1
		rlwinm result,result,0,28,23	# clear high half of AL
		li op2,10			# sets AF indirectly
		sthbrx result,AX,state		# was r3; OF/SF/ZF/PF undefined !
		rlwimi result,op1,8,0x10000	# insert CF
		NEXT
|
|
|
|
/*
 * aas: ASCII adjust AX after subtraction. When AF is set or the low
 * nibble of AL is above 9, 0x106 is subtracted from AX (AL-6, AH-1) and
 * CF is set. The high nibble of AL is cleared in both cases.
 */
aas:		lhbrx r0,AX,state
		bl _eval_af
		rlwinm r3,r3,0,0x10
		SET_FLAGS(FLAGS_ADD(W))
		rlwimi r3,r0,0,0x0f		# AF:AL&0x0f
		li r4,0x106
		addi r3,r3,-10
		srwi r3,r3,16			# carry ? 0 : 0xffff
		andc op1,r4,r3			# carry ? 0x106 : 0
		sub result,r0,op1
		rlwinm result,result,0,28,23	# clear high half of AL
		li op2,10			# sets AF indirectly
		sthbrx result,AX,state		# was r3; OF/SF/ZF/PF undefined !
		rlwimi result,op1,8,0x10000	# insert CF
		NEXT
|
|
|
|
/*
 * daa: decimal adjust AL after addition. 6 is added when AF is set or
 * the low nibble of AL is above 9, then 0x60 is added (and CF set) when
 * the intermediate value is above 0x9f or CF was set on entry.
 * Both conditions are turned into a mask by computing (limit - value)
 * and rotating the sign bits down into the wanted position.
 */
daa:		lbz r0,AL(state)
		bl _eval_af
		rlwinm r7,r3,0,0x10
		bl _eval_cf			# r3=CF<<8
		rlwimi r7,r0,0,0x0f		# AF:AL&0x0f
		SET_FLAGS(FLAGS_ADD(B))
		subfic r4,r7,9			# negative if AF or nibble >9
		rlwinm r4,r4,3,0x06		# 6 if AF or >9, 0 otherwise
		srwi op1,r7,1			# 0..4, no AF, 5..f AF set
		add r0,r0,r4			# conditional add
		li op2,11			# sets AF depending on op1
		or r0,r0,r3
		subfic r3,r0,159		# negative if >0x9f or CF
		rlwinm r3,r3,7,0x60		# mask value to add
		add result,r0,r3		# final result for SF/ZF/PF
		stb result,AL(state)
		rlwimi result,r3,2,0x100	# set CF if added
		NEXT
|
|
|
|
/*
 * das: decimal adjust AL after subtraction. 6 is subtracted when AF is
 * set or the low nibble of AL is above 9, then 0x60 is subtracted (and
 * CF set) when the intermediate value is above 0x9f or CF was set on
 * entry. Both conditions are turned into a mask by computing
 * (limit - value) and rotating the sign bits down, as in daa.
 * NOTE(review): when AL is below 6 with AF set the intermediate value
 * is negative and the second adjustment is skipped; this cannot happen
 * after a subtraction of valid packed BCD operands, but confirm that
 * the resulting CF is acceptable for other inputs.
 */
das:		lbz r0,AL(state)
		bl _eval_af
		rlwinm r7,r3,0,0x10
		bl _eval_cf			# r3=CF<<8
		rlwimi r7,r0,0,0x0f		# AF:AL&0x0f
		SET_FLAGS(FLAGS_ADD(B))
		subfic r4,r7,9			# negative if AF or nibble >9
		rlwinm r4,r4,3,0x06		# 6 if AF or >9, 0 otherwise
		srwi op1,r7,1			# 0..4, no AF, 5..f AF set
		sub r0,r0,r4			# conditional subtract
		li op2,11			# sets AF depending on op1
		or r4,r0,r3			# insert CF
		subfic r3,r4,159		# negative if >0x9f or CF
		rlwinm r3,r3,7,0x60		# mask value to subtract
		sub result,r4,r3		# final result for SF/ZF/PF
		stb result,AL(state)
		rlwimi result,r3,2,0x100	# set CF if subtracted
		NEXT
|
|
|
|
/* 486 specific instructions */
|
|
|
|
/* For cmpxchg, only the zero flag is important */
|
|
|
|
cmpxchgb: lbz op1,AL(state)
|
|
SET_FLAGS(FLAGS_SUB(B)|ZF_IN_CR)
|
|
lbzx op2,MEM
|
|
cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
bne cr6,1f
|
|
lbzx r3,REG # success: swap
|
|
stbx r3,MEM
|
|
NEXT
|
|
1: stb op2,AL(state)
|
|
NEXT
|
|
|
|
cmpxchgw: lhbrx op1,AX,state
|
|
SET_FLAGS(FLAGS_SUB(W)|ZF_IN_CR)
|
|
lhbrx op2,MEM
|
|
cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
bne cr6,1f
|
|
lhzx r3,REG # success: swap
|
|
sthx r3,MEM
|
|
NEXT
|
|
1: sthbrx op2,AX,state
|
|
NEXT
|
|
|
|
cmpxchgl: lwbrx op1,EAX,state
|
|
SET_FLAGS(FLAGS_SUB(L)|ZF_IN_CR|SIGNED_IN_CR)
|
|
lwbrx op2,MEM
|
|
cmpw cr6,op1,op2
|
|
sub result,op1,op2
|
|
bne cr6,1f
|
|
lwzx r3,REG # success: swap
|
|
stwx r3,MEM
|
|
NEXT
|
|
1: stwbrx op2,EAX,state
|
|
NEXT
|
|
|
|
xaddb: lbzx op2,MEM
|
|
SET_FLAGS(FLAGS_ADD(B))
|
|
lbzx op1,REG
|
|
add result,op1,op2
|
|
stbx result,MEM
|
|
stbx op2,REG
|
|
NEXT
|
|
|
|
xaddw: lhbrx op2,MEM
|
|
SET_FLAGS(FLAGS_ADD(W))
|
|
lhbrx op1,REG
|
|
add result,op1,op2
|
|
sthbrx result,MEM
|
|
sthbrx op2,REG
|
|
NEXT
|
|
|
|
xaddl: lwbrx op2,MEM
|
|
SET_FLAGS(FLAGS_ADD(L))
|
|
lwbrx op1,REG
|
|
add result,op1,op2
|
|
stwbrx result,MEM
|
|
stwbrx op2,REG
|
|
NEXT
|
|
|
|
/* All FPU instructions skipped. This is a 486 SX ! */
|
|
esc: li r3,code_dna # DNA interrupt
|
|
b complex
|
|
|
|
.equ hlt, unimpl # Cannot stop
|
|
|
|
.equ invd, unimpl
|
|
|
|
/* Undefined in real address mode */
|
|
.equ lar, ud
|
|
|
|
.equ lgdt, unimpl
|
|
.equ lidt, unimpl
|
|
.equ lldt, ud
|
|
.equ lmsw, unimpl
|
|
|
|
/* protected mode only */
|
|
.equ lsl, ud
|
|
.equ ltr, ud
|
|
|
|
.equ movl_cr_reg, unimpl
|
|
.equ movl_reg_cr, unimpl
|
|
.equ movl_dr_reg, unimpl
|
|
.equ movl_reg_dr, unimpl
|
|
|
|
.equ sgdt, unimpl
|
|
|
|
.equ sidt, unimpl
|
|
.equ sldt, ud
|
|
.equ smsw, unimpl
|
|
|
|
.equ str, ud
|
|
|
|
ud: li r3,code_ud
|
|
li r4,0
|
|
b complex
|
|
|
|
unimpl: li r3,code_ud
|
|
li r4,1
|
|
b complex
|
|
|
|
.equ verr, ud
|
|
.equ verw, ud
|
|
.equ wbinvd, unimpl
|
|
|
|
em86_end:
|
|
.size em86_enter,em86_end-em86_enter
|
|
#ifdef __BOOT__
|
|
.data
|
|
#define ENTRY(x,t) .long x+t-_jtables
|
|
#else
|
|
.section .rodata
|
|
#define ENTRY(x,t) .long x+t
|
|
#endif
|
|
|
|
#define BOP(x) ENTRY(x,2) /* Byte operation with mod/rm byte */
|
|
#define WLOP(x) ENTRY(x,3) /* 16 or 32 bit operation with mod/rm byte */
|
|
#define EXTOP(x) ENTRY(x,0) /* Opcode with extension in mod/rm byte */
|
|
#define OP(x) ENTRY(x,1) /* Direct one byte opcode/prefix */
|
|
|
|
/* A few macros for the main table */
|
|
#define gen6(op, wl, axeax) \
|
|
BOP(op##b##_reg_mem); WLOP(op##wl##_reg_mem); \
|
|
BOP(op##b##_mem_reg); WLOP(op##wl##_mem_reg); \
|
|
OP(op##b##_imm_al); OP(op##wl##_imm_##axeax)
|
|
|
|
#define rep7(l,t) \
|
|
ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); \
|
|
ENTRY(l,t); ENTRY(l,t); ENTRY(l,t)
|
|
|
|
#define rep8(l) l ; l; l; l; l; l; l; l;
|
|
|
|
#define allcond(pfx, sfx, t) \
|
|
ENTRY(pfx##o##sfx, t); ENTRY(pfx##no##sfx, t); \
|
|
ENTRY(pfx##c##sfx, t); ENTRY(pfx##nc##sfx, t); \
|
|
ENTRY(pfx##z##sfx, t); ENTRY(pfx##nz##sfx, t); \
|
|
ENTRY(pfx##na##sfx, t); ENTRY(pfx##a##sfx, t); \
|
|
ENTRY(pfx##s##sfx, t); ENTRY(pfx##ns##sfx, t); \
|
|
ENTRY(pfx##p##sfx, t); ENTRY(pfx##np##sfx, t); \
|
|
ENTRY(pfx##l##sfx, t); ENTRY(pfx##nl##sfx, t); \
|
|
ENTRY(pfx##ng##sfx, t); ENTRY(pfx##g##sfx, t)
|
|
|
|
/* single/double register sign extensions and other oddities */
|
|
#define h2sextw cbw /* Half to Single sign extension */
|
|
#define s2dextw cwd /* Single to Double sign extension */
|
|
#define h2sextl cwde
|
|
#define s2dextl cdq
|
|
#define j_a16_cxz_w jcxz_w
|
|
#define j_a32_cxz_w jecxz_w
|
|
#define j_a16_cxz_l jcxz_l
|
|
#define j_a32_cxz_l jecxz_l
|
|
#define loopa16_w loopw_w
|
|
#define loopa16_l loopw_l
|
|
#define loopa32_w loopl_w
|
|
#define loopa32_l loopl_l
|
|
#define loopnza16_w loopnzw_w
|
|
#define loopnza16_l loopnzw_l
|
|
#define loopnza32_w loopnzl_w
|
|
#define loopnza32_l loopnzl_l
|
|
#define loopza16_w loopzw_w
|
|
#define loopza16_l loopzw_l
|
|
#define loopza32_w loopzl_w
|
|
#define loopza32_l loopzl_l
|
|
/* No FP support */
|
|
|
|
/* Addressing mode table */
|
|
.align 5
|
|
# (%bx,%si), (%bx,%di), (%bp,%si), (%bp,%di)
|
|
adtable: .long 0x00004360, 0x00004370, 0x80004560, 0x80004570
|
|
# (%si), (%di), o16, (%bx)
|
|
.long 0x00004600, 0x00004700, 0x00002000, 0x00004300
|
|
# o8(%bx,%si), o8(%bx,%di), o8(%bp,%si), o8(%bp,%di)
|
|
.long 0x00004360, 0x00004370, 0x80004560, 0x80004570
|
|
# o8(%si), o8(%di), o8(%bp), o8(%bx)
|
|
.long 0x00004600, 0x00004700, 0x80004500, 0x00004300
|
|
# o16(%bx,%si), o16(%bx,%di), o16(%bp,%si), o16(%bp,%di)
|
|
.long 0x00004360, 0x00004370, 0x80004560, 0x80004570
|
|
# o16(%si), o16(%di), o16(%bp), o16(%bx)
|
|
.long 0x00004600, 0x00004700, 0x80004500, 0x00004300
|
|
# register addressing modes do not use the table
|
|
.long 0, 0, 0, 0, 0, 0, 0, 0
|
|
#now 32 bit modes
|
|
# (%eax), (%ecx), (%edx), (%ebx)
|
|
.long 0x00004090, 0x00004190, 0x00004290, 0x00004390
|
|
# sib, o32, (%esi), (%edi)
|
|
.long 0x00003090, 0x00002090, 0x00004690, 0x00004790
|
|
# o8(%eax), o8(%ecx), o8(%edx), o8(%ebx)
|
|
.long 0x00004090, 0x00004190, 0x00004290, 0x00004390
|
|
# sib, o8(%ebp), o8(%esi), o8(%edi)
|
|
.long 0x00003090, 0x80004590, 0x00004690, 0x00004790
|
|
# o32(%eax), o32(%ecx), o32(%edx), o32(%ebx)
|
|
.long 0x00004090, 0x00004190, 0x00004290, 0x00004390
|
|
# sib, o32(%ebp), o32(%esi), o32(%edi)
|
|
.long 0x00003090, 0x80004590, 0x00004690, 0x00004790
|
|
# register addressing modes do not use the table
|
|
.long 0, 0, 0, 0, 0, 0, 0, 0
|
|
|
|
#define jtable(wl, awl, spesp, axeax, name ) \
|
|
.align 5; \
|
|
jtab_##name: gen6(add, wl, axeax); \
|
|
OP(push##wl##_##spesp##_sr); \
|
|
OP(pop##wl##_##spesp##_sr); \
|
|
gen6(or, wl, axeax); \
|
|
OP(push##wl##_##spesp##_sr); \
|
|
OP(_twobytes); \
|
|
gen6(adc, wl, axeax); \
|
|
OP(push##wl##_##spesp##_sr); \
|
|
OP(pop##wl##_##spesp##_sr); \
|
|
gen6(sbb, wl, axeax); \
|
|
OP(push##wl##_##spesp##_sr); \
|
|
OP(pop##wl##_##spesp##_sr); \
|
|
gen6(and, wl, axeax); OP(_es); OP(daa); \
|
|
gen6(sub, wl, axeax); OP(_cs); OP(das); \
|
|
gen6(xor, wl, axeax); OP(_ss); OP(aaa); \
|
|
gen6(cmp, wl, axeax); OP(_ds); OP(aas); \
|
|
rep8(OP(inc##wl##_reg)); \
|
|
rep8(OP(dec##wl##_reg)); \
|
|
rep8(OP(push##wl##_##spesp##_reg)); \
|
|
rep8(OP(pop##wl##_##spesp##_reg)); \
|
|
OP(pusha##wl##_##spesp); OP(popa##wl##_##spesp); \
|
|
WLOP(bound##wl); WLOP(arpl); \
|
|
OP(_fs); OP(_gs); OP(_opsize); OP(_adsize); \
|
|
OP(push##wl##_##spesp##_imm); WLOP(imul##wl##_imm); \
|
|
OP(push##wl##_##spesp##_imm8); WLOP(imul##wl##_imm8); \
|
|
OP(insb_##awl); OP(ins##wl##_##awl); \
|
|
OP(outsb_##awl); OP(outs##wl##_##awl); \
|
|
allcond(sj,_##wl,1); \
|
|
EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm); \
|
|
EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm8); \
|
|
BOP(testb_reg_mem); WLOP(test##wl##_reg_mem); \
|
|
BOP(xchgb_reg_mem); WLOP(xchg##wl##_reg_mem); \
|
|
BOP(movb_reg_mem); WLOP(mov##wl##_reg_mem); \
|
|
BOP(movb_mem_reg); WLOP(mov##wl##_mem_reg); \
|
|
WLOP(mov##wl##_sr_mem); WLOP(lea##wl); \
|
|
WLOP(mov##wl##_mem_sr); WLOP(pop##wl##_##spesp##_##awl); \
|
|
OP(nop); rep7(xchg##wl##_##axeax##_reg,1); \
|
|
OP(h2sext##wl); OP(s2dext##wl); \
|
|
OP(lcall_##wl); OP(wait); \
|
|
OP(pushf##wl##_##spesp); OP(popf##wl##_##spesp); \
|
|
OP(sahf); OP(lahf); \
|
|
OP(movb_##awl##_al); OP(mov##wl##_##awl##_##axeax); \
|
|
OP(movb_al_##awl); OP(mov##wl##_##axeax##_##awl); \
|
|
OP(movsb_##awl); OP(movs##wl##_##awl); \
|
|
OP(cmpsb_##awl); OP(cmps##wl##_##awl); \
|
|
OP(testb_imm_al); OP(test##wl##_imm_##axeax); \
|
|
OP(stosb_##awl); OP(stos##wl##_##awl); \
|
|
OP(lodsb_##awl); OP(lods##wl##_##awl); \
|
|
OP(scasb_##awl); OP(scas##wl##_##awl); \
|
|
rep8(OP(movb_imm_reg)); \
|
|
rep8(OP(mov##wl##_imm_reg)); \
|
|
EXTOP(shiftb_imm); EXTOP(shift##wl##_imm); \
|
|
OP(ret##wl##_##spesp##_imm); OP(ret##wl##_##spesp); \
|
|
WLOP(ldlptr##wl); WLOP(ldlptr##wl); \
|
|
BOP(movb_imm_mem); WLOP(mov##wl##_imm_mem); \
|
|
OP(enter##wl##_##spesp); OP(leave##wl##_##spesp); \
|
|
OP(lret##wl##_imm); OP(lret##wl); \
|
|
OP(int3); OP(int); OP(into); OP(iret##wl); \
|
|
EXTOP(shiftb_1); EXTOP(shift##wl##_1); \
|
|
EXTOP(shiftb_cl); EXTOP(shift##wl##_cl); \
|
|
OP(aam); OP(aad); OP(ud); OP(xlatb_##awl); \
|
|
rep8(OP(esc)); \
|
|
OP(loopnz##awl##_##wl); OP(loopz##awl##_##wl); \
|
|
OP(loop##awl##_##wl); OP(j_##awl##_cxz_##wl); \
|
|
OP(inb_port_al); OP(in##wl##_port_##axeax); \
|
|
OP(outb_al_port); OP(out##wl##_##axeax##_port); \
|
|
OP(call##wl##_##spesp); OP(jmp_##wl); \
|
|
OP(ljmp_##wl); OP(sjmp_##wl); \
|
|
OP(inb_dx_al); OP(in##wl##_dx_##axeax); \
|
|
OP(outb_al_dx); OP(out##wl##_##axeax##_dx); \
|
|
OP(_lock); OP(ud); OP(_repnz); OP(_repz); \
|
|
OP(hlt); OP(cmc); \
|
|
EXTOP(grp3b); EXTOP(grp3##wl); \
|
|
OP(clc); OP(stc); OP(cli); OP(sti); \
|
|
OP(cld); OP(std); \
|
|
EXTOP(grp4b); EXTOP(grp5##wl##_##spesp); \
|
|
/* Here we start the table for twobyte instructions */ \
|
|
OP(ud); OP(ud); WLOP(lar); WLOP(lsl); \
|
|
OP(ud); OP(ud); OP(clts); OP(ud); \
|
|
OP(invd); OP(wbinvd); OP(ud); OP(ud); \
|
|
OP(ud); OP(ud); OP(ud); OP(ud); \
|
|
rep8(OP(ud)); \
|
|
rep8(OP(ud)); \
|
|
OP(movl_cr_reg); OP(movl_reg_cr); \
|
|
OP(movl_dr_reg); OP(movl_reg_dr); \
|
|
OP(ud); OP(ud); OP(ud); OP(ud); \
|
|
rep8(OP(ud)); \
|
|
/* .long wrmsr, rdtsc, rdmsr, rdpmc; */\
|
|
rep8(OP(ud)); \
|
|
rep8(OP(ud)); \
|
|
/* allcond(cmov, wl); */ \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
/* MMX Start */ \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
/* MMX End */ \
|
|
allcond(j,_##wl, 1); \
|
|
allcond(set,,2); \
|
|
OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \
|
|
OP(ud) /* cpuid */; WLOP(bt##wl##_reg_mem); \
|
|
WLOP(shld##wl##_imm); WLOP(shld##wl##_cl); \
|
|
OP(ud); OP(ud); \
|
|
OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \
|
|
OP(ud) /* rsm */; WLOP(bts##wl##_reg_mem); \
|
|
WLOP(shrd##wl##_imm); WLOP(shrd##wl##_cl); \
|
|
OP(ud); WLOP(imul##wl##_mem_reg); \
|
|
BOP(cmpxchgb); WLOP(cmpxchg##wl); \
|
|
WLOP(ldlptr##wl); WLOP(btr##wl##_reg_mem); \
|
|
WLOP(ldlptr##wl); WLOP(ldlptr##wl); \
|
|
WLOP(movzb##wl); WLOP(movzw##wl); \
|
|
OP(ud); OP(ud); \
|
|
EXTOP(grp8##wl); WLOP(btc##wl##_reg_mem); \
|
|
WLOP(bsf##wl); WLOP(bsr##wl); \
|
|
WLOP(movsb##wl); WLOP(movsw##wl); \
|
|
BOP(xaddb); WLOP(xadd##wl); \
|
|
OP(ud); OP(ud); \
|
|
OP(ud); OP(ud); OP(ud); OP(ud); \
|
|
rep8(OP(bswap)); \
|
|
/* MMX Start */ \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
rep8(OP(ud)); rep8(OP(ud)); \
|
|
/* MMX End */
|
|
.align 5 /* 8kb of tables, 32 byte aligned */
|
|
_jtables: jtable(w, a16, sp, ax, www) /* data16, addr16 */
|
|
jtable(l, a16, sp, eax, lww) /* data32, addr16 */
|
|
jtable(w, a32, sp, ax, wlw) /* data16, addr32 */
|
|
jtable(l, a32, sp, eax, llw) /* data32, addr32 */
|
|
/* The other possible combinations are only required by protected mode
|
|
code using a big stack segment */
|
|
/* Here are the auxiliary tables for opcode extensions, note that
|
|
all entries get 2 or 3 added. */
|
|
#define grp1table(bwl,t,s8) \
|
|
grp1##bwl##_imm##s8:; \
|
|
ENTRY(add##bwl##_imm##s8,t); ENTRY(or##bwl##_imm##s8,t); \
|
|
ENTRY(adc##bwl##_imm##s8,t); ENTRY(sbb##bwl##_imm##s8,t); \
|
|
ENTRY(and##bwl##_imm##s8,t); ENTRY(sub##bwl##_imm##s8,t); \
|
|
ENTRY(xor##bwl##_imm##s8,t); ENTRY(cmp##bwl##_imm##s8,t)
|
|
|
|
grp1table(b,2,)
|
|
grp1table(w,3,)
|
|
grp1table(w,3,8)
|
|
grp1table(l,3,)
|
|
grp1table(l,3,8)
|
|
|
|
#define shifttable(bwl,t,c) \
|
|
shift##bwl##_##c:; \
|
|
ENTRY(rol##bwl##_##c,t); ENTRY(ror##bwl##_##c,t); \
|
|
ENTRY(rcl##bwl##_##c,t); ENTRY(rcr##bwl##_##c,t); \
|
|
ENTRY(shl##bwl##_##c,t); ENTRY(shr##bwl##_##c,t); \
|
|
OP(ud); ENTRY(sar##bwl##_##c,t)
|
|
|
|
shifttable(b,2,1)
|
|
shifttable(w,3,1)
|
|
shifttable(l,3,1)
|
|
|
|
shifttable(b,2,cl)
|
|
shifttable(w,3,cl)
|
|
shifttable(l,3,cl)
|
|
|
|
shifttable(b,2,imm)
|
|
shifttable(w,3,imm)
|
|
shifttable(l,3,imm)
|
|
|
|
#define grp3table(bwl,t) \
|
|
grp3##bwl: ENTRY(test##bwl##_imm,t); OP(ud); \
|
|
ENTRY(not##bwl,t); ENTRY(neg##bwl,t); \
|
|
ENTRY(mul##bwl,t); ENTRY(imul##bwl,t); \
|
|
ENTRY(div##bwl,t); ENTRY(idiv##bwl,t)
|
|
|
|
grp3table(b,2)
|
|
grp3table(w,3)
|
|
grp3table(l,3)
|
|
|
|
grp4b: BOP(incb); BOP(decb); \
|
|
OP(ud); OP(ud); \
|
|
OP(ud); OP(ud); \
|
|
OP(ud); OP(ud)
|
|
|
|
#define grp5table(wl,spesp) \
|
|
grp5##wl##_##spesp: \
|
|
WLOP(inc##wl); WLOP(dec##wl); \
|
|
WLOP(call##wl##_##spesp##_mem); WLOP(lcall##wl##); \
|
|
WLOP(jmp##wl); WLOP(ljmp##wl); \
|
|
WLOP(push##wl##_##spesp); OP(ud)
|
|
|
|
grp5table(w,sp)
|
|
grp5table(l,sp)
|
|
|
|
#define grp8table(wl) \
|
|
grp8##wl: OP(ud); OP(ud); OP(ud); OP(ud); \
|
|
WLOP(bt##wl##_imm); WLOP(bts##wl##_imm); \
|
|
WLOP(btr##wl##_imm); WLOP(btc##wl##_imm)
|
|
|
|
grp8table(w)
|
|
grp8table(l)
|
|
#ifdef __BOOT__
|
|
_endjtables: .long 0 /* Points to _jtables after relocation */
|
|
#endif
|