/* dspfns.h
 *
 * Copyright 2001,2014 ARM Limited. All rights reserved.
 *
 * RCS $Revision: 185525 $
 * Checkin $Date: 2014-05-29 12:44:48 +0100 (Thu, 29 May 2014) $
 * Revising $Author: ransin01 $
 */

/* ----------------------------------------------------------------------
 * This header file provides a set of DSP-type primitive
 * operations, such as 16-bit and 32-bit saturating arithmetic. The
 * operations it provides are similar to the ones used by the ITU
 * for publishing specifications of DSP algorithms.
 */

#ifndef ARMDSP_DSPFNS_H
#define ARMDSP_DSPFNS_H

#define __ARMCLIB_VERSION 5050106

/* Request the <stdint.h> limit/format/constant macros when built as C++. */
#ifdef __cplusplus
#define __STDC_LIMIT_MACROS 1
#define __STDC_FORMAT_MACROS 1
#define __STDC_CONSTANT_MACROS 1
#endif /* __cplusplus */

#include <assert.h>   /* assert(), used by div_s() */
#include <stdint.h>   /* int16_t, int32_t, uint32_t, INT16_MAX, INT32_MIN, ... */

/* ITU-style names for the 16-bit and 32-bit limits. */
#ifndef MAX_16
#define MAX_16 INT16_MAX
#define MIN_16 INT16_MIN
#define MAX_32 INT32_MAX
#define MIN_32 INT32_MIN
#endif /* MAX_16 etc. */

/* Disabled target checks, kept for reference. */
#if 0
#ifndef __TARGET_FEATURE_DSPMUL
#error ETSI intrinsics not currently emulated on this platform
#endif /* __TARGET_FEATURE_DSPMUL */

#if defined(__thumb) && (__TARGET_ARCH_THUMB < 4)
#error ETSI intrinsics not available on Thumb-1
#endif /* Thumb but not Thumb-2 */
#endif

/* Storage class / inlining prefix applied to every function in this header. */
#ifdef __cplusplus
#define __ARM_INTRINSIC __forceinline
#elif defined __GNUC__ || defined _USE_STATIC_INLINE
#define __ARM_INTRINSIC static __forceinline
#elif (defined(__STDC_VERSION__) && 199901L <= __STDC_VERSION__)
#define __ARM_INTRINSIC __forceinline
#else
#define __ARM_INTRINSIC __forceinline
#endif

/* Define this to 1 if you do not need add() etc.
to set the saturation flag */ #ifndef __ARM_DSP_IGNORE_OVERFLOW #define __ARM_DSP_IGNORE_OVERFLOW 0 #endif /* Define this to 1 if you believe all shift counts are in the range [-255,255] */ #ifndef __ARM_DSP_SMALL_SHIFTS #define __ARM_DSP_SMALL_SHIFTS 0 #endif #ifdef __cplusplus extern "C" { #endif #ifdef __TARGET_FEATURE_DSPMUL #pragma recognize_itu_functions /* enable vectorization of ITU functions */ #if !defined(__ARM_BIG_ENDIAN) && !defined(__BIG_ENDIAN) typedef union { struct { int _dnm:27; int Q:1; int V:1; int C:1; int Z:1; int N:1; } b; unsigned int word; } _ARM_PSR; #else /* defined(__ARM_BIG_ENDIAN) || defined(__BIG_ENDIAN) */ typedef union { struct { int N:1; int Z:1; int C:1; int V:1; int Q:1; int _dnm:27; } b; unsigned int word; } _ARM_PSR; #endif /* defined(__ARM_BIG_ENDIAN) || defined(__BIG_ENDIAN) */ register _ARM_PSR _apsr_for_q __asm("apsr"); #define Overflow _apsr_for_q.b.Q #else __ARM_INTRINSIC int *_arm_global_overflow(void) { static int v; return &v; } #define Overflow (*_arm_global_overflow()) __ARM_INTRINSIC int32_t __qadd(int32_t x, int32_t y) { int32_t r; #if __TARGET_ARCH_ARM > 0 int ov = 0; __asm { adds r, x, y movvs ov, #1 } if (ov) { #if !__ARM_DSP_IGNORE_OVERFLOW Overflow = 1; #endif r = y < 0 ? INT32_MIN : INT32_MAX; } #else r = x + y; if (y > 0 && r < x) { #if !__ARM_DSP_IGNORE_OVERFLOW Overflow = 1; #endif return INT32_MAX; } else if (y < 0 && r > x) { #if !__ARM_DSP_IGNORE_OVERFLOW Overflow = 1; #endif return INT32_MIN; } #endif return r; } __ARM_INTRINSIC int32_t __qsub(int32_t x, int32_t y) { int32_t r; #if __TARGET_ARCH_ARM > 0 int ov = 0; __asm { subs r, x, y movvs ov, #1 } if (ov) { #if !__ARM_DSP_IGNORE_OVERFLOW Overflow = 1; #endif r = y >= 0 ? 
INT32_MIN : INT32_MAX; } #else r = x - y; if (y > 0 && r > x) { #if !__ARM_DSP_IGNORE_OVERFLOW Overflow = 1; #endif return INT32_MIN; } else if (y < 0 && r < x) { #if !__ARM_DSP_IGNORE_OVERFLOW Overflow = 1; #endif return INT32_MAX; } #endif return r; } __ARM_INTRINSIC int32_t __qdbl(int32_t x) { return __qadd(x, x); } #endif __ARM_INTRINSIC int *_arm_global_carry(void) { static int c; return &c; } #define Carry (*_arm_global_carry()) /* * Convert a 32-bit signed integer into a 16-bit signed integer by * saturation. */ __ARM_INTRINSIC int16_t saturate(int32_t x) { #if (defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6) return (int16_t)__ssat(x, 16); #else /* ARM v5E has no SSAT instruction */ if (x > INT16_MAX || x < INT16_MIN) x = __qdbl(INT32_MAX - ((x) >> 31)) >> 16; /* Saturate and set Overflow */ return (int16_t) x; #endif } /* * Add two 16-bit signed integers with saturation. */ __ARM_INTRINSIC int16_t add(int16_t x, int16_t y) { #if __ARM_DSP_IGNORE_OVERFLOW && ((defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6)) return (int16_t)__qadd16(x, y); #else return (int16_t)(__qadd(x<<16, y<<16) >> 16); #endif } /* * Subtract one 16-bit signed integer from another with saturation. */ __ARM_INTRINSIC int16_t sub(int16_t x, int16_t y) { #if __ARM_DSP_IGNORE_OVERFLOW && ((defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6)) return (int16_t)__qsub16(x, y); #else return (int16_t)(__qsub(x<<16, y<<16) >> 16); #endif } /* * Absolute value of a 16-bit signed integer. Saturating, so * abs(-0x8000) becomes +0x7FFF. */ __ARM_INTRINSIC int16_t abs_s(int16_t x) { if (x >= 0) return x; #if (defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6) return (int16_t)__qsub16(0, x); #else else if (x == INT16_MIN) return INT16_MAX; else return (int16_t) -x; #endif } /* * Shift a 16-bit signed integer left (or right, if the shift count * is negative). Saturate on overflow. 
*/ __ARM_INTRINSIC int16_t shl(int16_t x, int16_t shift) { if (shift <= 0 || x == 0) { #if !__ARM_DSP_SMALL_SHIFTS if (shift < -63) shift = -63; #endif /* __ARM_DSP_SMALL_SHIFTS */ return (int16_t) (x >> (-shift)); } if (shift > 15) shift = 16; return saturate(x << shift); } /* * Shift a 16-bit signed integer right (or left, if the shift count * is negative). Saturate on overflow. */ __ARM_INTRINSIC int16_t shr(int16_t x, int16_t shift) { if (shift >= 0 || x == 0) { #if !__ARM_DSP_SMALL_SHIFTS if (shift > 63) shift = 63; #endif /* __ARM_DSP_SMALL_SHIFTS */ return (int16_t) (x >> shift); } if (shift < -15) shift = -16; return saturate(x << (-shift)); } /* * Multiply two 16-bit signed integers, shift the result right by * 15 and saturate it. (Saturation is only necessary if both inputs * were -0x8000, in which case the result "should" be 0x8000 and is * saturated to 0x7FFF.) */ __ARM_INTRINSIC int16_t mult(int16_t x, int16_t y) { return (int16_t)(__qdbl(x*y) >> 16); } /* * Multiply two 16-bit signed integers to give a 32-bit signed * integer. Shift left by one, and saturate the result. (Saturation * is only necessary if both inputs were -0x8000, in which case the * result "should" be 0x40000000 << 1 = +0x80000000, and is * saturated to +0x7FFFFFFF.) */ __ARM_INTRINSIC int32_t L_mult(int16_t x, int16_t y) { return __qdbl(x*y); } /* * Negate a 16-bit signed integer, with saturation. (Saturation is * only necessary when the input is -0x8000.) */ __ARM_INTRINSIC int16_t negate(int16_t x) { #if (defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6) return (int16_t)__qsub16(0, x); #else if (x == INT16_MIN) return INT16_MAX; return (int16_t) -x; #endif } /* * Return the top 16 bits of a 32-bit signed integer. */ __ARM_INTRINSIC int16_t extract_h(int32_t x) { return (int16_t) (x >> 16); } /* * Return the bottom 16 bits of a 32-bit signed integer, with no * saturation, just coerced into a two's complement 16 bit * representation. 
*/ __ARM_INTRINSIC int16_t extract_l(int32_t x) { return (int16_t) x; } /* * Divide a 32-bit signed integer by 2^16, rounding to the nearest * integer (round up on a tie). Equivalent to adding 0x8000 with * saturation, then shifting right by 16. */ __ARM_INTRINSIC int16_t round(int32_t x) { return extract_h(__qadd(x, 0x8000)); } /* * Multiply two 16-bit signed integers together to give a 32-bit * signed integer, shift left by one with saturation, and add to * another 32-bit integer with saturation. * * Note the intermediate saturation operation in the definition: * * L_mac(-1, -0x8000, -0x8000) * * will give 0x7FFFFFFE and not 0x7FFFFFFF: * the unshifted product is: 0x40000000 * shift left with saturation: 0x7FFFFFFF * add to -1 with saturation: 0x7FFFFFFE */ __ARM_INTRINSIC int32_t L_mac(int32_t accumulator, int16_t x, int16_t y) { return __qadd(accumulator, __qdbl(x*y)); } /* * Multiply two 16-bit signed integers together to give a 32-bit * signed integer, shift left by one with saturation, and subtract * from another 32-bit integer with saturation. * * Note the intermediate saturation operation in the definition: * * L_msu(1, -0x8000, -0x8000) * * will give 0x80000002 and not 0x80000001: * the unshifted product is: 0x40000000 * shift left with saturation: 0x7FFFFFFF * subtract from 1 with saturation: 0x80000002 */ __ARM_INTRINSIC int32_t L_msu(int32_t accumulator, int16_t x, int16_t y) { return __qsub(accumulator, __qdbl(x*y)); } /* * Add two 32-bit signed integers with saturation. */ __ARM_INTRINSIC int32_t L_add(int32_t x, int32_t y) { return __qadd(x, y); } /* * Subtract one 32-bit signed integer from another with saturation. */ __ARM_INTRINSIC int32_t L_sub(int32_t x, int32_t y) { return __qsub(x, y); } /* * Add together the Carry variable and two 32-bit signed integers, * without saturation. * Note: the reference implementation has INT32_MIN + -1 + (Carry=1) * set the cumulative overflow flag. 
This does not match intuition, * or the natural behavior of ARM's ADCS instruction. */ __ARM_INTRINSIC int32_t L_add_c(int32_t x, int32_t y) { int32_t result; #if __TARGET_ARCH_ARM > 0 int32_t flags; __asm { movs flags, Carry, lsr #1 adcs result, x, y; mrs flags, CPSR; } #if !__ARM_DSP_IGNORE_OVERFLOW if (flags & 0x10000000) Overflow = 1; /* V -> Q */ #endif Carry = (flags & 0x20000000) != 0; #else /* Inline assembler not available */ result = x + y + Carry; Carry = (uint32_t)((x & y) | ((x | y) & ~result)) >> 31; #if !__ARM_DSP_IGNORE_OVERFLOW if (((result ^ x) & (result ^ y) & 0x80000000) != 0) Overflow = 1; #endif #endif return result; } /* * Subtract one 32-bit signed integer, together with the logical * negation of the Carry variable, from another 32-bit signed integer, * without saturation. * N.b. the computation matches that of the ETSI reference function * (in basicop2.c). The comment above the ETSI reference function says * that L_sub_c(a,b) = a-b-C, but that does not match their code. */ __ARM_INTRINSIC int32_t L_sub_c(int32_t x, int32_t y) { int32_t result; #if __TARGET_ARCH_ARM > 0 int32_t flags; __asm { movs flags, Carry, lsr #1 sbcs result, x, y; mrs flags, CPSR; } #if !__ARM_DSP_IGNORE_OVERFLOW if (flags & 0x10000000) Overflow = 1; /* V -> Q */ #endif Carry = (flags & 0x20000000) != 0; #else /* Inline assembler not available */ result = x + ~y + Carry; Carry = ((uint32_t)((x & ~y) | ((x | ~y) & ~result)) >> 31); #if !__ARM_DSP_IGNORE_OVERFLOW if (((x ^ y) & (result ^ y) & 0x80000000) != 0) Overflow = 1; #endif #endif return result; } /* * Multiply two 16-bit signed integers together to give a 32-bit * signed integer, shift left by one _with_ saturation, and add * with carry to another 32-bit integer _without_ saturation. 
*/ __ARM_INTRINSIC int32_t L_macNs(int32_t accumulator, int16_t x, int16_t y) { return L_add_c(accumulator, L_mult(x, y)); } /* * Multiply two 16-bit signed integers together to give a 32-bit * signed integer, shift left by one _with_ saturation, and * subtract with carry from another 32-bit integer _without_ * saturation. */ __ARM_INTRINSIC int32_t L_msuNs(int32_t accumulator, int16_t x, int16_t y) { return L_sub_c(accumulator, L_mult(x, y)); } /* * Negate a 32-bit signed integer, with saturation. (Saturation is * only necessary when the input is -0x80000000.) */ __ARM_INTRINSIC int32_t L_negate(int32_t x) { return __qsub(0, x); } /* * Multiply two 16-bit signed integers, shift the result right by * 15 with rounding, and saturate it. (Saturation is only necessary * if both inputs were -0x8000, in which case the result "should" * be 0x8000 and is saturated to 0x7FFF.) */ __ARM_INTRINSIC int16_t mult_r(int16_t x, int16_t y) { return (int16_t)(__qdbl(x*y + 0x4000) >> 16); } /* * Return the number of bits of left shift needed to arrange for a * 16-bit signed integer to have value >= 0x4000 or <= -0x4001. * * Returns 0 if x is zero (following C reference implementation). */ __ARM_INTRINSIC int16_t norm_s(int16_t x) { return __clz(x ^ ((int32_t)x << 17)) & 15; } /* * Return the number of bits of left shift needed to arrange for a * 32-bit signed integer to have value >= 0x40000000 (if +ve) * or <= -0x40000001 (if -ve). * * Returns 0 if x is zero (following C reference implementation). */ __ARM_INTRINSIC int16_t norm_l(int32_t x) { return __clz(x ^ (x << 1)) & 31; } /* * Shift a 32-bit signed integer left (or right, if the shift count * is negative). Saturate on overflow. */ __ARM_INTRINSIC int32_t L_shl(int32_t x, int16_t shift) { if (shift <= 0) { #if !__ARM_DSP_SMALL_SHIFTS if (shift < -63) shift = -63; #endif /* __ARM_DSP_SMALL_SHIFTS */ return x >> (-shift); } if (shift <= norm_l(x) || x == 0) return x << shift; return __qdbl((x < 0) ? 
INT32_MIN : INT32_MAX); } /* * Shift a 32-bit signed integer right (or left, if the shift count * is negative). Saturate on overflow. */ __ARM_INTRINSIC int32_t L_shr(int32_t x, int16_t shift) { if (shift >= 0) { #if !__ARM_DSP_SMALL_SHIFTS if (shift > 63) shift = 63; #endif /* __ARM_DSP_SMALL_SHIFTS */ return x >> shift; } if ((-shift) <= norm_l(x) || x == 0) return x << (-shift); return __qdbl((x < 0) ? INT32_MIN : INT32_MAX); } /* * Shift a 16-bit signed integer right, with rounding. Shift left * with saturation if the shift count is negative. */ __ARM_INTRINSIC int16_t shr_r(int16_t x, int16_t shift) { if (shift == 0 || x == 0) return (int16_t)x; if (shift > 0) { #if !__ARM_DSP_SMALL_SHIFTS if (shift > 32) shift = 32; #endif /* __ARM_DSP_SMALL_SHIFTS */ return (int16_t) (((x >> (shift-1)) + 1) >> 1); } if (shift < -15) shift = -16; return saturate(x << (-shift)); } /* * Multiply two 16-bit signed integers together to give a 32-bit * signed integer, shift left by one with saturation, and add to * another 32-bit integer with saturation (like L_mac). Then shift * the result right by 15 bits with rounding (like round). */ __ARM_INTRINSIC int16_t mac_r(int32_t accumulator, int16_t x, int16_t y) { return round(L_mac(accumulator, x, y)); } /* * Multiply two 16-bit signed integers together to give a 32-bit * signed integer, shift left by one with saturation, and subtract * from another 32-bit integer with saturation (like L_msu). Then * shift the result right by 15 bits with rounding (like round). */ __ARM_INTRINSIC int16_t msu_r(int32_t accumulator, int16_t x, int16_t y) { return round(L_msu(accumulator, x, y)); } /* * Shift a 16-bit signed integer left by 16 bits to generate a * 32-bit signed integer. The bottom 16 bits are zeroed. */ __ARM_INTRINSIC int32_t L_deposit_h(int16_t x) { return ((int32_t)x) << 16; } /* * Sign-extend a 16-bit signed integer by 16 bits to generate a * 32-bit signed integer. 
*/ __ARM_INTRINSIC int32_t L_deposit_l(int16_t x) { return (int32_t)x; } /* * Shift a 32-bit signed integer right, with rounding. Shift left * with saturation if the shift count is negative. */ __ARM_INTRINSIC int32_t L_shr_r(int32_t x, int16_t shift) { if (shift == 0 || x == 0) return x; if (shift > 0) { #if !__ARM_DSP_SMALL_SHIFTS int32_t x2 = (shift > 32) ? 0 : x >> (shift-1); #else int32_t x2 = x >> (shift-1); #endif /* __ARM_DSP_SMALL_SHIFTS */ return (x2 >> 1) + (x2 & 1); } if (-shift <= norm_l(x) || x == 0) return x << (-shift); return __qdbl((x < 0) ? INT32_MIN : INT32_MAX); } /* * Absolute value of a 32-bit signed integer. Saturating, so * abs(-0x80000000) becomes +0x7FFFFFFF. */ __ARM_INTRINSIC int32_t L_abs(int32_t x) { if (x >= 0) return x; else return __qsub(0, x); } /* * Return a saturated value appropriate to the most recent carry- * affecting operation (L_add_c, L_macNs, L_sub_c, L_msuNs). * * In other words: return the argument if the Q flag is clear. * Otherwise, return -0x80000000 or +0x7FFFFFFF depending on * whether the Carry flag is set or clear (respectively). */ __ARM_INTRINSIC int32_t L_sat(int32_t x) { if (Overflow) { Overflow = 0; x = (int32_t)((uint32_t)INT32_MAX + Carry); Carry = 0; } return x; } /* * Divide one 16-bit signed integer by another, and produce a * 15-bit fixed point fractional result (by multiplying the true * mathematical result by 0x8000). The divisor (denominator) is * assumed to be non-zero and also assumed to be greater or equal * to the dividend (numerator). Hence the (unscaled) result is * necessarily within the range [0,1]. * * Both operands are assumed to be positive. * * After division, the result is saturated to fit into a 16-bit * signed integer. (The only way this can happen is if the operands * are equal, so that the result would be 1, i.e. +0x8000 in 15-bit * fixed point.) 
*/ __ARM_INTRINSIC int16_t div_s(int16_t x, int16_t y) { int32_t quot; assert(y > 0); assert(x >= 0); assert(x <= y); quot = 0x8000 * x; quot /= y; if (quot > INT16_MAX) return INT16_MAX; else return (int16_t)quot; } #ifdef __cplusplus } #endif #endif /* ARMDSP_DSPFNS_H */