You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
283 lines
11 KiB
283 lines
11 KiB
5 years ago
|
/* c55x.h - Emulation of selected TI C55x intrinsics */
|
||
|
/* Copyright (C) 2006 ARM Limited. All rights reserved. */
|
||
|
|
||
|
/*
|
||
|
* RCS $Revision: 178362 $
|
||
|
* Checkin $Date: 2013-01-10 16:08:02 +0000 (Thu, 10 Jan 2013) $
|
||
|
* Revising $Author: pwright $
|
||
|
*/
|
||
|
|
||
|
#ifndef __arm_c55x_h
|
||
|
#define __arm_c55x_h
|
||
|
#define __ARMCLIB_VERSION 5050106
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
#define __STDC_LIMIT_MACROS 1
|
||
|
#endif /* __cplusplus */
|
||
|
#include <stdint.h>
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
#define __ARM_INTRINSIC __forceinline
|
||
|
#elif defined __GNUC__ || defined _USE_STATIC_INLINE
|
||
|
#define __ARM_INTRINSIC static __forceinline
|
||
|
#elif (defined(__STDC_VERSION__) && 199901L <= __STDC_VERSION__)
|
||
|
#define __ARM_INTRINSIC __forceinline
|
||
|
#else
|
||
|
#define __ARM_INTRINSIC __forceinline
|
||
|
#endif
|
||
|
|
||
|
/* Define this to 1 if you believe all shift counts are in the range [-255,255] */
|
||
|
#ifndef __ARM_DSP_SMALL_SHIFTS
|
||
|
#define __ARM_DSP_SMALL_SHIFTS 0
|
||
|
#endif
|
||
|
|
||
|
__ARM_INTRINSIC int32_t _sround (int32_t src);
|
||
|
|
||
|
/* 16-bit saturating add via __qadd: both operands are moved into the upper
   halfword first, so the intrinsic's 32-bit limit lines up with the 16-bit
   range; the result is then moved back down. */
__ARM_INTRINSIC int16_t _sadd(int16_t src1, int16_t src2)
{
    int32_t hi1 = src1 << 16;
    int32_t hi2 = src2 << 16;
    return __qadd(hi1, hi2) >> 16;
}
|
||
|
/* Performs the same computation as _sadd: a __qadd of the two operands held
   in the upper halfword, with the sum shifted back down to 16 bits. */
__ARM_INTRINSIC int16_t _a_sadd(int16_t src1, int16_t src2)
{
    int32_t sum = __qadd(src1 << 16, src2 << 16);
    return sum >> 16;
}
|
||
|
/* 32-bit saturating add: a thin wrapper around the __qadd intrinsic. */
__ARM_INTRINSIC int32_t _lsadd(int32_t src1, int32_t src2)
{
    int32_t sum = __qadd(src1, src2);
    return sum;
}
|
||
|
/* Performs the same computation as _lsadd: the two 32-bit operands are
   handed directly to the __qadd intrinsic. */
__ARM_INTRINSIC int32_t _a_lsadd(int32_t src1, int32_t src2)
{
    int32_t result;

    result = __qadd(src1, src2);
    return result;
}
|
||
|
/* 16-bit saturating subtract (src1 - src2) via __qsub: the operands are
   placed in the upper halfword so the 32-bit limit lines up with the 16-bit
   range, and the difference is shifted back down. */
__ARM_INTRINSIC int16_t _ssub(int16_t src1, int16_t src2)
{
    int32_t minuend = src1 << 16;
    int32_t subtrahend = src2 << 16;
    return __qsub(minuend, subtrahend) >> 16;
}
|
||
|
/* 32-bit saturating subtract (src1 - src2): a thin wrapper around __qsub. */
__ARM_INTRINSIC int32_t _lssub(int32_t src1, int32_t src2)
{
    int32_t difference = __qsub(src1, src2);
    return difference;
}
|
||
|
|
||
|
/* 16-bit negate, computed as 0 - src with __qsub on the upper halfword
   (the same computation _ssub(0, src) performs). */
__ARM_INTRINSIC int16_t _sneg(int16_t src)
{
    int32_t negated = __qsub(0, src << 16);
    return negated >> 16;
}
|
||
|
/* 32-bit negate, computed as 0 - src with the __qsub intrinsic. */
__ARM_INTRINSIC int32_t _lsneg(int32_t src)
{
    int32_t negated = __qsub(0, src);
    return negated;
}
|
||
|
|
||
|
/* 16-bit absolute value: negative inputs are negated through _sneg,
   non-negative inputs are returned unchanged. */
__ARM_INTRINSIC int16_t _abss(int16_t src)
{
    if (src < 0)
        return _sneg(src);
    return src;
}
|
||
|
/* 32-bit absolute value: negative inputs are negated through _lsneg,
   non-negative inputs are returned unchanged. */
__ARM_INTRINSIC int32_t _labss(int32_t src)
{
    if (src < 0)
        return _lsneg(src);
    return src;
}
|
||
|
|
||
|
/* 16x16 multiply whose product is doubled with __qdbl; the upper halfword of
   the doubled product is returned. */
__ARM_INTRINSIC int16_t _smpy(int16_t src1, int16_t src2)
{
    int32_t doubled = __qdbl(src1 * src2);
    return (int16_t)(doubled >> 16);
}
|
||
|
|
||
|
/* 16x16 multiply whose 32-bit product is doubled with __qdbl. */
__ARM_INTRINSIC int32_t _lsmpy(int16_t src1, int16_t src2)
{
    int32_t product = src1 * src2;
    return __qdbl(product);
}
|
||
|
/* Plain signed 16x16 -> 32-bit multiply; no doubling and no saturation. */
__ARM_INTRINSIC int32_t _lmpy(int16_t src1, int16_t src2)
{
    int32_t product = src1 * src2;
    return product;
}
|
||
|
/* Signed-by-unsigned 16x16 -> 32-bit multiply; no doubling, no saturation. */
__ARM_INTRINSIC int32_t _lmpysu(int16_t src1, uint16_t src2)
{
    int32_t product = src1 * src2;
    return product;
}
|
||
|
/* Unsigned 16x16 -> 32-bit multiply, no saturation.
   The operands are widened to uint32_t before multiplying: as uint16_t they
   would promote to (signed) int, and products above INT32_MAX such as
   0xFFFF * 0xFFFF would overflow it.  The 32-bit pattern of the product is
   returned through the existing int32_t return type, so products above
   INT32_MAX come back as negative values. */
__ARM_INTRINSIC int32_t _lmpyu(uint16_t src1, uint16_t src2)
{
    uint32_t product = (uint32_t)src1 * (uint32_t)src2;
    return (int32_t)product;
}
|
||
|
/* Signed 16x16 -> 32-bit integer multiply.  No explicit saturation step is
   applied here; the product of two int16_t values always fits in 32 bits. */
__ARM_INTRINSIC int32_t _lsmpyi(int16_t src1, int16_t src2)
{
    int32_t product = src1 * src2;
    return product;
}
|
||
|
/* Signed-by-unsigned 16x16 -> 32-bit integer multiply.  No explicit
   saturation step is applied; an int16_t times a uint16_t always fits in
   32 bits. */
__ARM_INTRINSIC int32_t _lsmpysui(int16_t src1, uint16_t src2)
{
    int32_t product = src1 * src2;
    return product;
}
|
||
|
/* Signed-by-unsigned 16x16 multiply whose product is doubled with __qdbl. */
__ARM_INTRINSIC int32_t _lsmpysu(int16_t src1, uint16_t src2)
{
    int32_t product = src1 * src2;
    return __qdbl(product);
}
|
||
|
/* Unsigned 16x16 multiply clamped to INT32_MAX.
   The operands are widened to uint32_t before multiplying: as uint16_t they
   would promote to (signed) int, and products above INT32_MAX such as
   0xFFFF * 0xFFFF would overflow it before the clamp could be applied. */
__ARM_INTRINSIC int32_t _lsmpyui(uint16_t src1, uint16_t src2)
{
    uint32_t product = (uint32_t)src1 * (uint32_t)src2;
    return (product > (uint32_t)INT32_MAX) ? INT32_MAX : (int32_t)product;
}
|
||
|
/* Unsigned 16x16 multiply: the clamped product from _lsmpyui is doubled
   with __qdbl. */
__ARM_INTRINSIC int32_t _lsmpyu(uint16_t src1, uint16_t src2)
{
    int32_t clamped = _lsmpyui(src1, src2);
    return __qdbl(clamped);
}
|
||
|
|
||
|
/* Old name for _lsmpyr (identical body).  0x4000 is added to the product
   before __qdbl doubles it, which is a rounding bias of 0x8000 on the doubled
   value; the low halfword of the result is then cleared. */
__ARM_INTRINSIC int32_t _smpyr(int16_t src1, int16_t src2)
{
    int32_t doubled = __qdbl(src1 * src2 + 0x4000);
    return doubled & 0xFFFF0000;
}
|
||
|
/* Rounded 16x16 multiply.  0x4000 is added to the product before __qdbl
   doubles it, which is a rounding bias of 0x8000 on the doubled value; the
   low halfword of the result is then cleared. */
__ARM_INTRINSIC int32_t _lsmpyr(int16_t src1, int16_t src2)
{
    int32_t biased = src1 * src2 + 0x4000;
    int32_t doubled = __qdbl(biased);
    return doubled & 0xFFFF0000;
}
|
||
|
|
||
|
/* Accumulation functions: comments indicate the (notional) multiplication intrinsic involved */
|
||
|
/* Multiply-accumulate: src1 plus the doubled product src2*src3 (the product
   _lsmpy computes), combined with __qadd. */
__ARM_INTRINSIC int32_t _smac(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t product = __qdbl(src2 * src3);
    return __qadd(src1, product);
}
|
||
|
/* Performs the same computation as _smac: __qadd of src1 and the __qdbl-doubled
   product src2*src3. */
__ARM_INTRINSIC int32_t _a_smac(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t doubled;

    doubled = __qdbl(src2 * src3);
    return __qadd(src1, doubled);
}
|
||
|
/* Integer multiply-accumulate: src1 plus the undoubled product src2*src3
   (the product _lsmpyi computes), combined with __qadd. */
__ARM_INTRINSIC int32_t _smaci(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t product = src2 * src3;
    return __qadd(src1, product);
}
|
||
|
/* Signed-by-unsigned integer multiply-accumulate: src1 plus the undoubled
   product src2*src3 (the product _lsmpysui computes), combined with __qadd. */
__ARM_INTRINSIC int32_t _smacsui(int32_t src1, int16_t src2, uint16_t src3)
{
    int32_t product = src2 * src3;
    return __qadd(src1, product);
}
|
||
|
/* Signed-by-unsigned multiply-accumulate: src1 plus the __qdbl-doubled
   product src2*src3 (the product _lsmpysu computes), combined with __qadd. */
__ARM_INTRINSIC int32_t _smacsu(int32_t src1, int16_t src2, uint16_t src3)
{
    int32_t product = __qdbl(src2 * src3);
    return __qadd(src1, product);
}
|
||
|
|
||
|
/* Multiply-accumulate with rounding: __qadd of src1 and the __qdbl-doubled
   product src2*src3, with the sum then passed through _sround. */
__ARM_INTRINSIC int32_t _smacr(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t product = __qdbl(src2 * src3);
    int32_t acc = __qadd(src1, product);
    return _sround(acc);
}
|
||
|
/* Performs the same computation as _smacr: the accumulated sum of src1 and
   the doubled product src2*src3 is rounded by _sround. */
__ARM_INTRINSIC int32_t _a_smacr(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t acc;

    acc = __qadd(src1, __qdbl(src2 * src3));
    return _sround(acc);
}
|
||
|
|
||
|
/* Multiply-subtract: src1 minus the doubled product src2*src3 (the product
   _lsmpy computes), combined with __qsub. */
__ARM_INTRINSIC int32_t _smas(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t product = __qdbl(src2 * src3);
    return __qsub(src1, product);
}
|
||
|
/* Performs the same computation as _smas: __qsub of src1 and the
   __qdbl-doubled product src2*src3. */
__ARM_INTRINSIC int32_t _a_smas(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t doubled;

    doubled = __qdbl(src2 * src3);
    return __qsub(src1, doubled);
}
|
||
|
/* Integer multiply-subtract: src1 minus the undoubled product src2*src3
   (the product _lsmpyi computes), combined with __qsub. */
__ARM_INTRINSIC int32_t _smasi(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t product = src2 * src3;
    return __qsub(src1, product);
}
|
||
|
/* Signed-by-unsigned integer multiply-subtract: src1 minus the undoubled
   product src2*src3 (the product _lsmpysui computes), combined with __qsub. */
__ARM_INTRINSIC int32_t _smassui(int32_t src1, int16_t src2, uint16_t src3)
{
    int32_t product = src2 * src3;
    return __qsub(src1, product);
}
|
||
|
/* Signed-by-unsigned multiply-subtract: src1 minus the __qdbl-doubled
   product src2*src3 (the product _lsmpysu computes), combined with __qsub. */
__ARM_INTRINSIC int32_t _smassu(int32_t src1, int16_t src2, uint16_t src3)
{
    int32_t product = __qdbl(src2 * src3);
    return __qsub(src1, product);
}
|
||
|
|
||
|
/* Multiply-subtract with rounding: __qsub of src1 and the __qdbl-doubled
   product src2*src3, with the difference then passed through _sround. */
__ARM_INTRINSIC int32_t _smasr(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t product = __qdbl(src2 * src3);
    int32_t acc = __qsub(src1, product);
    return _sround(acc);
}
|
||
|
/* Performs the same computation as _smasr: src1 minus the doubled product
   src2*src3, rounded by _sround. */
__ARM_INTRINSIC int32_t _a_smasr(int32_t src1, int16_t src2, int16_t src3)
{
    int32_t acc;

    acc = __qsub(src1, __qdbl(src2 * src3));
    return _sround(acc);
}
|
||
|
|
||
|
#if __ARM_DSP_SMALL_SHIFTS
|
||
|
#define __ARM_normalize_shift(x) (x)
|
||
|
#else /* !__ARM_DSP_SMALL_SHIFTS */
|
||
|
/* Normalize shift counts of any range. The expression should reduce to a
   6-bit saturation; a 5-bit saturation is not enough, because a 32-bit shift
   and a 31-bit shift give different results. */
|
||
|
#define __ARM_normalize_shift(x) ((x) >= 63 ? 63 : (x) <= -64 ? -64 : (x))
|
||
|
#endif /* __ARM_DSP_SMALL_SHIFTS */
|
||
|
|
||
|
/* 'x' is known to cause an out-of-range left-shift result. Generate the
   saturated extreme value matching the sign of 'x', and also cause the
   saturation flag to be set. */
|
||
|
/* Doubling either 32-bit extreme with __qdbl overflows, so the result is the
   saturated value with the sign of x and the saturation flag is set.  The
   extreme is selected explicitly (the same form L_crshft_r uses) rather than
   computed as INT32_MAX - ((x) >> 31), which depends on signed wrap-around
   for negative x.  Note that x is evaluated once; pass a plain variable. */
#define __ARM_saturated_result(x) __qdbl(((x) < 0) ? INT32_MIN : INT32_MAX)
|
||
|
|
||
|
|
||
|
/* Result is saturated */
|
||
|
__ARM_INTRINSIC int16_t _sshl(int16_t src1, int16_t src2)
{
    int16_t shifted;

    src2 = __ARM_normalize_shift(src2);

    /* A negative count is a right shift by -src2. */
    if (src2 < 0)
        return src1 >> -src2;

    shifted = src1 << src2;
    /* Shifting back must reproduce src1; if it does, no bits were lost. */
    if ((shifted >> src2) == src1)
        return shifted;

    /* Overflow: take the upper halfword of the 32-bit saturated value. */
    return __ARM_saturated_result(src1) >> 16;
}
|
||
|
|
||
|
/* Result is saturated */
|
||
|
__ARM_INTRINSIC int32_t _lsshl(int32_t src1, int16_t src2)
{
    int32_t shifted;

    src2 = __ARM_normalize_shift(src2);

    /* A negative count is a right shift by -src2. */
    if (src2 < 0)
        return src1 >> -src2;

    shifted = src1 << src2;
    /* Shifting back must reproduce src1; if it does, no bits were lost. */
    if ((shifted >> src2) == src1)
        return shifted;

    return __ARM_saturated_result(src1);
}
|
||
|
|
||
|
/* Result is saturated */
|
||
|
__ARM_INTRINSIC int16_t _shrs(int16_t src1, int16_t src2)
{
    int16_t shifted;

    src2 = __ARM_normalize_shift(src2);

    /* Non-negative counts are plain right shifts and cannot overflow. */
    if (src2 >= 0)
        return src1 >> src2;

    /* A negative count is a left shift by -src2, which may overflow. */
    shifted = src1 << -src2;
    if ((shifted >> -src2) == src1)
        return shifted;

    /* Overflow: take the upper halfword of the 32-bit saturated value. */
    return __ARM_saturated_result(src1) >> 16;
}
|
||
|
|
||
|
/* Result is saturated */
|
||
|
__ARM_INTRINSIC int32_t _lshrs(int32_t src1, int16_t src2)
{
    int32_t shifted;

    src2 = __ARM_normalize_shift(src2);

    /* Non-negative counts are plain right shifts and cannot overflow. */
    if (src2 >= 0)
        return src1 >> src2;

    /* A negative count is a left shift by -src2, which may overflow. */
    shifted = src1 << -src2;
    if ((shifted >> -src2) == src1)
        return shifted;

    return __ARM_saturated_result(src1);
}
|
||
|
|
||
|
/* No saturation is performed */
|
||
|
__ARM_INTRINSIC int16_t _shl(int16_t src1, int16_t src2)
{
    src2 = __ARM_normalize_shift(src2);

    /* Negative counts shift right by -src2; otherwise shift left by src2.
       Bits shifted out are simply discarded. */
    return (src2 < 0) ? (src1 >> -src2) : (src1 << src2);
}
|
||
|
|
||
|
/* No saturation is performed */
|
||
|
__ARM_INTRINSIC int32_t _lshl(int32_t src1, int16_t src2)
{
    src2 = __ARM_normalize_shift(src2);

    /* Negative counts shift right by -src2; otherwise shift left by src2.
       Bits shifted out are simply discarded. */
    return (src2 < 0) ? (src1 >> -src2) : (src1 << src2);
}
|
||
|
|
||
|
/* Saturating round to the upper halfword: 0x8000 is added with __qadd and
   the low 16 bits of the sum are cleared. */
__ARM_INTRINSIC int32_t _sround(int32_t src)
{
    int32_t biased = __qadd(src, 0x8000);
    return biased & 0xFFFF0000;
}
|
||
|
/* Unsaturated round to the upper halfword: 0x8000 is added and the low
   16 bits are cleared.  The addition is done in uint32_t so that inputs at
   or above 0x7FFF8000 wrap around in a well-defined way instead of
   overflowing a signed int. */
__ARM_INTRINSIC int32_t _round(int32_t src)
{
    uint32_t biased = (uint32_t)src + 0x8000u;
    return (int32_t)(biased & 0xFFFF0000u);
}
|
||
|
/* Alias for _sround: forwards src and returns its result unchanged. */
__ARM_INTRINSIC int32_t _rnd(int32_t src)
{
    int32_t rounded = _sround(src);
    return rounded;
}
|
||
|
|
||
|
/* Unsaturated round to the upper halfword with ties going to the even value.
   0x8000 is added only when bit 15 is set and, in addition, bit 16 or any of
   bits 0-14 is set (mask 0x17FFF); an exact half with an even upper halfword
   is therefore left unrounded.  The low 16 bits are then cleared.
   The arithmetic is done in uint32_t so that inputs near INT32_MAX wrap
   around in a well-defined way instead of overflowing a signed int. */
__ARM_INTRINSIC int32_t _roundn(int32_t src)
{
    uint32_t bits = (uint32_t)src;

    if ((bits & 0x17FFFu) != 0) {
        bits += bits & 0x8000u;
    }
    return (int32_t)(bits & 0xFFFF0000u);
}
|
||
|
|
||
|
/* Saturating round to the upper halfword with ties going to the even value.
   Bit 15 of src is added (through __qadd) only when bit 16 or any of bits
   0-14 is also set (mask 0x17FFF); the low 16 bits are then cleared. */
__ARM_INTRINSIC int32_t _sroundn(int32_t src)
{
    int32_t half = src & 0x8000;

    if ((src & 0x17FFF) != 0)
        src = __qadd(src, half);
    return src & 0xFFFF0000;
}
|
||
|
|
||
|
/* Returns the leading-zero count of (src XOR (src << 17)), masked to 0..15.
   NOTE(review): presumably this is the left-shift count that normalises a
   16-bit value - confirm against the TI definition of _norm. */
__ARM_INTRINSIC int16_t _norm(int16_t src)
{
    int32_t shifted = (int32_t)src << 17;
    return __clz(src ^ shifted) & 15;
}
|
||
|
/* Returns the leading-zero count of (src XOR (src << 1)), masked to 0..31.
   NOTE(review): presumably this is the left-shift count that normalises a
   32-bit value - confirm against the TI definition of _lnorm. */
__ARM_INTRINSIC int16_t _lnorm(int32_t src)
{
    int32_t shifted = src << 1;
    return __clz(src ^ shifted) & 31;
}
|
||
|
|
||
|
/* Note that 'long long' in TI C55x is a 40-bit type. */
|
||
|
__ARM_INTRINSIC int32_t _lsat(int64_t src)
{
    /* Clamp the 64-bit input into the int32_t range. */
    if (src > INT32_MAX)
        return INT32_MAX;
    if (src < INT32_MIN)
        return INT32_MIN;
    return (int32_t)src;
}
|
||
|
|
||
|
/* Returns the number of bit positions that are set in both src1 and src2. */
__ARM_INTRINSIC int16_t _count(uint64_t src1, uint64_t src2)
{
    uint64_t common = src1 & src2;
    return (int16_t)__builtin_popcountll(common);
}
|
||
|
|
||
|
/* Returns the larger of two 16-bit values (src2 when they are equal). */
__ARM_INTRINSIC int16_t _max(int16_t src1, int16_t src2)
{
    if (src1 > src2)
        return src1;
    return src2;
}
|
||
|
/* Returns the larger of two 32-bit values (src2 when they are equal). */
__ARM_INTRINSIC int32_t _lmax(int32_t src1, int32_t src2)
{
    if (src1 > src2)
        return src1;
    return src2;
}
|
||
|
/* Returns the smaller of two 16-bit values (src2 when they are equal). */
__ARM_INTRINSIC int16_t _min(int16_t src1, int16_t src2)
{
    if (src1 < src2)
        return src1;
    return src2;
}
|
||
|
/* Returns the smaller of two 32-bit values (src2 when they are equal). */
__ARM_INTRINSIC int32_t _lmin(int32_t src1, int32_t src2)
{
    if (src1 < src2)
        return src1;
    return src2;
}
|
||
|
|
||
|
/* 16-bit non-fractional division with saturation. */
|
||
|
__ARM_INTRINSIC int16_t _divs(int16_t src1, int16_t src2)
{
    /* -0x8000 / -1 is the one quotient that does not fit in 16 bits, so it
       is replaced by 0x7FFF.  NOTE(review): src2 == 0 is not guarded here. */
    return (src1 == -0x8000 && src2 == -1) ? 0x7FFF : src1 / src2;
}
|
||
|
|
||
|
/* Support function: "divides x by y with saturation" - i.e. fractional division.
   Equivalent to ETSI div_s over div_s's range. */
|
||
|
__ARM_INTRINSIC int16_t divs(int16_t x, int16_t y)
{
    /* Negative dividend or non-positive divisor: handled by a fixed set of
       comparisons, with no division performed. */
    if (x < 0 || y <= 0) {
        if (x == 0)
            return 0;
        if (x == y)
            return INT16_MAX;
        return ((x << 1) >= y) ? INT16_MAX : 0;
    }

    /* Quotient of 1.0 or more: return INT16_MAX (0 only if x is 0). */
    if (x >= y)
        return x ? INT16_MAX : 0;

    /* 0 <= x < y: quotient scaled by 0x8000. */
    return (x * 0x8000) / y;
}
|
||
|
|
||
|
|
||
|
/* Support function - identical to ETSI shr_r */
|
||
|
__ARM_INTRINSIC int16_t crshft_r(int16_t x, int16_t shift)
{
    int32_t widened;

    /* Nothing to do for a zero value or a zero shift count. */
    if (x == 0 || shift == 0)
        return x;

    if (shift > 0) {
        /* Right shift with rounding: shift by one less than requested, add 1,
           then drop the final bit. */
#if !__ARM_DSP_SMALL_SHIFTS
        if (shift > 32)
            shift = 32;
#endif /* __ARM_DSP_SMALL_SHIFTS */
        return (int16_t) (((x >> (shift - 1)) + 1) >> 1);
    }

    /* Negative count: left shift by -shift (at most 16) in 32 bits, then
       saturate the result to 16 bits. */
    if (shift < -15)
        shift = -16;
    widened = x << -shift;
#if (defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6)
    return (int16_t)__ssat(widened, 16);
#else
    /* ARM v5E has no SSAT instruction */
    if (widened > INT16_MAX || widened < INT16_MIN)
        widened = __qdbl(INT32_MAX - (widened >> 31)) >> 16; /* Saturate and set Overflow */
    return widened;
#endif
}
|
||
|
|
||
|
/* Support function - identical to ETSI L_shr_r */
|
||
|
__ARM_INTRINSIC int32_t L_crshft_r(int32_t x, int16_t shift)
{
    /* Nothing to do for a zero value or a zero shift count. */
    if (shift == 0 || x == 0)
        return x;

    if (shift > 0) {
        /* Right shift with rounding: shift by one less than requested, then
           add the last bit shifted out to the final result. */
#if !__ARM_DSP_SMALL_SHIFTS
        int32_t x2 = (shift > 32) ? 0 : x >> (shift-1);
#else
        int32_t x2 = x >> (shift-1);
#endif /* __ARM_DSP_SMALL_SHIFTS */
        return (x2 >> 1) + (x2 & 1);
    }

    /* Negative count: left shift by -shift.  __clz(x ^ (x << 1)) is compared
       against the count to decide whether the shift fits; x is known to be
       non-zero here because zero was returned above. */
    if (-shift <= __clz(x ^ (x << 1)))
        return x << (-shift);

    /* Overflow: __qdbl of the extreme yields the saturated value with the
       sign of x. */
    return __qdbl((x < 0) ? INT32_MIN : INT32_MAX);
}
|
||
|
|
||
|
#undef __ARM_normalize_shift
|
||
|
#undef __ARM_saturated_result
|
||
|
|
||
|
#endif /* ndef __arm_c55x_h */
|
||
|
|
||
|
/* end of c55x.h */
|
||
|
|