You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

283 lines
11 KiB

5 years ago
/* c55x.h - Emulation of selected TI C55x intrinsics */
/* Copyright (C) 2006 ARM Limited. All rights reserved. */
/*
* RCS $Revision: 178362 $
* Checkin $Date: 2013-01-10 16:08:02 +0000 (Thu, 10 Jan 2013) $
* Revising $Author: pwright $
*/
#ifndef __arm_c55x_h
#define __arm_c55x_h
#define __ARMCLIB_VERSION 5050106
#ifdef __cplusplus
#define __STDC_LIMIT_MACROS 1
#endif /* __cplusplus */
#include <stdint.h>
#ifdef __cplusplus
#define __ARM_INTRINSIC __forceinline
#elif defined __GNUC__ || defined _USE_STATIC_INLINE
#define __ARM_INTRINSIC static __forceinline
#elif (defined(__STDC_VERSION__) && 199901L <= __STDC_VERSION__)
#define __ARM_INTRINSIC __forceinline
#else
#define __ARM_INTRINSIC __forceinline
#endif
/* Define this to 1 if you believe all shift counts are in the range [-255,255] */
#ifndef __ARM_DSP_SMALL_SHIFTS
#define __ARM_DSP_SMALL_SHIFTS 0
#endif
__ARM_INTRINSIC int32_t _sround (int32_t src);
__ARM_INTRINSIC int16_t _sadd (int16_t src1, int16_t src2) { return __qadd(src1<<16, src2<<16) >> 16; }
__ARM_INTRINSIC int16_t _a_sadd (int16_t src1, int16_t src2) { return __qadd(src1<<16, src2<<16) >> 16; }
__ARM_INTRINSIC int32_t _lsadd (int32_t src1, int32_t src2) { return __qadd(src1, src2); }
__ARM_INTRINSIC int32_t _a_lsadd (int32_t src1, int32_t src2) { return __qadd(src1, src2); }
__ARM_INTRINSIC int16_t _ssub (int16_t src1, int16_t src2) { return __qsub(src1<<16, src2<<16) >> 16; }
__ARM_INTRINSIC int32_t _lssub (int32_t src1, int32_t src2) { return __qsub(src1, src2); }
__ARM_INTRINSIC int16_t _sneg (int16_t src) { return _ssub(0, src); }
__ARM_INTRINSIC int32_t _lsneg (int32_t src) { return __qsub(0, src); }
__ARM_INTRINSIC int16_t _abss (int16_t src) { return src < 0 ? _sneg(src) : src; }
__ARM_INTRINSIC int32_t _labss (int32_t src) { return src < 0 ? _lsneg(src) : src; }
__ARM_INTRINSIC int16_t _smpy (int16_t src1, int16_t src2) { return (int16_t)(__qdbl(src1*src2) >> 16); }
__ARM_INTRINSIC int32_t _lsmpy (int16_t src1, int16_t src2) { return __qdbl(src1 * src2); }
__ARM_INTRINSIC int32_t _lmpy (int16_t src1, int16_t src2) { return src1 * src2; }
__ARM_INTRINSIC int32_t _lmpysu (int16_t src1, uint16_t src2) { return src1 * src2; }
__ARM_INTRINSIC int32_t _lmpyu (uint16_t src1, uint16_t src2) { return src1 * src2; }
__ARM_INTRINSIC int32_t _lsmpyi (int16_t src1, int16_t src2) { return src1 * src2; }
__ARM_INTRINSIC int32_t _lsmpysui(int16_t src1, uint16_t src2) { return src1 * src2; }
__ARM_INTRINSIC int32_t _lsmpysu (int16_t src1, uint16_t src2) { return __qdbl(src1 * src2); }
__ARM_INTRINSIC int32_t _lsmpyui (uint16_t src1, uint16_t src2) { uint32_t r = src1 * src2; return (r > INT32_MAX) ? INT32_MAX : r; }
__ARM_INTRINSIC int32_t _lsmpyu (uint16_t src1, uint16_t src2) { return __qdbl(_lsmpyui(src1, src2)); }
__ARM_INTRINSIC int32_t _smpyr (int16_t src1, int16_t src2) { return __qdbl(src1*src2 + 0x4000) & 0xFFFF0000; } /* old name */
__ARM_INTRINSIC int32_t _lsmpyr (int16_t src1, int16_t src2) { return __qdbl(src1*src2 + 0x4000) & 0xFFFF0000; }
/* Accumulation functions: comments indicate the (notional) multiplication intrinsic involved */
__ARM_INTRINSIC int32_t _smac (int32_t src1, int16_t src2, int16_t src3) { return __qadd(src1, __qdbl(src2*src3)); } /* _lsmpy */
__ARM_INTRINSIC int32_t _a_smac (int32_t src1, int16_t src2, int16_t src3) { return __qadd(src1, __qdbl(src2*src3)); }
__ARM_INTRINSIC int32_t _smaci (int32_t src1, int16_t src2, int16_t src3) { return __qadd(src1, src2*src3); } /* _lsmpyi */
__ARM_INTRINSIC int32_t _smacsui (int32_t src1, int16_t src2, uint16_t src3) { return __qadd(src1, src2*src3); } /* _lmpysui */
__ARM_INTRINSIC int32_t _smacsu (int32_t src1, int16_t src2, uint16_t src3) { return __qadd(src1, __qdbl(src2*src3)); } /* _lmpysu */
__ARM_INTRINSIC int32_t _smacr (int32_t src1, int16_t src2, int16_t src3) { return _sround(__qadd(src1, __qdbl(src2*src3))); }
__ARM_INTRINSIC int32_t _a_smacr (int32_t src1, int16_t src2, int16_t src3) { return _sround(__qadd(src1, __qdbl(src2*src3))); }
__ARM_INTRINSIC int32_t _smas (int32_t src1, int16_t src2, int16_t src3) { return __qsub(src1, __qdbl(src2*src3)); } /* _lsmpy */
__ARM_INTRINSIC int32_t _a_smas (int32_t src1, int16_t src2, int16_t src3) { return __qsub(src1, __qdbl(src2*src3)); }
__ARM_INTRINSIC int32_t _smasi (int32_t src1, int16_t src2, int16_t src3) { return __qsub(src1, src2*src3); } /* _lsmpyi */
__ARM_INTRINSIC int32_t _smassui (int32_t src1, int16_t src2, uint16_t src3) { return __qsub(src1, src2*src3); } /* _lmpysui */
__ARM_INTRINSIC int32_t _smassu (int32_t src1, int16_t src2, uint16_t src3) { return __qsub(src1, __qdbl(src2*src3)); } /* _lmpysu */
__ARM_INTRINSIC int32_t _smasr (int32_t src1, int16_t src2, int16_t src3) { return _sround(__qsub(src1, __qdbl(src2*src3))); }
__ARM_INTRINSIC int32_t _a_smasr (int32_t src1, int16_t src2, int16_t src3) { return _sround(__qsub(src1, __qdbl(src2*src3))); }
#if __ARM_DSP_SMALL_SHIFTS
#define __ARM_normalize_shift(x) (x)
#else /* !__ARM_DSP_SMALL_SHIFTS */
/* Normalize shifts of any range. Expression should reduce to a 6-bit saturation.
5-bit saturation is not enough as 32-bit shift and 31-bit shift are different. */
#define __ARM_normalize_shift(x) ((x) >= 63 ? 63 : (x) <= -64 ? -64 : (x))
#endif /* __ARM_DSP_SMALL_SHIFTS */
/* 'x' is known to cause an out-of-range left-shift result. Generate a
saturated maximal value and also cause the saturation flag to be set. */
#define __ARM_saturated_result(x) __qdbl(INT32_MAX - ((x) >> 31))
/* Result is saturated */
__ARM_INTRINSIC int16_t _sshl(int16_t src1, int16_t src2)
{
src2 = __ARM_normalize_shift(src2);
if (src2 < 0) {
return src1 >> -src2;
} else {
int16_t sh = src1 << src2;
if ((sh >> src2) != src1) {
sh = __ARM_saturated_result(src1) >> 16;
}
return sh;
}
}
/* Result is saturated */
__ARM_INTRINSIC int32_t _lsshl(int32_t src1, int16_t src2)
{
src2 = __ARM_normalize_shift(src2);
if (src2 < 0) {
return src1 >> -src2;
} else {
int32_t sh = src1 << src2;
if ((sh >> src2) != src1) {
sh = __ARM_saturated_result(src1);
}
return sh;
}
}
/* Result is saturated */
__ARM_INTRINSIC int16_t _shrs(int16_t src1, int16_t src2)
{
src2 = __ARM_normalize_shift(src2);
if (src2 < 0) {
int16_t sh = src1 << -src2;
if ((sh >> -src2) != src1) {
sh = __ARM_saturated_result(src1) >> 16;
}
return sh;
} else {
return src1 >> src2;
}
}
/* Result is saturated */
__ARM_INTRINSIC int32_t _lshrs(int32_t src1, int16_t src2)
{
src2 = __ARM_normalize_shift(src2);
if (src2 < 0) {
int32_t sh = src1 << -src2;
if ((sh >> -src2) != src1) {
sh = __ARM_saturated_result(src1);
}
return sh;
} else {
return src1 >> src2;
}
}
/* No saturation is performed */
__ARM_INTRINSIC int16_t _shl(int16_t src1, int16_t src2)
{
src2 = __ARM_normalize_shift(src2);
if (src2 < 0) {
return src1 >> -src2;
} else {
return src1 << src2;
}
}
/* No saturation is performed */
__ARM_INTRINSIC int32_t _lshl(int32_t src1, int16_t src2)
{
src2 = __ARM_normalize_shift(src2);
if (src2 < 0) {
return src1 >> -src2;
} else {
return src1 << src2;
}
}
__ARM_INTRINSIC int32_t _sround (int32_t src) { return __qadd(src, 0x8000) & 0xFFFF0000; }
__ARM_INTRINSIC int32_t _round (int32_t src) { return (src + 0x8000) & 0xFFFF0000; }
__ARM_INTRINSIC int32_t _rnd (int32_t src) { return _sround(src); }
__ARM_INTRINSIC int32_t _roundn(int32_t src)
{
if ((src & 0x17FFF) != 0) {
src += (src & 0x8000);
}
return src & 0xFFFF0000;
}
__ARM_INTRINSIC int32_t _sroundn(int32_t src)
{
if ((src & 0x17FFF) != 0) {
src = __qadd(src, (src & 0x8000));
}
return src & 0xFFFF0000;
}
__ARM_INTRINSIC int16_t _norm (int16_t src) { return __clz(src ^ ((int32_t)src << 17)) & 15; }
__ARM_INTRINSIC int16_t _lnorm (int32_t src) { return __clz(src ^ (src << 1)) & 31; }
/* Note that 'long long' in TI C55x is a 40-bit type. */
__ARM_INTRINSIC int32_t _lsat(int64_t src)
{
return (src > INT32_MAX) ? INT32_MAX : (src < INT32_MIN) ? INT32_MIN : (int32_t)src;
}
__ARM_INTRINSIC int16_t _count (uint64_t src1, uint64_t src2) { return (int16_t)__builtin_popcountll(src1 & src2); }
__ARM_INTRINSIC int16_t _max (int16_t src1, int16_t src2) { return (src1 > src2) ? src1 : src2; }
__ARM_INTRINSIC int32_t _lmax (int32_t src1, int32_t src2) { return (src1 > src2) ? src1 : src2; }
__ARM_INTRINSIC int16_t _min (int16_t src1, int16_t src2) { return (src1 < src2) ? src1 : src2; }
__ARM_INTRINSIC int32_t _lmin (int32_t src1, int32_t src2) { return (src1 < src2) ? src1 : src2; }
/* 16-bit non-fractional division with saturation. */
__ARM_INTRINSIC int16_t _divs(int16_t src1, int16_t src2)
{
if (src1 == -0x8000 && src2 == -1) {
return 0x7FFF;
}
return src1 / src2;
}
/* Support function: "divides x by y with saturation" - i.e. fractional.
Equivalent to ETSI div_s over div_s's range */
__ARM_INTRINSIC int16_t divs(int16_t x, int16_t y)
{
if (x < 0 || y <= 0) return (x == 0) ? 0 : (x == y) ? INT16_MAX : ((x << 1) >= y) ? INT16_MAX : 0;
if (x >= y) return x ? INT16_MAX : 0;
return (x * 0x8000) / y;
}
/* Support function - identical to ETSI shr_r */
__ARM_INTRINSIC int16_t crshft_r(int16_t x, int16_t shift)
{
int32_t tmp;
if (shift == 0 || x == 0)
return x;
if (shift > 0) {
#if !__ARM_DSP_SMALL_SHIFTS
if (shift > 32) shift = 32;
#endif /* __ARM_DSP_SMALL_SHIFTS */
return (int16_t) (((x >> (shift-1)) + 1) >> 1);
}
if (shift < -15)
shift = -16;
tmp = x << -shift;
#if (defined(__thumb) && (__TARGET_ARCH_THUMB >= 4)) || (__TARGET_ARCH_ARM >= 6)
return (int16_t)__ssat(tmp, 16);
#else
/* ARM v5E has no SSAT instruction */
if (tmp > INT16_MAX || tmp < INT16_MIN)
tmp = __qdbl(INT32_MAX - (tmp >> 31)) >> 16; /* Saturate and set Overflow */
return tmp;
#endif
}
/* Support function - identical to ETSI L_shr_r */
__ARM_INTRINSIC int32_t L_crshft_r(int32_t x, int16_t shift)
{
if (shift == 0 || x == 0)
return x;
if (shift > 0) {
#if !__ARM_DSP_SMALL_SHIFTS
int32_t x2 = (shift > 32) ? 0 : x >> (shift-1);
#else
int32_t x2 = x >> (shift-1);
#endif /* __ARM_DSP_SMALL_SHIFTS */
return (x2 >> 1) + (x2 & 1);
}
if (-shift <= __clz(x ^ (x << 1)) || x == 0)
return x << (-shift);
return __qdbl((x < 0) ? INT32_MIN : INT32_MAX);
}
#undef __ARM_normalize_shift
#undef __ARM_saturated_result
#endif /* ndef __arm_c55x_h */
/* end of c55x.h */