25#ifndef PQ_FIXED_MATH_H
26#define PQ_FIXED_MATH_H
28#include "pq_globals.h"
/**
 * Arithmetic-shift `val` right by `rshift` bits, then saturate the result to
 * the signed `bits`-bit range [-2^(bits-1), 2^(bits-1) - 1].
 *
 * When __ARM_ARCH_7EM__ is defined this is a single SSAT instruction; the "I"
 * (immediate) constraints mean `bits` and `rshift` must be compile-time
 * constants on that path.
 *
 * The portable path expects 1 <= bits <= 32 and 0 <= rshift <= 31.
 *
 * @param val    value to shift and clamp
 * @param bits   width of the signed range to saturate to
 * @param rshift number of bits to shift right before saturating
 * @return the shifted, saturated value
 */
static inline int32_t signed_saturate_rshift(int32_t val,
                                             int bits,
                                             int rshift) __attribute__((always_inline, unused));
static inline int32_t signed_saturate_rshift(int32_t val,
                                             int bits,
                                             int rshift)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "ssat %0, %1, %2, asr %3" :
        "=r" (out) :
        "I" (bits),
        "r" (val),
        "I" (rshift));
    return out;
#else
    /* Build the upper bound with an unsigned shift: `1 << 31` on a signed
     * int would be undefined behaviour when bits == 32. */
    const int32_t max = (int32_t)((((uint32_t)1) << (bits - 1)) - 1u);
    const int32_t min = -max - 1;
    int32_t out = val >> rshift;

    if (out > max) {
        out = max;
    }
    if (out < min) {
        out = min;
    }
    return out;
#endif
}
/**
 * Clamp a 32-bit signed value to the int16_t range [-32768, 32767].
 *
 * When __ARM_ARCH_7EM__ is defined this uses SSAT with a 16-bit width;
 * otherwise the clamp is done with plain comparisons.
 *
 * @param val value to clamp
 * @return `val` limited to the 16-bit signed range
 */
static inline int16_t saturate16(int32_t val) __attribute__((always_inline, unused));
static inline int16_t saturate16(int32_t val)
{
#if defined (__ARM_ARCH_7EM__)
    int16_t out;
    int32_t tmp;
    asm volatile(
        "ssat %0, %1, %2" :
        "=r" (tmp) :
        "I" (16),
        "r" (val) );
    out = (int16_t) (tmp);
    return out;
#else
    if (val > 32767) {
        val = 32767;
    } else if (val < -32768) {
        val = -32768;
    }
    /* val is now guaranteed to fit, so the narrowing is value-preserving. */
    return (int16_t)val;
#endif
}
/**
 * Multiply the 32-bit signed `a` by the signed bottom halfword of `b`
 * (bits 15..0) and return the product shifted right by 16 bits.
 *
 * When __ARM_ARCH_7EM__ is defined this is the SMULWB instruction.
 *
 * @param a 32-bit signed multiplicand
 * @param b packed word; only its low 16 bits are used, as a signed value
 * @return (a * (int16_t)b[15:0]) >> 16
 */
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16b(int32_t a, uint32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "smulwb %0, %1, %2" :
        "=r" (out) :
        "r" (a),
        "r" (b));
    return out;
#else
    /* Reinterpret the low halfword as signed before widening. */
    const int16_t low_half = (int16_t)(b & 0xFFFF);
    return (int32_t)(((int64_t)a * low_half) >> 16);
#endif
}
/**
 * Multiply the 32-bit signed `a` by the signed top halfword of `b`
 * (bits 31..16) and return the product shifted right by 16 bits.
 *
 * When __ARM_ARCH_7EM__ is defined this is the SMULWT instruction.
 *
 * @param a 32-bit signed multiplicand
 * @param b packed word; only its high 16 bits are used, as a signed value
 * @return (a * (int16_t)b[31:16]) >> 16
 */
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b) __attribute__((always_inline, unused));
static inline int32_t signed_multiply_32x16t(int32_t a, uint32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "smulwt %0, %1, %2" :
        "=r" (out) :
        "r" (a),
        "r" (b));
    return out;
#else
    /* Reinterpret the high halfword as signed before widening. */
    const int16_t high_half = (int16_t)(b >> 16);
    return (int32_t)(((int64_t)a * high_half) >> 16);
#endif
}
/**
 * Return the upper 32 bits of the signed 64-bit product a * b
 * (the product shifted right by 32, without rounding).
 *
 * When __ARM_ARCH_7EM__ is defined this is the SMMUL instruction.
 *
 * @param a first signed factor
 * @param b second signed factor
 * @return (a * b) >> 32
 */
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32(int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "smmul %0, %1, %2" :
        "=r" (out) :
        "r" (a),
        "r" (b));
    return out;
#else
    const int64_t product = (int64_t)a * (int64_t)b;
    return (int32_t)(product >> 32);
#endif
}
/**
 * Return the upper 32 bits of the signed 64-bit product a * b, rounded by
 * adding 0x80000000 (half of the discarded low word) before the shift.
 *
 * When __ARM_ARCH_7EM__ is defined this is the SMMULR instruction.
 *
 * @param a first signed factor
 * @param b second signed factor
 * @return ((a * b) + 0x80000000) >> 32
 */
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "smmulr %0, %1, %2" :
        "=r" (out) :
        "r" (a),
        "r" (b));
    return out;
#else
    const int64_t product = (int64_t)a * (int64_t)b;
    /* 64-bit literal so the rounding term is added in signed 64-bit math. */
    return (int32_t)((product + 0x80000000LL) >> 32);
#endif
}
/**
 * Add the rounded upper 32 bits of the signed product a * b to `sum`.
 *
 * When __ARM_ARCH_7EM__ is defined this is the SMMLAR instruction.
 *
 * @param sum accumulator value
 * @param a   first signed factor
 * @param b   second signed factor
 * @return sum + (((a * b) + 0x80000000) >> 32)
 */
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "smmlar %0, %2, %3, %1" :
        "=r" (out) :
        "r" (sum),
        "r" (a),
        "r" (b));
    return out;
#else
    const int64_t product = (int64_t)a * (int64_t)b;
    /* The addition is carried out in 64 bits and narrowed at the end. */
    return (int32_t)((int64_t)sum + ((product + 0x80000000LL) >> 32));
#endif
}
/**
 * Subtract the signed product a * b from `sum` in 32.32 fixed point and
 * return the rounded upper word:
 *
 *     ((sum << 32) - (a * b) + 0x80000000) >> 32
 *
 * When __ARM_ARCH_7EM__ is defined this is the SMMLSR instruction.
 *
 * The portable path applies the rounding constant to the difference, which
 * is what the expression above does. Rounding the product first and then
 * subtracting gives a result that is off by one whenever the low 32 bits of
 * a * b are exactly 0x80000000.
 *
 * @param sum accumulator value
 * @param a   first signed factor
 * @param b   second signed factor
 * @return sum minus the product's high word, rounded as described above
 */
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    int32_t out;
    asm volatile(
        "smmlsr %0, %2, %3, %1" :
        "=r" (out) :
        "r" (sum),
        "r" (a),
        "r" (b));
    return out;
#else
    const int64_t product = (int64_t)a * (int64_t)b;
    /* (sum << 32) contributes nothing to the low word, so it can be added
     * after the shift; |product| <= 2^62, so the subtraction cannot
     * overflow 64 bits. */
    const int64_t rounded_neg_high = (0x80000000LL - product) >> 32;
    return (int32_t)((int64_t)sum + rounded_neg_high);
#endif
}
/**
 * Pack two halfwords into one word: the top 16 bits of `a` stay in the top
 * half of the result, the top 16 bits of `b` go to the bottom half.
 *
 * When __ARM_ARCH_7EM__ is defined this is PKHTB with an ASR #16 operand.
 *
 * @param a supplies result bits 31..16 (from a[31:16])
 * @param b supplies result bits 15..0  (from b[31:16])
 * @return the packed 32-bit word
 */
static inline uint32_t pack_16t_16t(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16t(int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    uint32_t out;
    asm volatile(
        "pkhtb %0, %1, %2, asr #16" :
        "=r" (out) :
        "r" (a),
        "r" (b));
    return out;
#else
    /* Work on unsigned copies so masking and shifting are pure bit ops. */
    const uint32_t upper = (uint32_t)a & 0xFFFF0000u;
    const uint32_t lower = (uint32_t)b >> 16;
    return upper | lower;
#endif
}
/**
 * Pack two halfwords into one word: the top 16 bits of `a` stay in the top
 * half of the result, the bottom 16 bits of `b` stay in the bottom half.
 *
 * When __ARM_ARCH_7EM__ is defined this is PKHTB with no shift.
 *
 * @param a supplies result bits 31..16 (from a[31:16])
 * @param b supplies result bits 15..0  (from b[15:0])
 * @return the packed 32-bit word
 */
static inline uint32_t pack_16t_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16t_16b(int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    uint32_t out;
    asm volatile(
        "pkhtb %0, %1, %2" :
        "=r" (out) :
        "r" (a),
        "r" (b));
    return out;
#else
    /* Work on unsigned copies so the masks are pure bit operations. */
    const uint32_t upper = (uint32_t)a & 0xFFFF0000u;
    const uint32_t lower = (uint32_t)b & 0x0000FFFFu;
    return upper | lower;
#endif
}
/**
 * Pack two halfwords into one word: the bottom 16 bits of `a` become the top
 * half of the result, the bottom 16 bits of `b` stay in the bottom half.
 *
 * When __ARM_ARCH_7EM__ is defined this is PKHBT with an LSL #16 operand
 * (note the asm operands are passed as b, a).
 *
 * @param a supplies result bits 31..16 (from a[15:0])
 * @param b supplies result bits 15..0  (from b[15:0])
 * @return the packed 32-bit word
 */
static inline uint32_t pack_16b_16b(int32_t a, int32_t b) __attribute__((always_inline, unused));
static inline uint32_t pack_16b_16b(int32_t a, int32_t b)
{
#if defined (__ARM_ARCH_7EM__)
    uint32_t out;
    asm volatile(
        "pkhbt %0, %1, %2, lsl #16" :
        "=r" (out) :
        "r" (b),
        "r" (a));
    return out;
#else
    /* Shift an unsigned copy: left-shifting a negative (or large) signed
     * int is undefined behaviour in C. */
    const uint32_t upper = (uint32_t)a << 16;
    const uint32_t lower = (uint32_t)b & 0x0000FFFFu;
    return upper | lower;
#endif
}
190static inline uint32_t divide_32div32(uint32_t a, uint32_t b) __attribute__((always_inline, unused));
191static inline uint32_t divide_32div32(uint32_t a, uint32_t b) {
192 return b ? (uint64_t(a) << 32) / b : 0xFFFFFFFF ;