FFmpeg
mathops.h
Go to the documentation of this file.
1 /*
2  * simple math operations
3  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef AVCODEC_X86_MATHOPS_H
23 #define AVCODEC_X86_MATHOPS_H
24 
25 #include "config.h"
26 
27 #include "libavutil/common.h"
28 #include "libavutil/x86/asm.h"
29 
30 #if HAVE_INLINE_ASM
31 
32 #if ARCH_X86_32
33 
34 #define MULL MULL
35 static av_always_inline av_const int MULL(int a, int b, unsigned shift)
36 {
37  int rt, dummy;
38  if (__builtin_constant_p(shift))
39  __asm__ (
40  "imull %3 \n\t"
41  "shrdl %4, %%edx, %%eax \n\t"
42  :"=a"(rt), "=d"(dummy)
43  :"a"(a), "rm"(b), "i"(shift & 0x1F)
44  );
45  else
46  __asm__ (
47  "imull %3 \n\t"
48  "shrdl %4, %%edx, %%eax \n\t"
49  :"=a"(rt), "=d"(dummy)
50  :"a"(a), "rm"(b), "c"((uint8_t)shift)
51  );
52  return rt;
53 }
54 
55 #define MULH MULH
56 static av_always_inline av_const int MULH(int a, int b)
57 {
58  int rt, dummy;
59  __asm__ (
60  "imull %3"
61  :"=d"(rt), "=a"(dummy)
62  :"a"(a), "rm"(b)
63  );
64  return rt;
65 }
66 
67 #define MUL64 MUL64
68 static av_always_inline av_const int64_t MUL64(int a, int b)
69 {
70  int64_t rt;
71  __asm__ (
72  "imull %2"
73  :"=A"(rt)
74  :"a"(a), "rm"(b)
75  );
76  return rt;
77 }
78 
79 #endif /* ARCH_X86_32 */
80 
81 #if HAVE_I686
82 /* median of 3 */
83 #define mid_pred mid_pred
84 static inline av_const int mid_pred(int a, int b, int c)
85 {
86  int i=b;
87  __asm__ (
88  "cmp %2, %1 \n\t"
89  "cmovg %1, %0 \n\t"
90  "cmovg %2, %1 \n\t"
91  "cmp %3, %1 \n\t"
92  "cmovl %3, %1 \n\t"
93  "cmp %1, %0 \n\t"
94  "cmovg %1, %0 \n\t"
95  :"+&r"(i), "+&r"(a)
96  :"r"(b), "r"(c)
97  );
98  return i;
99 }
100 
101 #if HAVE_6REGS
/*
 * COPY3_IF_LT(x, y, a, b, c, d): if y < x (signed 32-bit comparison done by
 * cmpl), perform x = y, a = b and c = d; otherwise leave x, a and c as they
 * are. All three updates are conditional moves keyed on the flags of the
 * single cmpl, so the sequence contains no branch.
 *
 * x, a and c must be lvalues that can live in registers ("+r"); y, b and d
 * are read-only register inputs. x and a are marked earlyclobber because
 * they are written while inputs are still to be read by later instructions;
 * c is written by the final instruction and needs no such marker.
 *
 * Note that the expansion already ends with ';'.
 */
102 #define COPY3_IF_LT(x, y, a, b, c, d)\
103 __asm__ volatile(\
104  "cmpl %0, %3 \n\t"\
105  "cmovl %3, %0 \n\t"\
106  "cmovl %4, %1 \n\t"\
107  "cmovl %5, %2 \n\t"\
108  : "+&r" (x), "+&r" (a), "+r" (c)\
109  : "r" (y), "r" (b), "r" (d)\
110 );
111 #endif /* HAVE_6REGS */
112 
113 #endif /* HAVE_I686 */
114 
/*
 * MASK_ABS(mask, level): level is held in EAX ("+a") and mask is produced in
 * EDX ("=&d"). cdq sign-extends EAX into EDX, so mask becomes 0 when level
 * is non-negative and all ones when it is negative. The xorl/subl pair then
 * replaces level with (level ^ mask) - mask, i.e. its absolute value.
 *
 * Both arguments must be lvalues; both are overwritten. The expansion has no
 * trailing ';'.
 */
115 #define MASK_ABS(mask, level) \
116  __asm__ ("cdq \n\t" \
117  "xorl %1, %0 \n\t" \
118  "subl %1, %0 \n\t" \
119  : "+a"(level), "=&d"(mask))
120 
121 // avoid +32 for shift optimization (gcc should do that ...)
#define NEG_SSR32 NEG_SSR32
/**
 * Arithmetic (sign-propagating) right shift of a 32-bit value by -s.
 *
 * Only the low five bits of the negated count take effect: the immediate
 * form masks it with 0x1F explicitly, and the register form hands the low
 * byte of -s to sarl in CL, of which x86 uses just the low five bits for a
 * 32-bit operand.
 *
 * @param a value to shift
 * @param s the shift applied is -s, reduced modulo 32
 * @return a shifted right arithmetically by (-s & 31) bits
 */
static inline int32_t NEG_SSR32(int32_t a, int8_t s)
{
    int32_t val = a;

    if (__builtin_constant_p(s)) {
        /* Count known at compile time: encode it as an immediate. */
        __asm__ ("sarl %1, %0\n\t"
                 : "+r" (val)
                 : "i" (-s & 0x1F)
        );
    } else {
        /* Run-time count: a variable shift count must be in CL. */
        __asm__ ("sarl %1, %0\n\t"
                 : "+r" (val)
                 : "c" ((uint8_t)(-s))
        );
    }

    return val;
}
136 
#define NEG_USR32 NEG_USR32
/**
 * Logical (zero-filling) right shift of a 32-bit value by -s.
 *
 * Only the low five bits of the negated count take effect: the immediate
 * form masks it with 0x1F explicitly, and the register form hands the low
 * byte of -s to shrl in CL, of which x86 uses just the low five bits for a
 * 32-bit operand.
 *
 * @param a value to shift
 * @param s the shift applied is -s, reduced modulo 32
 * @return a shifted right logically by (-s & 31) bits
 */
static inline uint32_t NEG_USR32(uint32_t a, int8_t s)
{
    uint32_t val = a;

    if (__builtin_constant_p(s)) {
        /* Count known at compile time: encode it as an immediate. */
        __asm__ ("shrl %1, %0\n\t"
                 : "+r" (val)
                 : "i" (-s & 0x1F)
        );
    } else {
        /* Run-time count: a variable shift count must be in CL. */
        __asm__ ("shrl %1, %0\n\t"
                 : "+r" (val)
                 : "c" ((uint8_t)(-s))
        );
    }

    return val;
}
151 
152 #endif /* HAVE_INLINE_ASM */
153 #endif /* AVCODEC_X86_MATHOPS_H */
av_const
#define av_const
Definition: attributes.h:84
b
#define b
Definition: input.c:41
dummy
int dummy
Definition: motion.c:66
MULH
#define MULH
Definition: mathops.h:42
s
#define s(width, name)
Definition: cbs_vp9.c:198
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
shift
static int shift(int a, int b)
Definition: bonk.c:261
asm.h
NEG_SSR32
#define NEG_SSR32(a, s)
Definition: mathops.h:174
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
NEG_USR32
#define NEG_USR32(a, s)
Definition: mathops.h:178
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
common.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
mid_pred
#define mid_pred
Definition: mathops.h:98
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
MUL64
#define MUL64(a, b)
Definition: mathops.h:55
int32_t
int32_t
Definition: audioconvert.c:56
MULL
#define MULL(a, b, s)
Definition: mathops.h:59