FFmpeg
idctdsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized idctdsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "idctdsp_mips.h"
25 #include "constants.h"
27 
28 void ff_put_pixels_clamped_mmi(const int16_t *block,
29  uint8_t *restrict pixels, ptrdiff_t line_size)
30 {
31  double ftmp[8];
32 
33  __asm__ volatile (
34  MMI_LDC1(%[ftmp0], %[block], 0x00)
35  MMI_LDC1(%[ftmp1], %[block], 0x08)
36  MMI_LDC1(%[ftmp2], %[block], 0x10)
37  MMI_LDC1(%[ftmp3], %[block], 0x18)
38  MMI_LDC1(%[ftmp4], %[block], 0x20)
39  MMI_LDC1(%[ftmp5], %[block], 0x28)
40  MMI_LDC1(%[ftmp6], %[block], 0x30)
41  MMI_LDC1(%[ftmp7], %[block], 0x38)
42  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
43  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
44  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
45  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
46  MMI_SDC1(%[ftmp0], %[pixels], 0x00)
47  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
48  MMI_SDC1(%[ftmp2], %[pixels], 0x00)
49  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
50  MMI_SDC1(%[ftmp4], %[pixels], 0x00)
51  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
52  MMI_SDC1(%[ftmp6], %[pixels], 0x00)
53  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
54 
55  MMI_LDC1(%[ftmp0], %[block], 0x40)
56  MMI_LDC1(%[ftmp1], %[block], 0x48)
57  MMI_LDC1(%[ftmp2], %[block], 0x50)
58  MMI_LDC1(%[ftmp3], %[block], 0x58)
59  MMI_LDC1(%[ftmp4], %[block], 0x60)
60  MMI_LDC1(%[ftmp5], %[block], 0x68)
61  MMI_LDC1(%[ftmp6], %[block], 0x70)
62  MMI_LDC1(%[ftmp7], %[block], 0x78)
63  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
64  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
65  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
66  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
67  MMI_SDC1(%[ftmp0], %[pixels], 0x00)
68  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
69  MMI_SDC1(%[ftmp2], %[pixels], 0x00)
70  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
71  MMI_SDC1(%[ftmp4], %[pixels], 0x00)
72  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
73  MMI_SDC1(%[ftmp6], %[pixels], 0x00)
74  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
75  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
76  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
77  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
78  [pixels]"+&r"(pixels)
79  : [line_size]"r"((mips_reg)line_size),
80  [block]"r"(block)
81  : "memory"
82  );
83 }
84 
86  uint8_t *restrict pixels, ptrdiff_t line_size)
87 {
88  double ftmp[5];
89 
90  __asm__ volatile (
91  MMI_LDC1(%[ftmp1], %[block], 0x00)
92  MMI_LDC1(%[ftmp0], %[block], 0x08)
93  "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
94  MMI_LDC1(%[ftmp2], %[block], 0x10)
95  MMI_LDC1(%[ftmp0], %[block], 0x18)
96  "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
97  MMI_LDC1(%[ftmp3], %[block], 0x20)
98  MMI_LDC1(%[ftmp0], %[block], 0x28)
99  "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
100  MMI_LDC1(%[ftmp4], %[block], 0x30)
101  MMI_LDC1(%[ftmp0], %[block], 0x38)
102  "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
103  "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
104  "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
105  "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
106  "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
107  MMI_SDC1(%[ftmp1], %[pixels], 0x00)
108  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
109  MMI_SDC1(%[ftmp2], %[pixels], 0x00)
110  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
111  MMI_SDC1(%[ftmp3], %[pixels], 0x00)
112  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
113  MMI_SDC1(%[ftmp4], %[pixels], 0x00)
114  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
115 
116  MMI_LDC1(%[ftmp1], %[block], 0x40)
117  MMI_LDC1(%[ftmp0], %[block], 0x48)
118  "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
119  MMI_LDC1(%[ftmp2], %[block], 0x50)
120  MMI_LDC1(%[ftmp0], %[block], 0x58)
121  "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
122  MMI_LDC1(%[ftmp3], %[block], 0x60)
123  MMI_LDC1(%[ftmp0], %[block], 0x68)
124  "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
125  MMI_LDC1(%[ftmp4], %[block], 0x70)
126  MMI_LDC1(%[ftmp0], %[block], 0x78)
127  "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
128  "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
129  "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
130  "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
131  "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
132  MMI_SDC1(%[ftmp1], %[pixels], 0x00)
133  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
134  MMI_SDC1(%[ftmp2], %[pixels], 0x00)
135  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
136  MMI_SDC1(%[ftmp3], %[pixels], 0x00)
137  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
138  MMI_SDC1(%[ftmp4], %[pixels], 0x00)
139  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
140  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
141  [ftmp4]"=&f"(ftmp[4]),
142  [pixels]"+&r"(pixels)
143  : [block]"r"(block),
144  [line_size]"r"((mips_reg)line_size),
145  [ff_pb_80]"f"(ff_pb_80.f)
146  : "memory"
147  );
148 }
149 
150 void ff_add_pixels_clamped_mmi(const int16_t *block,
151  uint8_t *restrict pixels, ptrdiff_t line_size)
152 {
153  double ftmp[9];
154  uint64_t tmp[1];
155  __asm__ volatile (
156  "li %[tmp0], 0x04 \n\t"
157  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
158  "1: \n\t"
159  MMI_LDC1(%[ftmp5], %[pixels], 0x00)
160  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
161  MMI_LDC1(%[ftmp6], %[pixels], 0x00)
162  PTR_SUBU "%[pixels], %[pixels], %[line_size] \n\t"
163  MMI_LDC1(%[ftmp1], %[block], 0x00)
164  MMI_LDC1(%[ftmp2], %[block], 0x08)
165  MMI_LDC1(%[ftmp3], %[block], 0x10)
166  MMI_LDC1(%[ftmp4], %[block], 0x18)
167  PTR_ADDIU "%[block], %[block], 0x20 \n\t"
168  "punpckhbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t"
169  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
170  "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
171  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
172  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
173  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
174  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
175  "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
176  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
177  "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
178  MMI_SDC1(%[ftmp1], %[pixels], 0x00)
179  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
180  MMI_SDC1(%[ftmp3], %[pixels], 0x00)
181  "addi %[tmp0], %[tmp0], -0x01 \n\t"
182  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
183  "bnez %[tmp0], 1b \n\t"
184  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
185  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
186  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
187  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
188  [ftmp8]"=&f"(ftmp[8]), [tmp0]"=&r"(tmp[0]),
189  [pixels]"+&r"(pixels), [block]"+&r"(block)
190  : [line_size]"r"((mips_reg)line_size)
191  : "memory"
192  );
193 }
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
ff_add_pixels_clamped_mmi
void ff_add_pixels_clamped_mmi(const int16_t *block, uint8_t *restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:150
mips_reg
#define mips_reg
Definition: asmdefs.h:46
constants.h
mmiutils.h
ff_put_pixels_clamped_mmi
void ff_put_pixels_clamped_mmi(const int16_t *block, uint8_t *restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:28
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:52
av_intfloat64::f
double f
Definition: intfloat.h:34
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
idctdsp_mips.h
ff_put_signed_pixels_clamped_mmi
void ff_put_signed_pixels_clamped_mmi(const int16_t *block, uint8_t *restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:85
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
ff_pb_80
const union av_intfloat64 ff_pb_80
Definition: constants.c:60
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207