FFmpeg
idctdsp_msa.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
22 #include "idctdsp_mips.h"
23 
24 static void put_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
26 {
27  uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
28  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
29 
30  LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
31  CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7);
32  PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
33  PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);
34 
35  in0_d = __msa_copy_u_d((v2i64) in0, 0);
36  in1_d = __msa_copy_u_d((v2i64) in1, 0);
37  in2_d = __msa_copy_u_d((v2i64) in2, 0);
38  in3_d = __msa_copy_u_d((v2i64) in3, 0);
39  in4_d = __msa_copy_u_d((v2i64) in4, 0);
40  in5_d = __msa_copy_u_d((v2i64) in5, 0);
41  in6_d = __msa_copy_u_d((v2i64) in6, 0);
42  in7_d = __msa_copy_u_d((v2i64) in7, 0);
43  SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
44  pixels += 4 * stride;
45  SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
46 }
47 
48 static void put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
50 {
51  uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
52  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
53 
54  LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
55 
56  in0 += 128;
57  in1 += 128;
58  in2 += 128;
59  in3 += 128;
60  in4 += 128;
61  in5 += 128;
62  in6 += 128;
63  in7 += 128;
64 
65  CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7);
66  PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
67  PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);
68 
69  in0_d = __msa_copy_u_d((v2i64) in0, 0);
70  in1_d = __msa_copy_u_d((v2i64) in1, 0);
71  in2_d = __msa_copy_u_d((v2i64) in2, 0);
72  in3_d = __msa_copy_u_d((v2i64) in3, 0);
73  in4_d = __msa_copy_u_d((v2i64) in4, 0);
74  in5_d = __msa_copy_u_d((v2i64) in5, 0);
75  in6_d = __msa_copy_u_d((v2i64) in6, 0);
76  in7_d = __msa_copy_u_d((v2i64) in7, 0);
77  SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
78  pixels += 4 * stride;
79  SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
80 }
81 
82 static void add_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
84 {
85  uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
86  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
87  v16u8 pix_in0, pix_in1, pix_in2, pix_in3;
88  v16u8 pix_in4, pix_in5, pix_in6, pix_in7;
89  v8u16 pix0, pix1, pix2, pix3, pix4, pix5, pix6, pix7;
90  v8i16 zero = { 0 };
91 
92  LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
93  LD_UB8(pixels, stride, pix_in0, pix_in1, pix_in2,
94  pix_in3, pix_in4, pix_in5, pix_in6, pix_in7);
95 
96  ILVR_B4_UH(zero, pix_in0, zero, pix_in1, zero, pix_in2, zero, pix_in3,
97  pix0, pix1, pix2, pix3);
98  ILVR_B4_UH(zero, pix_in4, zero, pix_in5, zero, pix_in6, zero, pix_in7,
99  pix4, pix5, pix6, pix7);
100 
101  in0 += (v8i16) pix0;
102  in1 += (v8i16) pix1;
103  in2 += (v8i16) pix2;
104  in3 += (v8i16) pix3;
105  in4 += (v8i16) pix4;
106  in5 += (v8i16) pix5;
107  in6 += (v8i16) pix6;
108  in7 += (v8i16) pix7;
109 
110  CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7);
111  PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
112  PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);
113 
114  in0_d = __msa_copy_u_d((v2i64) in0, 0);
115  in1_d = __msa_copy_u_d((v2i64) in1, 0);
116  in2_d = __msa_copy_u_d((v2i64) in2, 0);
117  in3_d = __msa_copy_u_d((v2i64) in3, 0);
118  in4_d = __msa_copy_u_d((v2i64) in4, 0);
119  in5_d = __msa_copy_u_d((v2i64) in5, 0);
120  in6_d = __msa_copy_u_d((v2i64) in6, 0);
121  in7_d = __msa_copy_u_d((v2i64) in7, 0);
122  SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
123  pixels += 4 * stride;
124  SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
125 }
126 
/**
 * Exported MSA entry point: forwards its arguments unchanged to the
 * file-local put_pixels_clamped_msa().
 *
 * @param block     16-bit source values
 * @param pixels    8-bit destination buffer
 * @param line_size distance between destination rows, passed on as the stride
 */
void ff_put_pixels_clamped_msa(const int16_t *block, uint8_t *restrict pixels,
                               ptrdiff_t line_size)
{
    put_pixels_clamped_msa(block, pixels, line_size);
}
133 
/**
 * Exported MSA entry point: forwards its arguments unchanged to the
 * file-local put_signed_pixels_clamped_msa().
 *
 * @param block     16-bit source values
 * @param pixels    8-bit destination buffer
 * @param line_size distance between destination rows, passed on as the stride
 */
void ff_put_signed_pixels_clamped_msa(const int16_t *block,
                                      uint8_t *restrict pixels,
                                      ptrdiff_t line_size)
{
    put_signed_pixels_clamped_msa(block, pixels, line_size);
}
140 
/**
 * Exported MSA entry point: forwards its arguments unchanged to the
 * file-local add_pixels_clamped_msa().
 *
 * @param block     16-bit values to add
 * @param pixels    8-bit buffer handed to the helper, which reads and rewrites it
 * @param line_size distance between rows of pixels, passed on as the stride
 */
void ff_add_pixels_clamped_msa(const int16_t *block, uint8_t *restrict pixels,
                               ptrdiff_t line_size)
{
    add_pixels_clamped_msa(block, pixels, line_size);
}
LD_UB8
#define LD_UB8(...)
Definition: generic_macros_msa.h:335
PCKEV_B4_SH
#define PCKEV_B4_SH(...)
Definition: generic_macros_msa.h:1740
ILVR_B4_UH
#define ILVR_B4_UH(...)
Definition: generic_macros_msa.h:1361
generic_macros_msa.h
ff_put_pixels_clamped_msa
void ff_put_pixels_clamped_msa(const int16_t *block, uint8_t *restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_msa.c:127
ff_add_pixels_clamped_msa
void ff_add_pixels_clamped_msa(const int16_t *block, uint8_t *restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_msa.c:141
put_signed_pixels_clamped_msa
static void put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *pixels, int32_t stride)
Definition: idctdsp_msa.c:48
CLIP_SH8_0_255
#define CLIP_SH8_0_255(in0, in1, in2, in3, in4, in5, in6, in7)
Definition: generic_macros_msa.h:953
SD4
#define SD4(in0, in1, in2, in3, pdst, stride)
Definition: generic_macros_msa.h:256
LD_SH8
#define LD_SH8(...)
Definition: generic_macros_msa.h:338
stride
#define stride
Definition: h264pred_template.c:537
idctdsp_mips.h
put_pixels_clamped_msa
static void put_pixels_clamped_msa(const int16_t *block, uint8_t *pixels, int32_t stride)
Definition: idctdsp_msa.c:24
ff_put_signed_pixels_clamped_msa
void ff_put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_msa.c:134
add_pixels_clamped_msa
static void add_pixels_clamped_msa(const int16_t *block, uint8_t *pixels, int32_t stride)
Definition: idctdsp_msa.c:82
zero
#define zero
Definition: regdef.h:64
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207