libavcodec/x86/dsputil_mmx.c File Reference

#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/h264dsp.h"
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
#include "dsputil_mmx.h"
#include "vp3dsp_mmx.h"
#include "vp3dsp_sse2.h"
#include "vp6dsp_mmx.h"
#include "vp6dsp_sse2.h"
#include "idct_xvid.h"
#include "dsputil_mmx_rnd_template.c"
#include "dsputil_mmx_avg_template.c"
#include "h264dsp_mmx.c"
#include "rv40dsp_mmx.c"

Go to the source code of this file.

Defines

#define JUMPALIGN()   __asm__ volatile (ASMALIGN(3)::)

#define MOVQ_ZERO(regd)   __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)

#define MOVQ_BFE(regd)

#define MOVQ_BONE(regd)   __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))

#define MOVQ_WTWO(regd)   __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))

#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)

#define PAVGB_MMX(rega, regb, regr, regfe)

#define PAVGBP_MMX_NO_RND(rega, regb, regr,regc, regd, regp)

#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)

#define DEF(x, y)   x ## _no_rnd_ ## y ##_mmx

#define SET_RND   MOVQ_WONE

#define PAVGBP(a, b, c, d, e, f)   PAVGBP_MMX_NO_RND(a, b, c, d, e, f)

#define PAVGB(a, b, c, e)   PAVGB_MMX_NO_RND(a, b, c, e)

#define OP_AVG(a, b, c, e)   PAVGB_MMX(a, b, c, e)

#define DEF(x, y)   x ## _ ## y ##_mmx

#define SET_RND   MOVQ_WTWO

#define PAVGBP(a, b, c, d, e, f)   PAVGBP_MMX(a, b, c, d, e, f)

#define PAVGB(a, b, c, e)   PAVGB_MMX(a, b, c, e)

#define DEF(x)   x ## _3dnow

#define PAVGB   "pavgusb"

#define OP_AVG   PAVGB

#define DEF(x)   x ## _mmx2

#define PAVGB   "pavgb"

#define OP_AVG   PAVGB

#define put_no_rnd_pixels16_mmx   put_pixels16_mmx

#define put_no_rnd_pixels8_mmx   put_pixels8_mmx

#define put_pixels16_mmx2   put_pixels16_mmx

#define put_pixels8_mmx2   put_pixels8_mmx

#define put_pixels4_mmx2   put_pixels4_mmx

#define put_no_rnd_pixels16_mmx2   put_no_rnd_pixels16_mmx

#define put_no_rnd_pixels8_mmx2   put_no_rnd_pixels8_mmx

#define put_pixels16_3dnow   put_pixels16_mmx

#define put_pixels8_3dnow   put_pixels8_mmx

#define put_pixels4_3dnow   put_pixels4_mmx

#define put_no_rnd_pixels16_3dnow   put_no_rnd_pixels16_mmx

#define put_no_rnd_pixels8_3dnow   put_no_rnd_pixels8_mmx

#define put_signed_pixels_clamped_mmx_half(off)

#define CLEAR_BLOCKS(name, n)

#define H263_LOOP_FILTER

#define PAETH(cpu, abs3)

#define ABS3_MMX2

#define ABS3_SSSE3

#define QPEL_V_LOW(m3, m4, m5, m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)

#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)

#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)

#define PUT_OP(a, b, temp, size)   "mov" #size " " #a ", " #b " \n\t"

#define AVG_3DNOW_OP(a, b, temp, size)

#define AVG_MMX2_OP(a, b, temp, size)

#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)

#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)

#define QPEL_2TAP(OPNAME, SIZE, MMX)

#define PREFETCH(name, op)

#define IF1(x)   x

#define IF0(x)

#define MIX5(mono, stereo)

#define MIX_MISC(stereo)

#define ff_float_to_int16_interleave6_sse(a, b, c)   float_to_int16_interleave_misc_sse(a,b,c,6)

#define ff_float_to_int16_interleave6_3dnow(a, b, c)   float_to_int16_interleave_misc_3dnow(a,b,c,6)

#define ff_float_to_int16_interleave6_3dn2(a, b, c)   float_to_int16_interleave_misc_3dnow(a,b,c,6)

#define ff_float_to_int16_interleave6_sse2   ff_float_to_int16_interleave6_sse

#define FLOAT_TO_INT16_INTERLEAVE(cpu, body)

#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)

#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU)

#define H264_QPEL_FUNCS(x, y, CPU)

Functions

void put_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)

DECLARE_ASM_CONST (8, uint8_t, ff_vector128)[8]

void put_signed_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)

void add_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)

static void put_pixels4_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void put_pixels8_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void put_pixels16_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void put_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void avg_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)

static void clear_block_sse (DCTELEM *block)

static void clear_blocks_sse (DCTELEM *blocks)

static void add_bytes_mmx (uint8_t *dst, uint8_t *src, int w)

static void add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)

static void h263_v_loop_filter_mmx (uint8_t *src, int stride, int qscale)

static void transpose4x4 (uint8_t *dst, uint8_t *src, int dst_stride, int src_stride)

static void h263_h_loop_filter_mmx (uint8_t *src, int stride, int qscale)

static void draw_edges_mmx (uint8_t *buf, int wrap, int width, int height, int w)

static void gmc_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)

void ff_put_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_avg_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_put_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_avg_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)

void ff_put_vc1_mspel_mc00_mmx (uint8_t *dst, const uint8_t *src, int stride, int rnd)

void ff_avg_vc1_mspel_mc00_mmx2 (uint8_t *dst, const uint8_t *src, int stride, int rnd)

static void ff_idct_xvid_mmx_put (uint8_t *dest, int line_size, DCTELEM *block)

static void ff_idct_xvid_mmx_add (uint8_t *dest, int line_size, DCTELEM *block)

static void ff_idct_xvid_mmx2_put (uint8_t *dest, int line_size, DCTELEM *block)

static void ff_idct_xvid_mmx2_add (uint8_t *dest, int line_size, DCTELEM *block)

static void vorbis_inverse_coupling_3dnow (float *mag, float *ang, int blocksize)

static void vorbis_inverse_coupling_sse (float *mag, float *ang, int blocksize)

static void ac3_downmix_sse (float(*samples)[256], float(*matrix)[2], int out_ch, int in_ch, int len)

static void vector_fmul_3dnow (float *dst, const float *src, int len)

static void vector_fmul_sse (float *dst, const float *src, int len)

static void vector_fmul_reverse_3dnow2 (float *dst, const float *src0, const float *src1, int len)

static void vector_fmul_reverse_sse (float *dst, const float *src0, const float *src1, int len)

static void vector_fmul_add_3dnow (float *dst, const float *src0, const float *src1, const float *src2, int len)

static void vector_fmul_add_sse (float *dst, const float *src0, const float *src1, const float *src2, int len)

static void vector_fmul_window_3dnow2 (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)

static void vector_fmul_window_sse (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)

static void int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len)

static void int32_to_float_fmul_scalar_sse2 (float *dst, const int *src, float mul, int len)

static void vector_clipf_sse (float *dst, const float *src, float min, float max, int len)

static void float_to_int16_3dnow (int16_t *dst, const float *src, long len)

static void float_to_int16_sse (int16_t *dst, const float *src, long len)

static void float_to_int16_sse2 (int16_t *dst, const float *src, long len)

void ff_float_to_int16_interleave6_sse (int16_t *dst, const float **src, int len)

void ff_float_to_int16_interleave6_3dnow (int16_t *dst, const float **src, int len)

void ff_float_to_int16_interleave6_3dn2 (int16_t *dst, const float **src, int len)

int32_t ff_scalarproduct_int16_mmx2 (int16_t *v1, int16_t *v2, int order, int shift)

int32_t ff_scalarproduct_int16_sse2 (int16_t *v1, int16_t *v2, int order, int shift)

int32_t ff_scalarproduct_and_madd_int16_mmx2 (int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)

int32_t ff_scalarproduct_and_madd_int16_sse2 (int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)

int32_t ff_scalarproduct_and_madd_int16_ssse3 (int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)

void ff_add_hfyu_median_prediction_mmx2 (uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)

int ff_add_hfyu_left_prediction_ssse3 (uint8_t *dst, const uint8_t *src, int w, int left)

int ff_add_hfyu_left_prediction_sse4 (uint8_t *dst, const uint8_t *src, int w, int left)

void ff_x264_deblock_v_luma_sse2 (uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)

void ff_x264_deblock_h_luma_sse2 (uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)

void ff_x264_deblock_h_luma_intra_mmxext (uint8_t *pix, int stride, int alpha, int beta)

void ff_x264_deblock_v_luma_intra_sse2 (uint8_t *pix, int stride, int alpha, int beta)

void ff_x264_deblock_h_luma_intra_sse2 (uint8_t *pix, int stride, int alpha, int beta)

FLOAT_TO_INT16_INTERLEAVE (3dnow,"1: \n""pf2id (%2,%0), %%mm0 \n""pf2id 8(%2,%0), %%mm1 \n""pf2id (%3,%0), %%mm2 \n""pf2id 8(%3,%0), %%mm3 \n""packssdw %%mm1, %%mm0 \n""packssdw %%mm3, %%mm2 \n""movq %%mm0, %%mm1 \n""punpcklwd %%mm2, %%mm0 \n""punpckhwd %%mm2, %%mm1 \n""movq %%mm0, (%1,%0)\n""movq %%mm1, 8(%1,%0)\n""add $16, %0 \n""js 1b \n""femms \n")

FLOAT_TO_INT16_INTERLEAVE (sse,"1: \n""cvtps2pi (%2,%0), %%mm0 \n""cvtps2pi 8(%2,%0), %%mm1 \n""cvtps2pi (%3,%0), %%mm2 \n""cvtps2pi 8(%3,%0), %%mm3 \n""packssdw %%mm1, %%mm0 \n""packssdw %%mm3, %%mm2 \n""movq %%mm0, %%mm1 \n""punpcklwd %%mm2, %%mm0 \n""punpckhwd %%mm2, %%mm1 \n""movq %%mm0, (%1,%0)\n""movq %%mm1, 8(%1,%0)\n""add $16, %0 \n""js 1b \n""emms \n")

FLOAT_TO_INT16_INTERLEAVE (sse2,"1: \n""cvtps2dq (%2,%0), %%xmm0 \n""cvtps2dq (%3,%0), %%xmm1 \n""packssdw %%xmm1, %%xmm0 \n""movhlps %%xmm0, %%xmm1 \n""punpcklwd %%xmm1, %%xmm0 \n""movdqa %%xmm0, (%1,%0) \n""add $16, %0 \n""js 1b \n")

static void float_to_int16_interleave_3dn2 (int16_t *dst, const float **src, long len, /* remaining parameters not recoverable from this page */)

void dsputil_init_mmx (DSPContext *c, AVCodecContext *avctx)

Variables

int mm_flags

const uint64_t ff_bone = 0x0101010101010101ULL

const uint64_t ff_wtwo = 0x0002000200020002ULL

const uint64_t ff_pdw_80000000 [2]

const uint64_t ff_pw_3 = 0x0003000300030003ULL

const uint64_t ff_pw_4 = 0x0004000400040004ULL

const xmm_reg ff_pw_5 = {0x0005000500050005ULL, 0x0005000500050005ULL}

const xmm_reg ff_pw_8 = {0x0008000800080008ULL, 0x0008000800080008ULL}

const uint64_t ff_pw_15 = 0x000F000F000F000FULL

const xmm_reg ff_pw_16 = {0x0010001000100010ULL, 0x0010001000100010ULL}

const uint64_t ff_pw_20 = 0x0014001400140014ULL

const xmm_reg ff_pw_28 = {0x001C001C001C001CULL, 0x001C001C001C001CULL}

const xmm_reg ff_pw_32 = {0x0020002000200020ULL, 0x0020002000200020ULL}

const uint64_t ff_pw_42 = 0x002A002A002A002AULL

const xmm_reg ff_pw_64 = {0x0040004000400040ULL, 0x0040004000400040ULL}

const uint64_t ff_pw_96 = 0x0060006000600060ULL

const uint64_t ff_pw_128 = 0x0080008000800080ULL

const uint64_t ff_pw_255 = 0x00ff00ff00ff00ffULL

const uint64_t ff_pb_1 = 0x0101010101010101ULL

const uint64_t ff_pb_3 = 0x0303030303030303ULL

const uint64_t ff_pb_7 = 0x0707070707070707ULL

const uint64_t ff_pb_1F = 0x1F1F1F1F1F1F1F1FULL

const uint64_t ff_pb_3F = 0x3F3F3F3F3F3F3F3FULL

const uint64_t ff_pb_81 = 0x8181818181818181ULL

const uint64_t ff_pb_A1 = 0xA1A1A1A1A1A1A1A1ULL

const uint64_t ff_pb_FC = 0xFCFCFCFCFCFCFCFCULL

const double ff_pd_1 [2] = { 1.0, 1.0 }

const double ff_pd_2 [2] = { 2.0, 2.0 }

(The remaining entries Doxygen generated in this section, named `__pad0__`, `mm0`, `mm1`, `mm2`, `mm3`, `add`, `src` and `len`, are not variables. They appear to be fragments of the inline-assembly string passed to FLOAT_TO_INT16_INTERLEAVE(sse, ...) and of the parameter list of float_to_int16_interleave_3dn2(), which the documentation parser failed to tokenize.)

Define Documentation

#define ABS3_MMX2

Value:

"psubw     %%mm5, %%mm7 \n"\
        "pmaxsw    %%mm7, %%mm5 \n"\
        "pxor      %%mm6, %%mm6 \n"\
        "pxor      %%mm7, %%mm7 \n"\
        "psubw     %%mm3, %%mm6 \n"\
        "psubw     %%mm4, %%mm7 \n"\
        "pmaxsw    %%mm6, %%mm3 \n"\
        "pmaxsw    %%mm7, %%mm4 \n"\
        "pxor      %%mm7, %%mm7 \n"

Definition at line 949 of file dsputil_mmx.c.

#define ABS3_SSSE3

Value:

"pabsw     %%mm3, %%mm3 \n"\
        "pabsw     %%mm4, %%mm4 \n"\
        "pabsw     %%mm5, %%mm5 \n"

Definition at line 960 of file dsputil_mmx.c.

#define AVG_3DNOW_OP	(	a,
		b,
		temp,
		size	)

Value:

"mov" #size " " #b ", " #temp "   \n\t"\
"pavgusb " #temp ", " #a "        \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

Definition at line 1618 of file dsputil_mmx.c.

#define AVG_MMX2_OP	(	a,
		b,
		temp,
		size	)

Value:

"mov" #size " " #b ", " #temp "   \n\t"\
"pavgb " #temp ", " #a "          \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

Definition at line 1622 of file dsputil_mmx.c.

#define CLEAR_BLOCKS	(	name,
		n		)

Value:

static void name(DCTELEM *blocks)\
{\
    __asm__ volatile(\
                "pxor %%mm7, %%mm7              \n\t"\
                "mov     %1, %%"REG_a"          \n\t"\
                "1:                             \n\t"\
                "movq %%mm7, (%0, %%"REG_a")    \n\t"\
                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"\
                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"\
                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"\
                "add $32, %%"REG_a"             \n\t"\
                " js 1b                         \n\t"\
                : : "r" (((uint8_t *)blocks)+128*n),\
                    "i" (-128*n)\
                : "%"REG_a\
        );\
}

Definition at line 493 of file dsputil_mmx.c.

#define DEF ( x ) x ## _mmx2

Definition at line 198 of file dsputil_mmx.c.

#define DEF ( x ) x ## _3dnow

Definition at line 198 of file dsputil_mmx.c.

#define DEF	(	x,
		y		)	x ## _ ## y ##_mmx

Definition at line 198 of file dsputil_mmx.c.

#define DEF	(	x,
		y		)	x ## _no_rnd_ ## y ##_mmx

Definition at line 198 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_3dn2	(	a,
		b,
		c	)	float_to_int16_interleave_misc_3dnow(a,b,c,6)

Definition at line 2401 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_3dnow	(	a,
		b,
		c	)	float_to_int16_interleave_misc_3dnow(a,b,c,6)

Definition at line 2400 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_sse	(	a,
		b,
		c	)	float_to_int16_interleave_misc_sse(a,b,c,6)

Definition at line 2399 of file dsputil_mmx.c.

#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse

Definition at line 2403 of file dsputil_mmx.c.

#define FLOAT_TO_INT16_INTERLEAVE	(	cpu,
		body		)

Definition at line 2405 of file dsputil_mmx.c.

#define H263_LOOP_FILTER

Definition at line 634 of file dsputil_mmx.c.

Referenced by h263_h_loop_filter_mmx(), and h263_v_loop_filter_mmx().

#define H264_QPEL_FUNCS	(	x,
		y,
		CPU	)

Value:

c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
            c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
            c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
            c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;


Defines
#define	JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
#define	MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
#define	MOVQ_BFE(regd)
#define	MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
#define	MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
#define	PAVGB_MMX_NO_RND(rega, regb, regr, regfe)
#define	PAVGB_MMX(rega, regb, regr, regfe)
#define	PAVGBP_MMX_NO_RND(rega, regb, regr,regc, regd, regp)
#define	PAVGBP_MMX(rega, regb, regr, regc, regd, regp)
#define	DEF(x, y) x ## _no_rnd_ ## y ##_mmx
#define	SET_RND MOVQ_WONE
#define	PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
#define	PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
#define	OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)
#define	DEF(x, y) x ## _ ## y ##_mmx
#define	SET_RND MOVQ_WTWO
#define	PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
#define	PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
#define	DEF(x) x ## _3dnow
#define	PAVGB "pavgusb"
#define	OP_AVG PAVGB
#define	DEF(x) x ## _mmx2
#define	PAVGB "pavgb"
#define	OP_AVG PAVGB
#define	put_no_rnd_pixels16_mmx put_pixels16_mmx
#define	put_no_rnd_pixels8_mmx put_pixels8_mmx
#define	put_pixels16_mmx2 put_pixels16_mmx
#define	put_pixels8_mmx2 put_pixels8_mmx
#define	put_pixels4_mmx2 put_pixels4_mmx
#define	put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
#define	put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
#define	put_pixels16_3dnow put_pixels16_mmx
#define	put_pixels8_3dnow put_pixels8_mmx
#define	put_pixels4_3dnow put_pixels4_mmx
#define	put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
#define	put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
#define	put_signed_pixels_clamped_mmx_half(off)
#define	CLEAR_BLOCKS(name, n)
#define	H263_LOOP_FILTER
#define	PAETH(cpu, abs3)
#define	ABS3_MMX2
#define	ABS3_SSSE3
#define	QPEL_V_LOW(m3, m4, m5, m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)
#define	QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)
#define	QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)
#define	PUT_OP(a, b, temp, size) "mov" #size " " #a ", " #b " \n\t"
#define	AVG_3DNOW_OP(a, b, temp, size)
#define	AVG_MMX2_OP(a, b, temp, size)
#define	QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)
#define	QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)
#define	QPEL_2TAP(OPNAME, SIZE, MMX)
#define	PREFETCH(name, op)
#define	IF1(x) x
#define	IF0(x)
#define	MIX5(mono, stereo)
#define	MIX_MISC(stereo)
#define	ff_float_to_int16_interleave6_sse(a, b, c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define	ff_float_to_int16_interleave6_3dnow(a, b, c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define	ff_float_to_int16_interleave6_3dn2(a, b, c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
#define	ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define	FLOAT_TO_INT16_INTERLEAVE(cpu, body)
#define	SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)
#define	SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU)
#define	H264_QPEL_FUNCS(x, y, CPU)
Functions
void	put_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)
	DECLARE_ASM_CONST (8, uint8_t, ff_vector128)[8]
void	put_signed_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)
void	add_pixels_clamped_mmx (const DCTELEM *block, uint8_t *pixels, int line_size)
static void	put_pixels4_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	put_pixels8_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	put_pixels16_mmx (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	put_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	avg_pixels16_sse2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
static void	clear_block_sse (DCTELEM *block)
static void	clear_blocks_sse (DCTELEM *blocks)
static void	add_bytes_mmx (uint8_t *dst, uint8_t *src, int w)
static void	add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
static void	h263_v_loop_filter_mmx (uint8_t *src, int stride, int qscale)
static void	transpose4x4 (uint8_t *dst, uint8_t *src, int dst_stride, int src_stride)
static void	h263_h_loop_filter_mmx (uint8_t *src, int stride, int qscale)
static void	draw_edges_mmx (uint8_t *buf, int wrap, int width, int height, int w)
static void	gmc_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
void	ff_put_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_avg_cavs_qpel8_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_put_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_avg_cavs_qpel16_mc00_mmx2 (uint8_t *dst, uint8_t *src, int stride)
void	ff_put_vc1_mspel_mc00_mmx (uint8_t *dst, const uint8_t *src, int stride, int rnd)
void	ff_avg_vc1_mspel_mc00_mmx2 (uint8_t *dst, const uint8_t *src, int stride, int rnd)
static void	ff_idct_xvid_mmx_put (uint8_t *dest, int line_size, DCTELEM *block)
static void	ff_idct_xvid_mmx_add (uint8_t *dest, int line_size, DCTELEM *block)
static void	ff_idct_xvid_mmx2_put (uint8_t *dest, int line_size, DCTELEM *block)
static void	ff_idct_xvid_mmx2_add (uint8_t *dest, int line_size, DCTELEM *block)
static void	vorbis_inverse_coupling_3dnow (float *mag, float *ang, int blocksize)
static void	vorbis_inverse_coupling_sse (float *mag, float *ang, int blocksize)
static void	ac3_downmix_sse (float(*samples)[256], float(*matrix)[2], int out_ch, int in_ch, int len)
static void	vector_fmul_3dnow (float *dst, const float *src, int len)
static void	vector_fmul_sse (float *dst, const float *src, int len)
static void	vector_fmul_reverse_3dnow2 (float *dst, const float *src0, const float *src1, int len)
static void	vector_fmul_reverse_sse (float *dst, const float *src0, const float *src1, int len)
static void	vector_fmul_add_3dnow (float *dst, const float *src0, const float *src1, const float *src2, int len)
static void	vector_fmul_add_sse (float *dst, const float *src0, const float *src1, const float *src2, int len)
static void	vector_fmul_window_3dnow2 (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
static void	vector_fmul_window_sse (float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
static void	int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len)
static void	int32_to_float_fmul_scalar_sse2 (float *dst, const int *src, float mul, int len)
static void	vector_clipf_sse (float *dst, const float *src, float min, float max, int len)
static void	float_to_int16_3dnow (int16_t *dst, const float *src, long len)
static void	float_to_int16_sse (int16_t *dst, const float *src, long len)
static void	float_to_int16_sse2 (int16_t *dst, const float *src, long len)
void	ff_float_to_int16_interleave6_sse (int16_t *dst, const float **src, int len)
void	ff_float_to_int16_interleave6_3dnow (int16_t *dst, const float **src, int len)
void	ff_float_to_int16_interleave6_3dn2 (int16_t *dst, const float **src, int len)
int32_t	ff_scalarproduct_int16_mmx2 (int16_t *v1, int16_t *v2, int order, int shift)
int32_t	ff_scalarproduct_int16_sse2 (int16_t *v1, int16_t *v2, int order, int shift)
int32_t	ff_scalarproduct_and_madd_int16_mmx2 (int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
int32_t	ff_scalarproduct_and_madd_int16_sse2 (int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
int32_t	ff_scalarproduct_and_madd_int16_ssse3 (int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
void	ff_add_hfyu_median_prediction_mmx2 (uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
int	ff_add_hfyu_left_prediction_ssse3 (uint8_t *dst, const uint8_t *src, int w, int left)
int	ff_add_hfyu_left_prediction_sse4 (uint8_t *dst, const uint8_t *src, int w, int left)
void	ff_x264_deblock_v_luma_sse2 (uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
void	ff_x264_deblock_h_luma_sse2 (uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
void	ff_x264_deblock_h_luma_intra_mmxext (uint8_t *pix, int stride, int alpha, int beta)
void	ff_x264_deblock_v_luma_intra_sse2 (uint8_t *pix, int stride, int alpha, int beta)
void	ff_x264_deblock_h_luma_intra_sse2 (uint8_t *pix, int stride, int alpha, int beta)
	FLOAT_TO_INT16_INTERLEAVE (3dnow,"1: \n""pf2id (%2,%0), %%mm0 \n""pf2id 8(%2,%0), %%mm1 \n""pf2id (%3,%0), %%mm2 \n""pf2id 8(%3,%0), %%mm3 \n""packssdw %%mm1, %%mm0 \n""packssdw %%mm3, %%mm2 \n""movq %%mm0, %%mm1 \n""punpcklwd %%mm2, %%mm0 \n""punpckhwd %%mm2, %%mm1 \n""movq %%mm0, (%1,%0)\n""movq %%mm1, 8(%1,%0)\n""add $16, %0 \n""js 1b \n""femms \n")
	FLOAT_TO_INT16_INTERLEAVE (sse,"1: \n""cvtps2pi (%2,%0), %%mm0 \n""cvtps2pi 8(%2,%0), %%mm1 \n""cvtps2pi (%3,%0), %%mm2 \n""cvtps2pi 8(%3,%0), %%mm3 \n""packssdw %%mm1, %%mm0 \n""packssdw %%mm3, %%mm2 \n""movq %%mm0, %%mm1 \n""punpcklwd %%mm2, %%mm0 \n""punpckhwd %%mm2, %%mm1 \n""movq %%mm0, (%1,%0)\n""movq %%mm1, 8(%1,%0)\n""add $16, %0 \n""js 1b \n""emms \n")
	FLOAT_TO_INT16_INTERLEAVE (sse2,"1: \n""cvtps2dq (%2,%0), %%xmm0 \n""cvtps2dq (%3,%0), %%xmm1 \n""packssdw %%xmm1, %%xmm0 \n""movhlps %%xmm0, %%xmm1 \n""punpcklwd %%xmm1, %%xmm0 \n""movdqa %%xmm0, (%1,%0) \n""add $16, %0 \n""js 1b \n")
static void	float_to_int16_interleave_3dn2 (int16_t *dst, const float **src, long len, /* remaining parameters not recoverable from this page */)
void	dsputil_init_mmx (DSPContext *c, AVCodecContext *avctx)
Variables
int	mm_flags
const uint64_t	ff_bone = 0x0101010101010101ULL
const uint64_t	ff_wtwo = 0x0002000200020002ULL
const uint64_t	ff_pdw_80000000 [2]
const uint64_t	ff_pw_3 = 0x0003000300030003ULL
const uint64_t	ff_pw_4 = 0x0004000400040004ULL
const xmm_reg	ff_pw_5 = {0x0005000500050005ULL, 0x0005000500050005ULL}
const xmm_reg	ff_pw_8 = {0x0008000800080008ULL, 0x0008000800080008ULL}
const uint64_t	ff_pw_15 = 0x000F000F000F000FULL
const xmm_reg	ff_pw_16 = {0x0010001000100010ULL, 0x0010001000100010ULL}
const uint64_t	ff_pw_20 = 0x0014001400140014ULL
const xmm_reg	ff_pw_28 = {0x001C001C001C001CULL, 0x001C001C001C001CULL}
const xmm_reg	ff_pw_32 = {0x0020002000200020ULL, 0x0020002000200020ULL}
const uint64_t	ff_pw_42 = 0x002A002A002A002AULL
const xmm_reg	ff_pw_64 = {0x0040004000400040ULL, 0x0040004000400040ULL}
const uint64_t	ff_pw_96 = 0x0060006000600060ULL
const uint64_t	ff_pw_128 = 0x0080008000800080ULL
const uint64_t	ff_pw_255 = 0x00ff00ff00ff00ffULL
const uint64_t	ff_pb_1 = 0x0101010101010101ULL
const uint64_t	ff_pb_3 = 0x0303030303030303ULL
const uint64_t	ff_pb_7 = 0x0707070707070707ULL
const uint64_t	ff_pb_1F = 0x1F1F1F1F1F1F1F1FULL
const uint64_t	ff_pb_3F = 0x3F3F3F3F3F3F3F3FULL
const uint64_t	ff_pb_81 = 0x8181818181818181ULL
const uint64_t	ff_pb_A1 = 0xA1A1A1A1A1A1A1A1ULL
const uint64_t	ff_pb_FC = 0xFCFCFCFCFCFCFCFCULL
const double	ff_pd_1 [2] = { 1.0, 1.0 }
const double	ff_pd_2 [2] = { 2.0, 2.0 }
(The remaining entries Doxygen generated in this section, named `__pad0__`, `mm0`, `mm1`, `mm2`, `mm3`, `add`, `src` and `len`, are not variables. They appear to be fragments of the inline-assembly string passed to FLOAT_TO_INT16_INTERLEAVE(sse, ...) and of the parameter list of float_to_int16_interleave_3dn2(), which the documentation parser failed to tokenize.)

#define PAVGBP	(	a,
		b,
		c,
		d,
		e,
		f	)	PAVGBP_MMX_NO_RND(a, b, c, d, e, f)

#define PUT_OP	(	a,
		b,
		temp,
		size	)	"mov" #size " " #a ", " #b " \n\t"

#define QPEL_V_LOW	(	m3,
		m4,
		m5,
		m6,
		pw_20,
		pw_3,
		rnd,
		in0,
		in1,
		in2,
		in7,
		out,
		OP	)

static void ac3_downmix_sse	(	float(*)	samples[256],
		float(*)	matrix[2],
		int	out_ch,
		int	in_ch,
		int	len
	)			`[static]`

static void add_bytes_l2_mmx	(	uint8_t *	dst,
		uint8_t *	src1,
		uint8_t *	src2,
		int	w
	)			`[static]`

static void add_bytes_mmx	(	uint8_t *	dst,
		uint8_t *	src,
		int	w
	)			`[static]`

void add_pixels_clamped_mmx	(	const DCTELEM *	block,
		uint8_t *	pixels,
		int	line_size
	)

static void avg_pixels16_sse2	(	uint8_t *	block,
		const uint8_t *	pixels,
		int	line_size,
		int	h
	)			`[static]`

static void draw_edges_mmx	(	uint8_t *	buf,
		int	wrap,
		int	width,
		int	height,
		int	w
	)			`[static]`

#define MIX5	(	mono,
		stereo		)

#define PAETH	(	cpu,
		abs3		)

#define PAVGB_MMX	(	rega,
		regb,
		regr,
		regfe	)

#define PAVGB_MMX_NO_RND	(	rega,
		regb,
		regr,
		regfe	)

void dsputil_init_mmx	(	DSPContext *	c,
		AVCodecContext *	avctx
	)

int ff_add_hfyu_left_prediction_sse4	(	uint8_t *	dst,
		const uint8_t *	src,
		int	w,
		int	left
	)

int ff_add_hfyu_left_prediction_ssse3	(	uint8_t *	dst,
		const uint8_t *	src,
		int	w,
		int	left
	)

void ff_add_hfyu_median_prediction_mmx2	(	uint8_t *	dst,
		const uint8_t *	top,
		const uint8_t *	diff,
		int	w,
		int *	left,
		int *	left_top
	)

void ff_avg_cavs_qpel16_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_avg_cavs_qpel8_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_avg_vc1_mspel_mc00_mmx2	(	uint8_t *	dst,
		const uint8_t *	src,
		int	stride,
		int	rnd
	)

void ff_float_to_int16_interleave6_3dn2	(	int16_t *	dst,
		const float **	src,
		int	len
	)

void ff_float_to_int16_interleave6_3dnow	(	int16_t *	dst,
		const float **	src,
		int	len
	)

void ff_float_to_int16_interleave6_sse	(	int16_t *	dst,
		const float **	src,
		int	len
	)

libavcodec/x86/dsputil_mmx.c File Reference

Defines

Functions

Variables

Define Documentation

Function Documentation

Variable Documentation

static void ff_idct_xvid_mmx2_add	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

static void ff_idct_xvid_mmx2_put	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

static void ff_idct_xvid_mmx_add	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

static void ff_idct_xvid_mmx_put	(	uint8_t *	dest,
		int	line_size,
		DCTELEM *	block
	)			`[static]`

void ff_put_cavs_qpel16_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_put_cavs_qpel8_mc00_mmx2	(	uint8_t *	dst,
		uint8_t *	src,
		int	stride
	)

void ff_put_vc1_mspel_mc00_mmx	(	uint8_t *	dst,
		const uint8_t *	src,
		int	stride,
		int	rnd
	)

int32_t ff_scalarproduct_and_madd_int16_mmx2	(	int16_t *	v1,
		int16_t *	v2,
		int16_t *	v3,
		int	order,
		int	mul
	)

int32_t ff_scalarproduct_int16_mmx2	(	int16_t *	v1,
		int16_t *	v2,
		int	order,
		int	shift
	)

int32_t ff_scalarproduct_int16_sse2	(	int16_t *	v1,
		int16_t *	v2,
		int	order,
		int	shift
	)