FFmpeg: libavcodec/mips/acelp_filters

00001  /*
00002  * Copyright (c) 2012
00003  *      MIPS Technologies, Inc., California.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 1. Redistributions of source code must retain the above copyright
00009  *    notice, this list of conditions and the following disclaimer.
00010  * 2. Redistributions in binary form must reproduce the above copyright
00011  *    notice, this list of conditions and the following disclaimer in the
00012  *    documentation and/or other materials provided with the distribution.
00013  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
00014  *    contributors may be used to endorse or promote products derived from
00015  *    this software without specific prior written permission.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  *
00029  * Author:  Nedeljko Babic (nbabic@mips.com)
00030  *
00031  * various filters for ACELP-based codecs optimized for MIPS
00032  *
00033  * This file is part of FFmpeg.
00034  *
00035  * FFmpeg is free software; you can redistribute it and/or
00036  * modify it under the terms of the GNU Lesser General Public
00037  * License as published by the Free Software Foundation; either
00038  * version 2.1 of the License, or (at your option) any later version.
00039  *
00040  * FFmpeg is distributed in the hope that it will be useful,
00041  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00042  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00043  * Lesser General Public License for more details.
00044  *
00045  * You should have received a copy of the GNU Lesser General Public
00046  * License along with FFmpeg; if not, write to the Free Software
00047  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00048  */
00049 
00054 #include "libavutil/attributes.h"
00055 #include "libavcodec/acelp_filters.h"
00056 
/**
 * Interpolation filter for ACELP-based codecs, using MIPS FPU inline assembly.
 *
 * For every n in [0, length):
 *
 *   out[n] = sum over i in [0, filter_length) of
 *                in[n + i]     * filter_coeffs[frac_pos             + i * precision]
 *              + in[n - 1 - i] * filter_coeffs[precision - frac_pos + i * precision]
 *
 * @param out            destination; length samples are written
 * @param in             source samples; in[-filter_length] up to
 *                       in[length + filter_length - 2] are read, so the caller
 *                       must provide valid samples on both sides of in[0]
 * @param filter_coeffs  filter coefficient table, indexed as shown above
 * @param precision      stride (in elements) between consecutive taps in
 *                       filter_coeffs
 * @param frac_pos       offset of the first forward tap in filter_coeffs
 * @param filter_length  number of forward/backward tap pairs per output sample
 * @param length         number of output samples to produce
 */
static void ff_acelp_interpolatef_mips(float *out, const float *in,
                           const float *filter_coeffs, int precision,
                           int frac_pos, int filter_length, int length)
{
    int n, i;
    int fc_offset = precision - frac_pos;
#if defined(__mips_hard_float) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6)
    /* madd.s needs a hardware FPU and is not available on MIPS Release 6,
     * hence the guard; other targets take the scalar path below. */
    int prec = precision * 4;   /* coefficient stride in bytes, used by addu */
    float in_val_p, in_val_m, fc_val_p, fc_val_m;
#endif

    for (n = 0; n < length; n++) {
        float v = 0;
#if defined(__mips_hard_float) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6)
        /*
         * Four running pointers are kept so that the inner loop only has to
         * bump them instead of recomputing indices.
         */
        const float *p_in_p = &in[n];
        const float *p_in_m = &in[n-1];
        const float *p_filter_coeffs_p = &filter_coeffs[frac_pos];
        const float *p_filter_coeffs_m = filter_coeffs + fc_offset;

        for (i = 0; i < filter_length; i++) {
            __asm__ volatile (
                "lwc1   %[in_val_p],           0(%[p_in_p])                    \n\t"
                "lwc1   %[fc_val_p],           0(%[p_filter_coeffs_p])         \n\t"
                "lwc1   %[in_val_m],           0(%[p_in_m])                    \n\t"
                "lwc1   %[fc_val_m],           0(%[p_filter_coeffs_m])         \n\t"
                "addiu  %[p_in_p],             %[p_in_p],              4       \n\t"
                "madd.s %[v],%[v],             %[in_val_p],%[fc_val_p]         \n\t"
                "addiu  %[p_in_m],             %[p_in_m],              -4      \n\t"
                "addu   %[p_filter_coeffs_p],  %[p_filter_coeffs_p],   %[prec] \n\t"
                "addu   %[p_filter_coeffs_m],  %[p_filter_coeffs_m],   %[prec] \n\t"
                "madd.s %[v],%[v],%[in_val_m], %[fc_val_m]                     \n\t"

                /* v is both read and written by madd.s, so it must be a
                 * read-write ("+") operand; a write-only ("=") operand would
                 * allow the compiler to discard the running sum. */
                : [v] "+&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
                  [p_filter_coeffs_p] "+r" (p_filter_coeffs_p),
                  [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m),
                  [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m),
                  [p_filter_coeffs_m] "+r" (p_filter_coeffs_m)
                : [prec] "r" (prec)
                /* the loads go through plain pointer registers, so the
                 * compiler has to be told that memory is accessed */
                : "memory"
            );
        }
#else
        /* Scalar equivalent; same accumulation order as the assembly. */
        for (i = 0; i < filter_length; i++) {
            v += in[n + i]     * filter_coeffs[frac_pos  + i * precision];
            v += in[n - 1 - i] * filter_coeffs[fc_offset + i * precision];
        }
#endif
        out[n] = v;
    }
}
00101 
/**
 * Apply a second-order pole/zero filter section, using MIPS FPU inline
 * assembly for groups of eight samples.
 *
 * For every i in [0, n), in order:
 *
 *   tmp    = gain * in[i] - pole_coeffs[0] * mem[0] - pole_coeffs[1] * mem[1]
 *   out[i] = tmp          + zero_coeffs[0] * mem[0] + zero_coeffs[1] * mem[1]
 *   mem[1] = mem[0]
 *   mem[0] = tmp
 *
 * @param out          destination; n samples are written
 * @param in           source; n samples are read
 * @param zero_coeffs  the two zero (feed-forward) coefficients
 * @param pole_coeffs  the two pole (feedback) coefficients
 * @param gain         factor applied to every input sample
 * @param mem          filter state; read on entry and updated on return
 * @param n            number of samples; nothing is done when n <= 0
 *
 * The assembly loop is unrolled eight times and can only terminate on a sample
 * count that is a multiple of eight, so it is given the largest such count and
 * the remaining (at most seven) samples are handled by the scalar loop.
 */
static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const float *in,
                                              const float zero_coeffs[2],
                                              const float pole_coeffs[2],
                                              float gain, float mem[2], int n)
{
    int i;
    int tail = n;   /* samples left for the scalar loop */

    if (n <= 0)
        return;

#if defined(__mips_hard_float) && (!defined(__mips_isa_rev) || __mips_isa_rev < 6)
    /* madd.s/msub.s need a hardware FPU and are not available on MIPS
     * Release 6, hence the guard; other targets use the scalar loop only. */
    tail = n & 7;
    n   -= tail;    /* multiple of eight (possibly 0, then blez skips the loop) */

    /*
     * Register usage:
     *   $f0, $f1  filter state (mem[0], mem[1])
     *   $f2, $f3  pole_coeffs[0], pole_coeffs[1]
     *   $f4, $f5  zero_coeffs[0], zero_coeffs[1]
     *   others    per-sample temporaries
     * out and in are advanced by 32 bytes (eight floats) per loop iteration.
     */
    __asm__ volatile (
        "lwc1   $f0,    0(%[mem])                                              \n\t"
        "blez   %[n],   ff_acelp_apply_order_2_transfer_function_end%=         \n\t"
        "lwc1   $f1,    4(%[mem])                                              \n\t"
        "lwc1   $f2,    0(%[pole_coeffs])                                      \n\t"
        "lwc1   $f3,    4(%[pole_coeffs])                                      \n\t"
        "lwc1   $f4,    0(%[zero_coeffs])                                      \n\t"
        "lwc1   $f5,    4(%[zero_coeffs])                                      \n\t"

        "ff_acelp_apply_order_2_transfer_function_madd%=:                      \n\t"

        "lwc1   $f6,    0(%[in])                                               \n\t"
        "mul.s  $f9,    $f3,      $f1                                          \n\t"
        "mul.s  $f7,    $f2,      $f0                                          \n\t"
        "msub.s $f7,    $f7,      %[gain], $f6                                 \n\t"
        "sub.s  $f7,    $f7,      $f9                                          \n\t"
        "madd.s $f8,    $f7,      $f4,     $f0                                 \n\t"
        "madd.s $f8,    $f8,      $f5,     $f1                                 \n\t"
        "lwc1   $f11,   4(%[in])                                               \n\t"
        "mul.s  $f12,   $f3,      $f0                                          \n\t"
        "mul.s  $f13,   $f2,      $f7                                          \n\t"
        "msub.s $f13,   $f13,     %[gain], $f11                                \n\t"
        "sub.s  $f13,   $f13,     $f12                                         \n\t"
        "madd.s $f14,   $f13,     $f4,     $f7                                 \n\t"
        "madd.s $f14,   $f14,     $f5,     $f0                                 \n\t"
        "swc1   $f8,    0(%[out])                                              \n\t"
        "lwc1   $f6,    8(%[in])                                               \n\t"
        "mul.s  $f9,    $f3,      $f7                                          \n\t"
        "mul.s  $f15,   $f2,      $f13                                         \n\t"
        "msub.s $f15,   $f15,     %[gain], $f6                                 \n\t"
        "sub.s  $f15,   $f15,     $f9                                          \n\t"
        "madd.s $f8,    $f15,     $f4,     $f13                                \n\t"
        "madd.s $f8,    $f8,      $f5,     $f7                                 \n\t"
        "swc1   $f14,   4(%[out])                                              \n\t"
        "lwc1   $f11,   12(%[in])                                              \n\t"
        "mul.s  $f12,   $f3,      $f13                                         \n\t"
        "mul.s  $f16,   $f2,      $f15                                         \n\t"
        "msub.s $f16,   $f16,     %[gain], $f11                                \n\t"
        "sub.s  $f16,   $f16,     $f12                                         \n\t"
        "madd.s $f14,   $f16,     $f4,     $f15                                \n\t"
        "madd.s $f14,   $f14,     $f5,     $f13                                \n\t"
        "swc1   $f8,    8(%[out])                                              \n\t"
        "lwc1   $f6,    16(%[in])                                              \n\t"
        "mul.s  $f9,    $f3,      $f15                                         \n\t"
        "mul.s  $f7,    $f2,      $f16                                         \n\t"
        "msub.s $f7,    $f7,      %[gain], $f6                                 \n\t"
        "sub.s  $f7,    $f7,      $f9                                          \n\t"
        "madd.s $f8,    $f7,      $f4,     $f16                                \n\t"
        "madd.s $f8,    $f8,      $f5,     $f15                                \n\t"
        "swc1   $f14,   12(%[out])                                             \n\t"
        "lwc1   $f11,   20(%[in])                                              \n\t"
        "mul.s  $f12,   $f3,      $f16                                         \n\t"
        "mul.s  $f13,   $f2,      $f7                                          \n\t"
        "msub.s $f13,   $f13,     %[gain], $f11                                \n\t"
        "sub.s  $f13,   $f13,     $f12                                         \n\t"
        "madd.s $f14,   $f13,     $f4,     $f7                                 \n\t"
        "madd.s $f14,   $f14,     $f5,     $f16                                \n\t"
        "swc1   $f8,    16(%[out])                                             \n\t"
        "lwc1   $f6,    24(%[in])                                              \n\t"
        "mul.s  $f9,    $f3,      $f7                                          \n\t"
        "mul.s  $f15,   $f2,      $f13                                         \n\t"
        "msub.s $f15,   $f15,     %[gain], $f6                                 \n\t"
        "sub.s  $f1,    $f15,     $f9                                          \n\t"
        "madd.s $f8,    $f1,      $f4,     $f13                                \n\t"
        "madd.s $f8,    $f8,      $f5,     $f7                                 \n\t"
        "swc1   $f14,   20(%[out])                                             \n\t"
        "lwc1   $f11,   28(%[in])                                              \n\t"
        "mul.s  $f12,   $f3,      $f13                                         \n\t"
        "mul.s  $f16,   $f2,      $f1                                          \n\t"
        "msub.s $f16,   $f16,     %[gain], $f11                                \n\t"
        "sub.s  $f0,    $f16,     $f12                                         \n\t"
        "madd.s $f14,   $f0,      $f4,     $f1                                 \n\t"
        "madd.s $f14,   $f14,     $f5,     $f13                                \n\t"
        "swc1   $f8,    24(%[out])                                             \n\t"
        "addiu  %[out], 32                                                     \n\t"
        "addiu  %[in],  32                                                     \n\t"
        "addiu  %[n],   -8                                                     \n\t"
        "swc1   $f14,   -4(%[out])                                             \n\t"
        "bnez   %[n],   ff_acelp_apply_order_2_transfer_function_madd%=        \n\t"
        "swc1   $f1,    4(%[mem])                                              \n\t"
        "swc1   $f0,    0(%[mem])                                              \n\t"

        "ff_acelp_apply_order_2_transfer_function_end%=:                       \n\t"

         : [out] "+r" (out),
           [in] "+r" (in), [gain] "+f" (gain),
           [n] "+r" (n), [mem] "+r" (mem)
         : [zero_coeffs] "r" (zero_coeffs),
           [pole_coeffs] "r" (pole_coeffs)
         /* "memory": the block stores to out[] and mem[] and loads from in[],
          * mem[] and the coefficient arrays through plain pointer registers */
         : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
           "$f6", "$f7",  "$f8", "$f9", "$f10", "$f11",
           "$f12", "$f13", "$f14", "$f15", "$f16", "memory"
    );
    /* out and in now point past the samples handled by the assembly */
#endif

    /* Scalar loop: the remainder after the assembly, or every sample when the
     * assembly is compiled out. Same operation order as the assembly. */
    for (i = 0; i < tail; i++) {
        float tmp = gain * in[i] - pole_coeffs[0] * mem[0] - pole_coeffs[1] * mem[1];

        out[i] = tmp + zero_coeffs[0] * mem[0] + zero_coeffs[1] * mem[1];
        mem[1] = mem[0];
        mem[0] = tmp;
    }
}
00205 
00206 void ff_acelp_filter_init_mips(ACELPFContext *c)
00207 {
00208     c->acelp_interpolatef                      = ff_acelp_interpolatef_mips;
00209     c->acelp_apply_order_2_transfer_function   = ff_acelp_apply_order_2_transfer_function_mips;
00210 }
libavcodec/mips/acelp_filters_mips.c