Many of the fields of MpegEncContext (which is also used by decoders)
are actually only used by encoders. Therefore this commit adds
a new encoder-only structure and moves all of the encoder-only
fields to it except for those which require more explicit
synchronisation between the main slice context and the other
slice contexts. This synchronisation is currently mainly provided
by ff_update_duplicate_context() which simply copies most of
the main slice context over the other slice contexts. Fields
which are moved to the new MPVEncContext no longer participate
in this copying (which is desired, because the copying is horrible and,
for the fields in b) below, wasteful), which means that some fields can
only be moved once explicit synchronisation code is added in later commits.
More explicitly, this commit moves the following fields:
a) Fields not copied by ff_update_duplicate_context():
dct_error_sum and dct_count; the former does not need synchronisation,
the latter is synchronised in merge_context_after_encode().
b) Fields which do not change after initialisation (these fields
could also be put into MPVMainEncContext at the cost of
an indirection to access them): lambda_table, adaptive_quant,
{luma,chroma}_elim_threshold, new_pic, fdsp, mpvencdsp, pdsp,
{p,b_forw,b_back,b_bidir_forw,b_bidir_back,b_direct,b_field}_mv_table,
[pb]_field_select_table, mb_{type,var,mean}, mc_mb_var, {min,max}_qcoeff,
{inter,intra}_quant_bias, ac_esc_length, the *_vlc_length fields,
the q_{intra,inter,chroma_intra}_matrix{,16}, dct_offset, mb_info,
mjpeg_ctx, rtp_mode, rtp_payload_size, encode_mb, all function
pointers, mpv_flags, quantizer_noise_shaping,
frame_reconstruction_bitfield, error_rate and intra_penalty.
c) Fields which are already (re)set explicitly: The PutBitContexts
pb, tex_pb, pb2; dquant, skipdct, encoding_error, the statistics
fields {mv,i_tex,p_tex,misc,last}_bits and i_count; last_mv_dir,
esc_pos (reset when writing the header).
d) Fields which are only used by encoders not supporting slice
threading for which synchronisation doesn't matter: esc3_level_length
and the remaining mb_info fields.
e) coded_score: This field is only really used when FF_MPV_FLAG_CBP_RD
is set (which implies trellis) and even then it is only used for
non-intra blocks. For these blocks dct_quantize_trellis_c() either
sets coded_score[n] or returns a last_non_zero value of -1
in which case coded_score will be reset in encode_mb_internal().
Therefore no old values are ever used.
The MotionEstContext has not been moved yet.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
149 lines
6 KiB
C
149 lines
6 KiB
C
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
#include <stdint.h>
|
|
|
|
#include "config.h"
|
|
#include "libavutil/attributes.h"
|
|
#include "libavutil/aarch64/cpu.h"
|
|
#include "libavcodec/mpegvideoenc.h"
|
|
|
|
/*
 * Prototypes for the comparison routines installed by
 * ff_me_cmp_init_aarch64(). Apart from the two nsse*_neon_wrapper()
 * functions, no definitions are visible in this file
 * (presumably they live in the NEON assembly sources -- confirm).
 */

/* pix_abs / sad family, 16-named variants */
int ff_pix_abs16_neon(MPVEncContext *s,
                      const uint8_t *blk1, const uint8_t *blk2,
                      ptrdiff_t stride, int h);
int ff_pix_abs16_x2_neon(MPVEncContext *v,
                         const uint8_t *pix1, const uint8_t *pix2,
                         ptrdiff_t stride, int h);
int ff_pix_abs16_y2_neon(MPVEncContext *v,
                         const uint8_t *pix1, const uint8_t *pix2,
                         ptrdiff_t stride, int h);
int ff_pix_abs16_xy2_neon(MPVEncContext *s,
                          const uint8_t *blk1, const uint8_t *blk2,
                          ptrdiff_t stride, int h);

/* pix_abs / sad family, 8-named variants */
int ff_pix_abs8_neon(MPVEncContext *s,
                     const uint8_t *blk1, const uint8_t *blk2,
                     ptrdiff_t stride, int h);
int ff_pix_abs8_x2_neon(MPVEncContext *v,
                        const uint8_t *pix1, const uint8_t *pix2,
                        ptrdiff_t stride, int h);
int ff_pix_abs8_y2_neon(MPVEncContext *v,
                        const uint8_t *pix1, const uint8_t *pix2,
                        ptrdiff_t stride, int h);
int ff_pix_abs8_xy2_neon(MPVEncContext *v,
                         const uint8_t *pix1, const uint8_t *pix2,
                         ptrdiff_t stride, int h);

/* sse family */
int sse16_neon(MPVEncContext *v,
               const uint8_t *pix1, const uint8_t *pix2,
               ptrdiff_t stride, int h);
int sse8_neon(MPVEncContext *v,
              const uint8_t *pix1, const uint8_t *pix2,
              ptrdiff_t stride, int h);
int sse4_neon(MPVEncContext *v,
              const uint8_t *pix1, const uint8_t *pix2,
              ptrdiff_t stride, int h);

/* vsad family */
int vsad16_neon(MPVEncContext *c,
                const uint8_t *s1, const uint8_t *s2,
                ptrdiff_t stride, int h);
int vsad_intra16_neon(MPVEncContext *c,
                      const uint8_t *s, const uint8_t *dummy,
                      ptrdiff_t stride, int h);
int vsad_intra8_neon(MPVEncContext *c,
                     const uint8_t *s, const uint8_t *dummy,
                     ptrdiff_t stride, int h);

/* vsse family */
int vsse16_neon(MPVEncContext *c,
                const uint8_t *s1, const uint8_t *s2,
                ptrdiff_t stride, int h);
int vsse8_neon(MPVEncContext *c,
               const uint8_t *s1, const uint8_t *s2,
               ptrdiff_t stride, int h);
int vsse_intra16_neon(MPVEncContext *c,
                      const uint8_t *s, const uint8_t *dummy,
                      ptrdiff_t stride, int h);
int vsse_intra8_neon(MPVEncContext *c,
                     const uint8_t *s, const uint8_t *dummy,
                     ptrdiff_t stride, int h);

/*
 * nsse family: the raw routines take an integer multiplier as their first
 * argument instead of a context; the *_wrapper() functions (defined at the
 * end of this file) take the context-based signature and supply it.
 */
int nsse16_neon(int multiplier,
                const uint8_t *s, const uint8_t *s2,
                ptrdiff_t stride, int h);
int nsse16_neon_wrapper(MPVEncContext *c,
                        const uint8_t *s1, const uint8_t *s2,
                        ptrdiff_t stride, int h);
int nsse8_neon(int multiplier,
               const uint8_t *s, const uint8_t *s2,
               ptrdiff_t stride, int h);
int nsse8_neon_wrapper(MPVEncContext *c,
                       const uint8_t *s1, const uint8_t *s2,
                       ptrdiff_t stride, int h);

/* median_sad family */
int pix_median_abs16_neon(MPVEncContext *v,
                          const uint8_t *pix1, const uint8_t *pix2,
                          ptrdiff_t stride, int h);
int pix_median_abs8_neon(MPVEncContext *v,
                         const uint8_t *pix1, const uint8_t *pix2,
                         ptrdiff_t stride, int h);
|
|
|
|
#if HAVE_DOTPROD
/* Variants that are only declared when the build has HAVE_DOTPROD set. */
int sse16_neon_dotprod(MPVEncContext *v,
                       const uint8_t *pix1, const uint8_t *pix2,
                       ptrdiff_t stride, int h);
int vsse_intra16_neon_dotprod(MPVEncContext *c,
                              const uint8_t *s, const uint8_t *dummy,
                              ptrdiff_t stride, int h);
#endif
|
|
|
|
/**
 * Fill the function tables of a MECmpContext with the AArch64 routines
 * that the running CPU supports.
 *
 * The CPU flags are queried once via av_get_cpu_flags(). When have_neon()
 * accepts them, the NEON routines are stored in the pix_abs, sad, sse,
 * vsad, vsse, nsse and median_sad tables. When the build has HAVE_DOTPROD
 * and have_dotprod() accepts the flags, sse[0] and vsse[4] are afterwards
 * replaced by their dotprod variants.
 *
 * @param c     context whose function pointers are written
 * @param avctx not referenced by this function
 */
av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
{
    const int flags = av_get_cpu_flags();

    if (have_neon(flags)) {
        /* Routines with "16" in their name go into slot 0 of each table. */
        c->sad[0]        = ff_pix_abs16_neon;
        c->pix_abs[0][0] = ff_pix_abs16_neon;
        c->pix_abs[0][1] = ff_pix_abs16_x2_neon;
        c->pix_abs[0][2] = ff_pix_abs16_y2_neon;
        c->pix_abs[0][3] = ff_pix_abs16_xy2_neon;

        /* Routines with "8" in their name go into slot 1. */
        c->sad[1]        = ff_pix_abs8_neon;
        c->pix_abs[1][0] = ff_pix_abs8_neon;
        c->pix_abs[1][1] = ff_pix_abs8_x2_neon;
        c->pix_abs[1][2] = ff_pix_abs8_y2_neon;
        c->pix_abs[1][3] = ff_pix_abs8_xy2_neon;

        c->sse[0] = sse16_neon;
        c->sse[1] = sse8_neon;
        c->sse[2] = sse4_neon;

        /* The intra variants occupy slots 4 and 5 of vsad/vsse. */
        c->vsad[0] = vsad16_neon;
        c->vsad[4] = vsad_intra16_neon;
        c->vsad[5] = vsad_intra8_neon;

        c->vsse[0] = vsse16_neon;
        c->vsse[1] = vsse8_neon;
        c->vsse[4] = vsse_intra16_neon;
        c->vsse[5] = vsse_intra8_neon;

        /* nsse goes through wrappers that supply the integer multiplier. */
        c->nsse[0] = nsse16_neon_wrapper;
        c->nsse[1] = nsse8_neon_wrapper;

        c->median_sad[0] = pix_median_abs16_neon;
        c->median_sad[1] = pix_median_abs8_neon;
    }

#if HAVE_DOTPROD
    /* Must stay after the NEON block: these overwrite two of its entries. */
    if (have_dotprod(flags)) {
        c->sse[0]  = sse16_neon_dotprod;
        c->vsse[4] = vsse_intra16_neon_dotprod;
    }
#endif
}
|
|
|
|
/**
 * Context-based entry point for nsse16_neon().
 *
 * Picks the multiplier passed to nsse16_neon(): the nsse_weight of the
 * codec context reachable through c when c is non-NULL, otherwise 8.
 *
 * @param c      encoder context, may be NULL
 * @param s1     forwarded unchanged to nsse16_neon()
 * @param s2     forwarded unchanged to nsse16_neon()
 * @param stride forwarded unchanged to nsse16_neon()
 * @param h      forwarded unchanged to nsse16_neon()
 * @return the value returned by nsse16_neon()
 */
int nsse16_neon_wrapper(MPVEncContext *c, const uint8_t *s1, const uint8_t *s2,
                        ptrdiff_t stride, int h)
{
    const int weight = c ? c->c.avctx->nsse_weight : 8;

    return nsse16_neon(weight, s1, s2, stride, h);
}
|
|
|
|
/**
 * Context-based entry point for nsse8_neon().
 *
 * Picks the multiplier passed to nsse8_neon(): the nsse_weight of the
 * codec context reachable through c when c is non-NULL, otherwise 8.
 *
 * @param c      encoder context, may be NULL
 * @param s1     forwarded unchanged to nsse8_neon()
 * @param s2     forwarded unchanged to nsse8_neon()
 * @param stride forwarded unchanged to nsse8_neon()
 * @param h      forwarded unchanged to nsse8_neon()
 * @return the value returned by nsse8_neon()
 */
int nsse8_neon_wrapper(MPVEncContext *c, const uint8_t *s1, const uint8_t *s2,
                       ptrdiff_t stride, int h)
{
    const int weight = c ? c->c.avctx->nsse_weight : 8;

    return nsse8_neon(weight, s1, s2, stride, h);
}
|