/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
 * Copyright (c) 2010-2023, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
 *    be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/** \file     RdCost.cpp
    \brief    RD cost computation class
*/

#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP

#include "RdCost.h"

#include "Rom.h"
#include "UnitPartitioner.h"

#include <limits>

//! \ingroup CommonLib
//!
\{ EnumArray RdCost::m_distortionFunc; RdCost::RdCost() { init(); } RdCost::~RdCost() { } #if WCG_EXT double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion, bool useUnadjustedLambda ) #else double RdCost::calcRdCost( uint64_t fracBits, Distortion distortion ) #endif { if (m_costMode == COST_LOSSLESS_CODING && 0 != distortion && m_isLosslessRDCost) { return MAX_DOUBLE; } #if WCG_EXT return (useUnadjustedLambda ? m_distScaleUnadjusted : m_distScale) * double(distortion) + double(fracBits); #else return m_distScale * double(distortion) + double(fracBits); #endif } void RdCost::setLambda( double dLambda, const BitDepths &bitDepths ) { m_dLambda = dLambda; m_distScale = double(1 << SCALE_BITS) / m_dLambda; m_dLambdaMotionSAD = sqrt(m_dLambda); } void RdCost::lambdaAdjustColorTrans(bool forward, ComponentID componentID, bool applyChromaScale, int* resScaleInv) { if (m_resetStore) { for (uint8_t component = 0; component < MAX_NUM_COMPONENT; component++) { ComponentID compID = (ComponentID)component; int delta_QP = DELTA_QP_ACT[compID]; double lamdbaAdjustRate = pow(2.0, delta_QP / 3.0); m_lambdaStore[0][component] = m_dLambda; m_distScaleStore[0][component] = m_distScale; m_lambdaStore[1][component] = m_dLambda * lamdbaAdjustRate; m_distScaleStore[1][component] = double(1 << SCALE_BITS) / m_lambdaStore[1][component]; } m_resetStore = false; } if (forward) { CHECK(m_pairCheck == 1, "lambda has been already adjusted"); m_pairCheck = 1; } else { CHECK(m_pairCheck == 0, "lambda has not been adjusted"); m_pairCheck = 0; } m_dLambda = m_lambdaStore[m_pairCheck][componentID]; m_distScale = m_distScaleStore[m_pairCheck][componentID]; if (applyChromaScale) { CHECK(m_pairCheck == 0 || componentID == COMPONENT_Y, "wrong lambda adjustment for CS"); double cResScale = (double)(1 << CSCALE_FP_PREC) / (double)(*resScaleInv); m_dLambda = m_dLambda / (cResScale*cResScale); m_distScale = double(1 << SCALE_BITS) / m_dLambda; } if (m_pairCheck == 0) { CHECK(m_distScale != 
m_distScaleUnadjusted, "lambda should be adjusted to the original value"); } } // Initialize Function Pointer by [distFunc] void RdCost::init() { m_distortionFunc[DFunc::SSE] = RdCost::xGetSSE; m_distortionFunc[DFunc::SSE2] = RdCost::xGetSSE; m_distortionFunc[DFunc::SSE4] = RdCost::xGetSSE4; m_distortionFunc[DFunc::SSE8] = RdCost::xGetSSE8; m_distortionFunc[DFunc::SSE16] = RdCost::xGetSSE16; m_distortionFunc[DFunc::SSE32] = RdCost::xGetSSE32; m_distortionFunc[DFunc::SSE64] = RdCost::xGetSSE64; m_distortionFunc[DFunc::SSE16N] = RdCost::xGetSSE16N; m_distortionFunc[DFunc::SAD] = RdCost::xGetSAD; m_distortionFunc[DFunc::SAD2] = RdCost::xGetSAD; m_distortionFunc[DFunc::SAD4] = RdCost::xGetSAD4; m_distortionFunc[DFunc::SAD8] = RdCost::xGetSAD8; m_distortionFunc[DFunc::SAD16] = RdCost::xGetSAD16; m_distortionFunc[DFunc::SAD32] = RdCost::xGetSAD32; m_distortionFunc[DFunc::SAD64] = RdCost::xGetSAD64; m_distortionFunc[DFunc::SAD16N] = RdCost::xGetSAD16N; m_distortionFunc[DFunc::SAD12] = RdCost::xGetSAD12; m_distortionFunc[DFunc::SAD24] = RdCost::xGetSAD24; m_distortionFunc[DFunc::SAD48] = RdCost::xGetSAD48; m_distortionFunc[DFunc::HAD] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD2] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD4] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD8] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD16] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD32] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD64] = RdCost::xGetHADs; m_distortionFunc[DFunc::HAD16N] = RdCost::xGetHADs; m_distortionFunc[DFunc::MRSAD] = RdCost::xGetMRSAD; m_distortionFunc[DFunc::MRSAD2] = RdCost::xGetMRSAD; m_distortionFunc[DFunc::MRSAD4] = RdCost::xGetMRSAD4; m_distortionFunc[DFunc::MRSAD8] = RdCost::xGetMRSAD8; m_distortionFunc[DFunc::MRSAD16] = RdCost::xGetMRSAD16; m_distortionFunc[DFunc::MRSAD32] = RdCost::xGetMRSAD32; m_distortionFunc[DFunc::MRSAD64] = RdCost::xGetMRSAD64; m_distortionFunc[DFunc::MRSAD16N] = RdCost::xGetMRSAD16N; m_distortionFunc[DFunc::MRSAD12] = 
RdCost::xGetMRSAD12; m_distortionFunc[DFunc::MRSAD24] = RdCost::xGetMRSAD24; m_distortionFunc[DFunc::MRSAD48] = RdCost::xGetMRSAD48; m_distortionFunc[DFunc::MRHAD] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD2] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD4] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD8] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD16] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD32] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD64] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::MRHAD16N] = RdCost::xGetMRHADs; m_distortionFunc[DFunc::SAD_FULL_NBIT] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT2] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT4] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT8] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT16] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT32] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT64] = RdCost::xGetSAD_full; m_distortionFunc[DFunc::SAD_FULL_NBIT16N] = RdCost::xGetSAD_full; #if WCG_EXT m_distortionFunc[DFunc::SSE_WTD] = RdCost::xGetSSE_WTD; m_distortionFunc[DFunc::SSE2_WTD] = RdCost::xGetSSE2_WTD; m_distortionFunc[DFunc::SSE4_WTD] = RdCost::xGetSSE4_WTD; m_distortionFunc[DFunc::SSE8_WTD] = RdCost::xGetSSE8_WTD; m_distortionFunc[DFunc::SSE16_WTD] = RdCost::xGetSSE16_WTD; m_distortionFunc[DFunc::SSE32_WTD] = RdCost::xGetSSE32_WTD; m_distortionFunc[DFunc::SSE64_WTD] = RdCost::xGetSSE64_WTD; m_distortionFunc[DFunc::SSE16N_WTD] = RdCost::xGetSSE16N_WTD; #endif m_distortionFunc[DFunc::SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD; m_distortionFunc[DFunc::SAD_WITH_MASK] = RdCost::xGetSADwMask; #if ENABLE_SIMD_OPT_DIST #ifdef TARGET_SIMD_X86 initRdCostX86(); #endif #endif m_costMode = COST_STANDARD_LOSSY; m_motionLambda = 0; m_iCostScale = 0; m_resetStore = true; m_pairCheck = 0; } void RdCost::setDistParam(DistParam &rcDP, const CPelBuf &org, const Pel *piRefY, ptrdiff_t 
iRefStride, int bitDepth, ComponentID compID, int subShiftMode, int step, bool useHadamard) { rcDP.bitDepth = bitDepth; rcDP.compID = compID; // set Original & Curr Pointer / Stride rcDP.org = org; rcDP.cur.buf = piRefY; rcDP.cur.stride = iRefStride; // set Block Width / Height rcDP.cur.width = org.width; rcDP.cur.height = org.height; rcDP.step = step; rcDP.maximumDistortionForEarlyExit = std::numeric_limits::max(); if( !useHadamard ) { const DFunc baseIdx = rcDP.useMR ? DFunc::MRSAD : DFunc::SAD; rcDP.distFunc = m_distortionFunc[baseIdx + sizeOffset(org.width)]; } else { const DFunc baseIdx = rcDP.useMR ? DFunc::MRHAD : DFunc::HAD; rcDP.distFunc = m_distortionFunc[baseIdx + sizeOffset(org.width)]; } // initialize rcDP.subShift = 0; if( subShiftMode == 1 ) { if( rcDP.org.height > 32 && ( rcDP.org.height & 15 ) == 0 ) { rcDP.subShift = 4; } else if( rcDP.org.height > 16 && ( rcDP.org.height & 7 ) == 0 ) { rcDP.subShift = 3; } else if( rcDP.org.height > 8 && ( rcDP.org.height & 3 ) == 0 ) { rcDP.subShift = 2; } else if( ( rcDP.org.height & 1 ) == 0 ) { rcDP.subShift = 1; } } else if( subShiftMode == 2 ) { if( rcDP.org.height > 8 && rcDP.org.width <= 64 ) { rcDP.subShift = 1; } } else if( subShiftMode == 3 ) { if (rcDP.org.height > 8 ) { rcDP.subShift = 1; } } } void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard ) { rcDP.org = org; rcDP.cur = cur; rcDP.step = 1; rcDP.subShift = 0; rcDP.bitDepth = bitDepth; rcDP.compID = compID; if( !useHadamard ) { const DFunc baseIdx = rcDP.useMR ? DFunc::MRSAD : DFunc::SAD; rcDP.distFunc = m_distortionFunc[baseIdx + sizeOffset(org.width)]; } else { const DFunc baseIdx = rcDP.useMR ? 
DFunc::MRHAD : DFunc::HAD; rcDP.distFunc = m_distortionFunc[baseIdx + sizeOffset(org.width)]; } rcDP.maximumDistortionForEarlyExit = std::numeric_limits::max(); } void RdCost::setDistParam(DistParam &rcDP, const Pel *pOrg, const Pel *piRefY, ptrdiff_t iOrgStride, ptrdiff_t iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard, bool bioApplied) { rcDP.bitDepth = bitDepth; rcDP.compID = compID; rcDP.org.buf = pOrg; rcDP.org.stride = iOrgStride; rcDP.org.width = width; rcDP.org.height = height; rcDP.cur.buf = piRefY; rcDP.cur.stride = iRefStride; rcDP.cur.width = width; rcDP.cur.height = height; rcDP.subShift = subShiftMode; rcDP.step = step; rcDP.maximumDistortionForEarlyExit = std::numeric_limits::max(); CHECK(useHadamard || rcDP.useMR, "only used in xDmvrCost with these default parameters (so far...)"); if ( bioApplied ) { rcDP.distFunc = m_distortionFunc[DFunc::SAD_INTERMEDIATE_BITDEPTH]; return; } rcDP.distFunc = m_distortionFunc[DFunc::SAD + sizeOffset(width)]; } #if WCG_EXT Distortion RdCost::getDistPart(const CPelBuf &org, const CPelBuf &cur, int bitDepth, const ComponentID compID, DFunc distFunc, const CPelBuf *orgLuma) #else Distortion RdCost::getDistPart(const CPelBuf &org, const CPelBuf &cur, int bitDepth, const ComponentID compID, DFunc distFunc) #endif { DistParam cDtParam; cDtParam.org = org; cDtParam.cur = cur; cDtParam.step = 1; cDtParam.bitDepth = bitDepth; cDtParam.compID = compID; #if WCG_EXT if( orgLuma ) { cDtParam.cShiftX = getComponentScaleX(compID, m_cf); cDtParam.cShiftY = getComponentScaleY(compID, m_cf); if( isChroma(compID) ) { cDtParam.orgLuma = *orgLuma; } else { cDtParam.orgLuma = org; } } #endif cDtParam.distFunc = m_distortionFunc[distFunc + sizeOffset(org.width)]; if (isChroma(compID)) { return ((Distortion) (m_distortionWeight[ MAP_CHROMA(compID) ] * cDtParam.distFunc( cDtParam ))); } else { return cDtParam.distFunc( cDtParam ); } } // 
==================================================================================================================== // Distortion functions // ==================================================================================================================== // -------------------------------------------------------------------------------------------------------------------- // SAD // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xGetSAD_full( const DistParam& rcDtParam ) { CHECK( rcDtParam.applyWeight, "Cannot apply weight when using full-bit SAD!" ); const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int height = rcDtParam.org.height; const int width = rcDtParam.org.width; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; #define SAD_OP(ADDR) sum += abs(piOrg[ADDR] - piCur[ADDR]); #define SAD_INC \ piOrg += strideOrg; \ piCur += strideCur; SIZE_AWARE_PER_EL_OP( SAD_OP, SAD_INC ) #undef SAD_OP #undef SAD_INC sum <<= subShift; return sum; } Distortion RdCost::xGetSAD( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int cols = rcDtParam.org.width; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); Distortion sum = 0; for (; rows != 0; rows -= subStep) { for (int n = 0; n < cols; n++) { sum += abs(piOrg[n] - piCur[n]); } if (rcDtParam.maximumDistortionForEarlyExit < (sum 
>> distortionShift)) { return (sum >> distortionShift); } piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> distortionShift); } Distortion RdCost::xGetSAD4( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD8( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); sum += abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD16( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = 
rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); sum += abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); sum += abs(piOrg[8] - piCur[8]); sum += abs(piOrg[9] - piCur[9]); sum += abs(piOrg[10] - piCur[10]); sum += abs(piOrg[11] - piCur[11]); sum += abs(piOrg[12] - piCur[12]); sum += abs(piOrg[13] - piCur[13]); sum += abs(piOrg[14] - piCur[14]); sum += abs(piOrg[15] - piCur[15]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD12( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); sum += abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); sum += abs(piOrg[8] - piCur[8]); sum += abs(piOrg[9] - piCur[9]); sum += abs(piOrg[10] - piCur[10]); sum += abs(piOrg[11] - piCur[11]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum 
>> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD16N( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; const int cols = rcDtParam.org.width; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { for (int n = 0; n < cols; n += 16) { sum += abs(piOrg[n + 0] - piCur[n + 0]); sum += abs(piOrg[n + 1] - piCur[n + 1]); sum += abs(piOrg[n + 2] - piCur[n + 2]); sum += abs(piOrg[n + 3] - piCur[n + 3]); sum += abs(piOrg[n + 4] - piCur[n + 4]); sum += abs(piOrg[n + 5] - piCur[n + 5]); sum += abs(piOrg[n + 6] - piCur[n + 6]); sum += abs(piOrg[n + 7] - piCur[n + 7]); sum += abs(piOrg[n + 8] - piCur[n + 8]); sum += abs(piOrg[n + 9] - piCur[n + 9]); sum += abs(piOrg[n + 10] - piCur[n + 10]); sum += abs(piOrg[n + 11] - piCur[n + 11]); sum += abs(piOrg[n + 12] - piCur[n + 12]); sum += abs(piOrg[n + 13] - piCur[n + 13]); sum += abs(piOrg[n + 14] - piCur[n + 14]); sum += abs(piOrg[n + 15] - piCur[n + 15]); } piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD32( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); sum += 
abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); sum += abs(piOrg[8] - piCur[8]); sum += abs(piOrg[9] - piCur[9]); sum += abs(piOrg[10] - piCur[10]); sum += abs(piOrg[11] - piCur[11]); sum += abs(piOrg[12] - piCur[12]); sum += abs(piOrg[13] - piCur[13]); sum += abs(piOrg[14] - piCur[14]); sum += abs(piOrg[15] - piCur[15]); sum += abs(piOrg[16] - piCur[16]); sum += abs(piOrg[17] - piCur[17]); sum += abs(piOrg[18] - piCur[18]); sum += abs(piOrg[19] - piCur[19]); sum += abs(piOrg[20] - piCur[20]); sum += abs(piOrg[21] - piCur[21]); sum += abs(piOrg[22] - piCur[22]); sum += abs(piOrg[23] - piCur[23]); sum += abs(piOrg[24] - piCur[24]); sum += abs(piOrg[25] - piCur[25]); sum += abs(piOrg[26] - piCur[26]); sum += abs(piOrg[27] - piCur[27]); sum += abs(piOrg[28] - piCur[28]); sum += abs(piOrg[29] - piCur[29]); sum += abs(piOrg[30] - piCur[30]); sum += abs(piOrg[31] - piCur[31]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD24( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); sum += abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); sum += abs(piOrg[8] - piCur[8]); sum += abs(piOrg[9] - piCur[9]); sum += abs(piOrg[10] - piCur[10]); sum += abs(piOrg[11] - piCur[11]); sum += 
abs(piOrg[12] - piCur[12]); sum += abs(piOrg[13] - piCur[13]); sum += abs(piOrg[14] - piCur[14]); sum += abs(piOrg[15] - piCur[15]); sum += abs(piOrg[16] - piCur[16]); sum += abs(piOrg[17] - piCur[17]); sum += abs(piOrg[18] - piCur[18]); sum += abs(piOrg[19] - piCur[19]); sum += abs(piOrg[20] - piCur[20]); sum += abs(piOrg[21] - piCur[21]); sum += abs(piOrg[22] - piCur[22]); sum += abs(piOrg[23] - piCur[23]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD64( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - piCur[3]); sum += abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); sum += abs(piOrg[8] - piCur[8]); sum += abs(piOrg[9] - piCur[9]); sum += abs(piOrg[10] - piCur[10]); sum += abs(piOrg[11] - piCur[11]); sum += abs(piOrg[12] - piCur[12]); sum += abs(piOrg[13] - piCur[13]); sum += abs(piOrg[14] - piCur[14]); sum += abs(piOrg[15] - piCur[15]); sum += abs(piOrg[16] - piCur[16]); sum += abs(piOrg[17] - piCur[17]); sum += abs(piOrg[18] - piCur[18]); sum += abs(piOrg[19] - piCur[19]); sum += abs(piOrg[20] - piCur[20]); sum += abs(piOrg[21] - piCur[21]); sum += abs(piOrg[22] - piCur[22]); sum += abs(piOrg[23] - piCur[23]); sum += abs(piOrg[24] - piCur[24]); sum += abs(piOrg[25] - piCur[25]); sum += abs(piOrg[26] - piCur[26]); sum += abs(piOrg[27] - piCur[27]); sum += 
abs(piOrg[28] - piCur[28]); sum += abs(piOrg[29] - piCur[29]); sum += abs(piOrg[30] - piCur[30]); sum += abs(piOrg[31] - piCur[31]); sum += abs(piOrg[32] - piCur[32]); sum += abs(piOrg[33] - piCur[33]); sum += abs(piOrg[34] - piCur[34]); sum += abs(piOrg[35] - piCur[35]); sum += abs(piOrg[36] - piCur[36]); sum += abs(piOrg[37] - piCur[37]); sum += abs(piOrg[38] - piCur[38]); sum += abs(piOrg[39] - piCur[39]); sum += abs(piOrg[40] - piCur[40]); sum += abs(piOrg[41] - piCur[41]); sum += abs(piOrg[42] - piCur[42]); sum += abs(piOrg[43] - piCur[43]); sum += abs(piOrg[44] - piCur[44]); sum += abs(piOrg[45] - piCur[45]); sum += abs(piOrg[46] - piCur[46]); sum += abs(piOrg[47] - piCur[47]); sum += abs(piOrg[48] - piCur[48]); sum += abs(piOrg[49] - piCur[49]); sum += abs(piOrg[50] - piCur[50]); sum += abs(piOrg[51] - piCur[51]); sum += abs(piOrg[52] - piCur[52]); sum += abs(piOrg[53] - piCur[53]); sum += abs(piOrg[54] - piCur[54]); sum += abs(piOrg[55] - piCur[55]); sum += abs(piOrg[56] - piCur[56]); sum += abs(piOrg[57] - piCur[57]); sum += abs(piOrg[58] - piCur[58]); sum += abs(piOrg[59] - piCur[59]); sum += abs(piOrg[60] - piCur[60]); sum += abs(piOrg[61] - piCur[61]); sum += abs(piOrg[62] - piCur[62]); sum += abs(piOrg[63] - piCur[63]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetSAD48( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0]); sum += abs(piOrg[1] - piCur[1]); sum += abs(piOrg[2] - piCur[2]); sum += abs(piOrg[3] - 
piCur[3]); sum += abs(piOrg[4] - piCur[4]); sum += abs(piOrg[5] - piCur[5]); sum += abs(piOrg[6] - piCur[6]); sum += abs(piOrg[7] - piCur[7]); sum += abs(piOrg[8] - piCur[8]); sum += abs(piOrg[9] - piCur[9]); sum += abs(piOrg[10] - piCur[10]); sum += abs(piOrg[11] - piCur[11]); sum += abs(piOrg[12] - piCur[12]); sum += abs(piOrg[13] - piCur[13]); sum += abs(piOrg[14] - piCur[14]); sum += abs(piOrg[15] - piCur[15]); sum += abs(piOrg[16] - piCur[16]); sum += abs(piOrg[17] - piCur[17]); sum += abs(piOrg[18] - piCur[18]); sum += abs(piOrg[19] - piCur[19]); sum += abs(piOrg[20] - piCur[20]); sum += abs(piOrg[21] - piCur[21]); sum += abs(piOrg[22] - piCur[22]); sum += abs(piOrg[23] - piCur[23]); sum += abs(piOrg[24] - piCur[24]); sum += abs(piOrg[25] - piCur[25]); sum += abs(piOrg[26] - piCur[26]); sum += abs(piOrg[27] - piCur[27]); sum += abs(piOrg[28] - piCur[28]); sum += abs(piOrg[29] - piCur[29]); sum += abs(piOrg[30] - piCur[30]); sum += abs(piOrg[31] - piCur[31]); sum += abs(piOrg[32] - piCur[32]); sum += abs(piOrg[33] - piCur[33]); sum += abs(piOrg[34] - piCur[34]); sum += abs(piOrg[35] - piCur[35]); sum += abs(piOrg[36] - piCur[36]); sum += abs(piOrg[37] - piCur[37]); sum += abs(piOrg[38] - piCur[38]); sum += abs(piOrg[39] - piCur[39]); sum += abs(piOrg[40] - piCur[40]); sum += abs(piOrg[41] - piCur[41]); sum += abs(piOrg[42] - piCur[42]); sum += abs(piOrg[43] - piCur[43]); sum += abs(piOrg[44] - piCur[44]); sum += abs(piOrg[45] - piCur[45]); sum += abs(piOrg[46] - piCur[46]); sum += abs(piOrg[47] - piCur[47]); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } // -------------------------------------------------------------------------------------------------------------------- // MRSAD // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xGetMRSAD( const DistParam& rcDtParam ) { const Pel* piOrg = 
rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int cols = rcDtParam.org.width; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = (1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { for (int n = 0; n < cols; n++) { deltaSum += ( piOrg[n] - piCur[n] ); } } const Pel offset = Pel(deltaSum / (cols * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { for (int n = 0; n < cols; n++) { sum += abs(piOrg[n] - piCur[n] - offset); } if (rcDtParam.maximumDistortionForEarlyExit < (sum >> distortionShift)) { return (sum >> distortionShift); } piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> distortionShift); } Distortion RdCost::xGetMRSAD4( const DistParam& rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[0] - piCur[0] ); deltaSum += ( piOrg[1] - piCur[1] ); deltaSum += ( piOrg[2] - piCur[2] ); deltaSum += ( piOrg[3] - piCur[3] ); } const Pel offset = Pel(deltaSum / (4 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); piOrg += 
strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD8( const DistParam& rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[0] - piCur[0] ); deltaSum += ( piOrg[1] - piCur[1] ); deltaSum += ( piOrg[2] - piCur[2] ); deltaSum += ( piOrg[3] - piCur[3] ); deltaSum += ( piOrg[4] - piCur[4] ); deltaSum += ( piOrg[5] - piCur[5] ); deltaSum += ( piOrg[6] - piCur[6] ); deltaSum += ( piOrg[7] - piCur[7] ); } const Pel offset = Pel(deltaSum / (8 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD16( const DistParam& rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); 
deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); } const Pel offset = Pel(deltaSum / (16 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); sum += abs(piOrg[8] - piCur[8] - offset); sum += abs(piOrg[9] - piCur[9] - offset); sum += abs(piOrg[10] - piCur[10] - offset); sum += abs(piOrg[11] - piCur[11] - offset); sum += abs(piOrg[12] - piCur[12] - offset); sum += abs(piOrg[13] - piCur[13] - offset); sum += abs(piOrg[14] - piCur[14] - offset); sum += abs(piOrg[15] - piCur[15] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD12( const DistParam& rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += 
strideOrg, piCur += strideCur) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); } const Pel offset = Pel(deltaSum / (12 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); sum += abs(piOrg[8] - piCur[8] - offset); sum += abs(piOrg[9] - piCur[9] - offset); sum += abs(piOrg[10] - piCur[10] - offset); sum += abs(piOrg[11] - piCur[11] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD16N( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int cols = rcDtParam.org.width; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { for (int n = 0; n < cols; n += 16) { deltaSum += ( piOrg[n+ 0] - piCur[n+ 0] ); deltaSum += ( piOrg[n+ 1] - piCur[n+ 1] ); deltaSum += ( piOrg[n+ 2] - piCur[n+ 2] ); deltaSum += ( piOrg[n+ 3] - 
piCur[n+ 3] ); deltaSum += ( piOrg[n+ 4] - piCur[n+ 4] ); deltaSum += ( piOrg[n+ 5] - piCur[n+ 5] ); deltaSum += ( piOrg[n+ 6] - piCur[n+ 6] ); deltaSum += ( piOrg[n+ 7] - piCur[n+ 7] ); deltaSum += ( piOrg[n+ 8] - piCur[n+ 8] ); deltaSum += ( piOrg[n+ 9] - piCur[n+ 9] ); deltaSum += ( piOrg[n+10] - piCur[n+10] ); deltaSum += ( piOrg[n+11] - piCur[n+11] ); deltaSum += ( piOrg[n+12] - piCur[n+12] ); deltaSum += ( piOrg[n+13] - piCur[n+13] ); deltaSum += ( piOrg[n+14] - piCur[n+14] ); deltaSum += ( piOrg[n+15] - piCur[n+15] ); } } const Pel offset = Pel(deltaSum / (cols * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { for (int n = 0; n < cols; n += 16) { sum += abs(piOrg[n + 0] - piCur[n + 0] - offset); sum += abs(piOrg[n + 1] - piCur[n + 1] - offset); sum += abs(piOrg[n + 2] - piCur[n + 2] - offset); sum += abs(piOrg[n + 3] - piCur[n + 3] - offset); sum += abs(piOrg[n + 4] - piCur[n + 4] - offset); sum += abs(piOrg[n + 5] - piCur[n + 5] - offset); sum += abs(piOrg[n + 6] - piCur[n + 6] - offset); sum += abs(piOrg[n + 7] - piCur[n + 7] - offset); sum += abs(piOrg[n + 8] - piCur[n + 8] - offset); sum += abs(piOrg[n + 9] - piCur[n + 9] - offset); sum += abs(piOrg[n + 10] - piCur[n + 10] - offset); sum += abs(piOrg[n + 11] - piCur[n + 11] - offset); sum += abs(piOrg[n + 12] - piCur[n + 12] - offset); sum += abs(piOrg[n + 13] - piCur[n + 13] - offset); sum += abs(piOrg[n + 14] - piCur[n + 14] - offset); sum += abs(piOrg[n + 15] - piCur[n + 15] - offset); } piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD32( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t 
strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); deltaSum += ( piOrg[24] - piCur[24] ); deltaSum += ( piOrg[25] - piCur[25] ); deltaSum += ( piOrg[26] - piCur[26] ); deltaSum += ( piOrg[27] - piCur[27] ); deltaSum += ( piOrg[28] - piCur[28] ); deltaSum += ( piOrg[29] - piCur[29] ); deltaSum += ( piOrg[30] - piCur[30] ); deltaSum += ( piOrg[31] - piCur[31] ); } const Pel offset = Pel(deltaSum / (32 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); sum += abs(piOrg[8] - piCur[8] - offset); sum += abs(piOrg[9] - piCur[9] - offset); sum += 
abs(piOrg[10] - piCur[10] - offset); sum += abs(piOrg[11] - piCur[11] - offset); sum += abs(piOrg[12] - piCur[12] - offset); sum += abs(piOrg[13] - piCur[13] - offset); sum += abs(piOrg[14] - piCur[14] - offset); sum += abs(piOrg[15] - piCur[15] - offset); sum += abs(piOrg[16] - piCur[16] - offset); sum += abs(piOrg[17] - piCur[17] - offset); sum += abs(piOrg[18] - piCur[18] - offset); sum += abs(piOrg[19] - piCur[19] - offset); sum += abs(piOrg[20] - piCur[20] - offset); sum += abs(piOrg[21] - piCur[21] - offset); sum += abs(piOrg[22] - piCur[22] - offset); sum += abs(piOrg[23] - piCur[23] - offset); sum += abs(piOrg[24] - piCur[24] - offset); sum += abs(piOrg[25] - piCur[25] - offset); sum += abs(piOrg[26] - piCur[26] - offset); sum += abs(piOrg[27] - piCur[27] - offset); sum += abs(piOrg[28] - piCur[28] - offset); sum += abs(piOrg[29] - piCur[29] - offset); sum += abs(piOrg[30] - piCur[30] - offset); sum += abs(piOrg[31] - piCur[31] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD24( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - 
piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); } const Pel offset = Pel(deltaSum / (24 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); sum += abs(piOrg[8] - piCur[8] - offset); sum += abs(piOrg[9] - piCur[9] - offset); sum += abs(piOrg[10] - piCur[10] - offset); sum += abs(piOrg[11] - piCur[11] - offset); sum += abs(piOrg[12] - piCur[12] - offset); sum += abs(piOrg[13] - piCur[13] - offset); sum += abs(piOrg[14] - piCur[14] - offset); sum += abs(piOrg[15] - piCur[15] - offset); sum += abs(piOrg[16] - piCur[16] - offset); sum += abs(piOrg[17] - piCur[17] - offset); sum += abs(piOrg[18] - piCur[18] - offset); sum += abs(piOrg[19] - piCur[19] - offset); sum += abs(piOrg[20] - piCur[20] - offset); sum += abs(piOrg[21] - piCur[21] - offset); sum += abs(piOrg[22] - piCur[22] - offset); sum += abs(piOrg[23] - piCur[23] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD64( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int 
subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); deltaSum += ( piOrg[24] - piCur[24] ); deltaSum += ( piOrg[25] - piCur[25] ); deltaSum += ( piOrg[26] - piCur[26] ); deltaSum += ( piOrg[27] - piCur[27] ); deltaSum += ( piOrg[28] - piCur[28] ); deltaSum += ( piOrg[29] - piCur[29] ); deltaSum += ( piOrg[30] - piCur[30] ); deltaSum += ( piOrg[31] - piCur[31] ); deltaSum += ( piOrg[32] - piCur[32] ); deltaSum += ( piOrg[33] - piCur[33] ); deltaSum += ( piOrg[34] - piCur[34] ); deltaSum += ( piOrg[35] - piCur[35] ); deltaSum += ( piOrg[36] - piCur[36] ); deltaSum += ( piOrg[37] - piCur[37] ); deltaSum += ( piOrg[38] - piCur[38] ); deltaSum += ( piOrg[39] - piCur[39] ); deltaSum += ( piOrg[40] - piCur[40] ); deltaSum += ( piOrg[41] - piCur[41] ); deltaSum += ( piOrg[42] - piCur[42] ); deltaSum += ( piOrg[43] - piCur[43] ); deltaSum 
+= ( piOrg[44] - piCur[44] ); deltaSum += ( piOrg[45] - piCur[45] ); deltaSum += ( piOrg[46] - piCur[46] ); deltaSum += ( piOrg[47] - piCur[47] ); deltaSum += ( piOrg[48] - piCur[48] ); deltaSum += ( piOrg[49] - piCur[49] ); deltaSum += ( piOrg[50] - piCur[50] ); deltaSum += ( piOrg[51] - piCur[51] ); deltaSum += ( piOrg[52] - piCur[52] ); deltaSum += ( piOrg[53] - piCur[53] ); deltaSum += ( piOrg[54] - piCur[54] ); deltaSum += ( piOrg[55] - piCur[55] ); deltaSum += ( piOrg[56] - piCur[56] ); deltaSum += ( piOrg[57] - piCur[57] ); deltaSum += ( piOrg[58] - piCur[58] ); deltaSum += ( piOrg[59] - piCur[59] ); deltaSum += ( piOrg[60] - piCur[60] ); deltaSum += ( piOrg[61] - piCur[61] ); deltaSum += ( piOrg[62] - piCur[62] ); deltaSum += ( piOrg[63] - piCur[63] ); } const Pel offset = Pel(deltaSum / (64 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); sum += abs(piOrg[8] - piCur[8] - offset); sum += abs(piOrg[9] - piCur[9] - offset); sum += abs(piOrg[10] - piCur[10] - offset); sum += abs(piOrg[11] - piCur[11] - offset); sum += abs(piOrg[12] - piCur[12] - offset); sum += abs(piOrg[13] - piCur[13] - offset); sum += abs(piOrg[14] - piCur[14] - offset); sum += abs(piOrg[15] - piCur[15] - offset); sum += abs(piOrg[16] - piCur[16] - offset); sum += abs(piOrg[17] - piCur[17] - offset); sum += abs(piOrg[18] - piCur[18] - offset); sum += abs(piOrg[19] - piCur[19] - offset); sum += abs(piOrg[20] - piCur[20] - offset); sum += abs(piOrg[21] - piCur[21] - offset); sum += abs(piOrg[22] - piCur[22] - offset); sum += abs(piOrg[23] - piCur[23] - offset); sum += 
abs(piOrg[24] - piCur[24] - offset); sum += abs(piOrg[25] - piCur[25] - offset); sum += abs(piOrg[26] - piCur[26] - offset); sum += abs(piOrg[27] - piCur[27] - offset); sum += abs(piOrg[28] - piCur[28] - offset); sum += abs(piOrg[29] - piCur[29] - offset); sum += abs(piOrg[30] - piCur[30] - offset); sum += abs(piOrg[31] - piCur[31] - offset); sum += abs(piOrg[32] - piCur[32] - offset); sum += abs(piOrg[33] - piCur[33] - offset); sum += abs(piOrg[34] - piCur[34] - offset); sum += abs(piOrg[35] - piCur[35] - offset); sum += abs(piOrg[36] - piCur[36] - offset); sum += abs(piOrg[37] - piCur[37] - offset); sum += abs(piOrg[38] - piCur[38] - offset); sum += abs(piOrg[39] - piCur[39] - offset); sum += abs(piOrg[40] - piCur[40] - offset); sum += abs(piOrg[41] - piCur[41] - offset); sum += abs(piOrg[42] - piCur[42] - offset); sum += abs(piOrg[43] - piCur[43] - offset); sum += abs(piOrg[44] - piCur[44] - offset); sum += abs(piOrg[45] - piCur[45] - offset); sum += abs(piOrg[46] - piCur[46] - offset); sum += abs(piOrg[47] - piCur[47] - offset); sum += abs(piOrg[48] - piCur[48] - offset); sum += abs(piOrg[49] - piCur[49] - offset); sum += abs(piOrg[50] - piCur[50] - offset); sum += abs(piOrg[51] - piCur[51] - offset); sum += abs(piOrg[52] - piCur[52] - offset); sum += abs(piOrg[53] - piCur[53] - offset); sum += abs(piOrg[54] - piCur[54] - offset); sum += abs(piOrg[55] - piCur[55] - offset); sum += abs(piOrg[56] - piCur[56] - offset); sum += abs(piOrg[57] - piCur[57] - offset); sum += abs(piOrg[58] - piCur[58] - offset); sum += abs(piOrg[59] - piCur[59] - offset); sum += abs(piOrg[60] - piCur[60] - offset); sum += abs(piOrg[61] - piCur[61] - offset); sum += abs(piOrg[62] - piCur[62] - offset); sum += abs(piOrg[63] - piCur[63] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } Distortion RdCost::xGetMRSAD48( const DistParam &rcDtParam ) { const Pel* piOrg = rcDtParam.org.buf; const Pel* 
piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int subShift = rcDtParam.subShift; int subStep = (1 << subShift); ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; int32_t deltaSum = 0; for (int r = rows; r != 0; r -= subStep, piOrg += strideOrg, piCur += strideCur) { deltaSum += ( piOrg[ 0] - piCur[ 0] ); deltaSum += ( piOrg[ 1] - piCur[ 1] ); deltaSum += ( piOrg[ 2] - piCur[ 2] ); deltaSum += ( piOrg[ 3] - piCur[ 3] ); deltaSum += ( piOrg[ 4] - piCur[ 4] ); deltaSum += ( piOrg[ 5] - piCur[ 5] ); deltaSum += ( piOrg[ 6] - piCur[ 6] ); deltaSum += ( piOrg[ 7] - piCur[ 7] ); deltaSum += ( piOrg[ 8] - piCur[ 8] ); deltaSum += ( piOrg[ 9] - piCur[ 9] ); deltaSum += ( piOrg[10] - piCur[10] ); deltaSum += ( piOrg[11] - piCur[11] ); deltaSum += ( piOrg[12] - piCur[12] ); deltaSum += ( piOrg[13] - piCur[13] ); deltaSum += ( piOrg[14] - piCur[14] ); deltaSum += ( piOrg[15] - piCur[15] ); deltaSum += ( piOrg[16] - piCur[16] ); deltaSum += ( piOrg[17] - piCur[17] ); deltaSum += ( piOrg[18] - piCur[18] ); deltaSum += ( piOrg[19] - piCur[19] ); deltaSum += ( piOrg[20] - piCur[20] ); deltaSum += ( piOrg[21] - piCur[21] ); deltaSum += ( piOrg[22] - piCur[22] ); deltaSum += ( piOrg[23] - piCur[23] ); deltaSum += ( piOrg[24] - piCur[24] ); deltaSum += ( piOrg[25] - piCur[25] ); deltaSum += ( piOrg[26] - piCur[26] ); deltaSum += ( piOrg[27] - piCur[27] ); deltaSum += ( piOrg[28] - piCur[28] ); deltaSum += ( piOrg[29] - piCur[29] ); deltaSum += ( piOrg[30] - piCur[30] ); deltaSum += ( piOrg[31] - piCur[31] ); deltaSum += ( piOrg[32] - piCur[32] ); deltaSum += ( piOrg[33] - piCur[33] ); deltaSum += ( piOrg[34] - piCur[34] ); deltaSum += ( piOrg[35] - piCur[35] ); deltaSum += ( piOrg[36] - piCur[36] ); deltaSum += ( piOrg[37] - piCur[37] ); deltaSum += ( piOrg[38] - piCur[38] ); deltaSum += ( piOrg[39] - piCur[39] ); deltaSum += ( piOrg[40] - piCur[40] ); deltaSum += ( piOrg[41] - piCur[41] ); deltaSum += ( 
piOrg[42] - piCur[42] ); deltaSum += ( piOrg[43] - piCur[43] ); deltaSum += ( piOrg[44] - piCur[44] ); deltaSum += ( piOrg[45] - piCur[45] ); deltaSum += ( piOrg[46] - piCur[46] ); deltaSum += ( piOrg[47] - piCur[47] ); } const Pel offset = Pel(deltaSum / (48 * (rows >> subShift))); piOrg = rcDtParam.org.buf; piCur = rcDtParam.cur.buf; Distortion sum = 0; for (; rows != 0; rows -= subStep) { sum += abs(piOrg[0] - piCur[0] - offset); sum += abs(piOrg[1] - piCur[1] - offset); sum += abs(piOrg[2] - piCur[2] - offset); sum += abs(piOrg[3] - piCur[3] - offset); sum += abs(piOrg[4] - piCur[4] - offset); sum += abs(piOrg[5] - piCur[5] - offset); sum += abs(piOrg[6] - piCur[6] - offset); sum += abs(piOrg[7] - piCur[7] - offset); sum += abs(piOrg[8] - piCur[8] - offset); sum += abs(piOrg[9] - piCur[9] - offset); sum += abs(piOrg[10] - piCur[10] - offset); sum += abs(piOrg[11] - piCur[11] - offset); sum += abs(piOrg[12] - piCur[12] - offset); sum += abs(piOrg[13] - piCur[13] - offset); sum += abs(piOrg[14] - piCur[14] - offset); sum += abs(piOrg[15] - piCur[15] - offset); sum += abs(piOrg[16] - piCur[16] - offset); sum += abs(piOrg[17] - piCur[17] - offset); sum += abs(piOrg[18] - piCur[18] - offset); sum += abs(piOrg[19] - piCur[19] - offset); sum += abs(piOrg[20] - piCur[20] - offset); sum += abs(piOrg[21] - piCur[21] - offset); sum += abs(piOrg[22] - piCur[22] - offset); sum += abs(piOrg[23] - piCur[23] - offset); sum += abs(piOrg[24] - piCur[24] - offset); sum += abs(piOrg[25] - piCur[25] - offset); sum += abs(piOrg[26] - piCur[26] - offset); sum += abs(piOrg[27] - piCur[27] - offset); sum += abs(piOrg[28] - piCur[28] - offset); sum += abs(piOrg[29] - piCur[29] - offset); sum += abs(piOrg[30] - piCur[30] - offset); sum += abs(piOrg[31] - piCur[31] - offset); sum += abs(piOrg[32] - piCur[32] - offset); sum += abs(piOrg[33] - piCur[33] - offset); sum += abs(piOrg[34] - piCur[34] - offset); sum += abs(piOrg[35] - piCur[35] - offset); sum += abs(piOrg[36] - piCur[36] - 
offset); sum += abs(piOrg[37] - piCur[37] - offset); sum += abs(piOrg[38] - piCur[38] - offset); sum += abs(piOrg[39] - piCur[39] - offset); sum += abs(piOrg[40] - piCur[40] - offset); sum += abs(piOrg[41] - piCur[41] - offset); sum += abs(piOrg[42] - piCur[42] - offset); sum += abs(piOrg[43] - piCur[43] - offset); sum += abs(piOrg[44] - piCur[44] - offset); sum += abs(piOrg[45] - piCur[45] - offset); sum += abs(piOrg[46] - piCur[46] - offset); sum += abs(piOrg[47] - piCur[47] - offset); piOrg += strideOrg; piCur += strideCur; } sum <<= subShift; return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } // -------------------------------------------------------------------------------------------------------------------- // SSE // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xGetSSE( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int cols = rcDtParam.org.width; ptrdiff_t strideCur = rcDtParam.cur.stride; ptrdiff_t strideOrg = rcDtParam.org.stride; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { for (int n = 0; n < cols; n++) { temp = piOrg[n] - piCur[n]; sum += Distortion((temp * temp) >> shift); } piOrg += strideOrg; piCur += strideCur; } return (sum); } Distortion RdCost::xGetSSE4( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 4, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; ptrdiff_t strideOrg = rcDtParam.org.stride; ptrdiff_t strideCur = rcDtParam.cur.stride; Distortion sum = 0; uint32_t shift = 
DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { temp = piOrg[0] - piCur[0]; sum += Distortion((temp * temp) >> shift); temp = piOrg[1] - piCur[1]; sum += Distortion((temp * temp) >> shift); temp = piOrg[2] - piCur[2]; sum += Distortion((temp * temp) >> shift); temp = piOrg[3] - piCur[3]; sum += Distortion((temp * temp) >> shift); piOrg += strideOrg; piCur += strideCur; } return (sum); } Distortion RdCost::xGetSSE8( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 8, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; ptrdiff_t strideOrg = rcDtParam.org.stride; ptrdiff_t strideCur = rcDtParam.cur.stride; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { temp = piOrg[0] - piCur[0]; sum += Distortion((temp * temp) >> shift); temp = piOrg[1] - piCur[1]; sum += Distortion((temp * temp) >> shift); temp = piOrg[2] - piCur[2]; sum += Distortion((temp * temp) >> shift); temp = piOrg[3] - piCur[3]; sum += Distortion((temp * temp) >> shift); temp = piOrg[4] - piCur[4]; sum += Distortion((temp * temp) >> shift); temp = piOrg[5] - piCur[5]; sum += Distortion((temp * temp) >> shift); temp = piOrg[6] - piCur[6]; sum += Distortion((temp * temp) >> shift); temp = piOrg[7] - piCur[7]; sum += Distortion((temp * temp) >> shift); piOrg += strideOrg; piCur += strideCur; } return (sum); } Distortion RdCost::xGetSSE16( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 16, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; ptrdiff_t strideOrg = rcDtParam.org.stride; ptrdiff_t strideCur = 
rcDtParam.cur.stride; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { temp = piOrg[0] - piCur[0]; sum += Distortion((temp * temp) >> shift); temp = piOrg[1] - piCur[1]; sum += Distortion((temp * temp) >> shift); temp = piOrg[2] - piCur[2]; sum += Distortion((temp * temp) >> shift); temp = piOrg[3] - piCur[3]; sum += Distortion((temp * temp) >> shift); temp = piOrg[4] - piCur[4]; sum += Distortion((temp * temp) >> shift); temp = piOrg[5] - piCur[5]; sum += Distortion((temp * temp) >> shift); temp = piOrg[6] - piCur[6]; sum += Distortion((temp * temp) >> shift); temp = piOrg[7] - piCur[7]; sum += Distortion((temp * temp) >> shift); temp = piOrg[8] - piCur[8]; sum += Distortion((temp * temp) >> shift); temp = piOrg[9] - piCur[9]; sum += Distortion((temp * temp) >> shift); temp = piOrg[10] - piCur[10]; sum += Distortion((temp * temp) >> shift); temp = piOrg[11] - piCur[11]; sum += Distortion((temp * temp) >> shift); temp = piOrg[12] - piCur[12]; sum += Distortion((temp * temp) >> shift); temp = piOrg[13] - piCur[13]; sum += Distortion((temp * temp) >> shift); temp = piOrg[14] - piCur[14]; sum += Distortion((temp * temp) >> shift); temp = piOrg[15] - piCur[15]; sum += Distortion((temp * temp) >> shift); piOrg += strideOrg; piCur += strideCur; } return (sum); } Distortion RdCost::xGetSSE16N( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; int cols = rcDtParam.org.width; ptrdiff_t strideOrg = rcDtParam.org.stride; ptrdiff_t strideCur = rcDtParam.cur.stride; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { for (int n = 0; n < cols; n += 16) { temp = piOrg[n + 0] - piCur[n + 0]; sum += Distortion((temp * 
temp) >> shift); temp = piOrg[n + 1] - piCur[n + 1]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 2] - piCur[n + 2]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 3] - piCur[n + 3]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 4] - piCur[n + 4]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 5] - piCur[n + 5]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 6] - piCur[n + 6]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 7] - piCur[n + 7]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 8] - piCur[n + 8]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 9] - piCur[n + 9]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 10] - piCur[n + 10]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 11] - piCur[n + 11]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 12] - piCur[n + 12]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 13] - piCur[n + 13]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 14] - piCur[n + 14]; sum += Distortion((temp * temp) >> shift); temp = piOrg[n + 15] - piCur[n + 15]; sum += Distortion((temp * temp) >> shift); } piOrg += strideOrg; piCur += strideCur; } return (sum); } Distortion RdCost::xGetSSE32( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 32, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; ptrdiff_t strideOrg = rcDtParam.org.stride; ptrdiff_t strideCur = rcDtParam.cur.stride; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { temp = piOrg[0] - piCur[0]; sum += Distortion((temp * temp) >> shift); temp = piOrg[1] - piCur[1]; sum += Distortion((temp * temp) >> shift); temp = piOrg[2] - piCur[2]; sum += 
Distortion((temp * temp) >> shift); temp = piOrg[3] - piCur[3]; sum += Distortion((temp * temp) >> shift); temp = piOrg[4] - piCur[4]; sum += Distortion((temp * temp) >> shift); temp = piOrg[5] - piCur[5]; sum += Distortion((temp * temp) >> shift); temp = piOrg[6] - piCur[6]; sum += Distortion((temp * temp) >> shift); temp = piOrg[7] - piCur[7]; sum += Distortion((temp * temp) >> shift); temp = piOrg[8] - piCur[8]; sum += Distortion((temp * temp) >> shift); temp = piOrg[9] - piCur[9]; sum += Distortion((temp * temp) >> shift); temp = piOrg[10] - piCur[10]; sum += Distortion((temp * temp) >> shift); temp = piOrg[11] - piCur[11]; sum += Distortion((temp * temp) >> shift); temp = piOrg[12] - piCur[12]; sum += Distortion((temp * temp) >> shift); temp = piOrg[13] - piCur[13]; sum += Distortion((temp * temp) >> shift); temp = piOrg[14] - piCur[14]; sum += Distortion((temp * temp) >> shift); temp = piOrg[15] - piCur[15]; sum += Distortion((temp * temp) >> shift); temp = piOrg[16] - piCur[16]; sum += Distortion((temp * temp) >> shift); temp = piOrg[17] - piCur[17]; sum += Distortion((temp * temp) >> shift); temp = piOrg[18] - piCur[18]; sum += Distortion((temp * temp) >> shift); temp = piOrg[19] - piCur[19]; sum += Distortion((temp * temp) >> shift); temp = piOrg[20] - piCur[20]; sum += Distortion((temp * temp) >> shift); temp = piOrg[21] - piCur[21]; sum += Distortion((temp * temp) >> shift); temp = piOrg[22] - piCur[22]; sum += Distortion((temp * temp) >> shift); temp = piOrg[23] - piCur[23]; sum += Distortion((temp * temp) >> shift); temp = piOrg[24] - piCur[24]; sum += Distortion((temp * temp) >> shift); temp = piOrg[25] - piCur[25]; sum += Distortion((temp * temp) >> shift); temp = piOrg[26] - piCur[26]; sum += Distortion((temp * temp) >> shift); temp = piOrg[27] - piCur[27]; sum += Distortion((temp * temp) >> shift); temp = piOrg[28] - piCur[28]; sum += Distortion((temp * temp) >> shift); temp = piOrg[29] - piCur[29]; sum += Distortion((temp * temp) >> shift); temp = 
piOrg[30] - piCur[30]; sum += Distortion((temp * temp) >> shift); temp = piOrg[31] - piCur[31]; sum += Distortion((temp * temp) >> shift); piOrg += strideOrg; piCur += strideCur; } return (sum); } Distortion RdCost::xGetSSE64( const DistParam &rcDtParam ) { if ( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 64, "Invalid size" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; int rows = rcDtParam.org.height; ptrdiff_t strideOrg = rcDtParam.org.stride; ptrdiff_t strideCur = rcDtParam.cur.stride; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; Intermediate_Int temp; for (; rows != 0; rows--) { temp = piOrg[0] - piCur[0]; sum += Distortion((temp * temp) >> shift); temp = piOrg[1] - piCur[1]; sum += Distortion((temp * temp) >> shift); temp = piOrg[2] - piCur[2]; sum += Distortion((temp * temp) >> shift); temp = piOrg[3] - piCur[3]; sum += Distortion((temp * temp) >> shift); temp = piOrg[4] - piCur[4]; sum += Distortion((temp * temp) >> shift); temp = piOrg[5] - piCur[5]; sum += Distortion((temp * temp) >> shift); temp = piOrg[6] - piCur[6]; sum += Distortion((temp * temp) >> shift); temp = piOrg[7] - piCur[7]; sum += Distortion((temp * temp) >> shift); temp = piOrg[8] - piCur[8]; sum += Distortion((temp * temp) >> shift); temp = piOrg[9] - piCur[9]; sum += Distortion((temp * temp) >> shift); temp = piOrg[10] - piCur[10]; sum += Distortion((temp * temp) >> shift); temp = piOrg[11] - piCur[11]; sum += Distortion((temp * temp) >> shift); temp = piOrg[12] - piCur[12]; sum += Distortion((temp * temp) >> shift); temp = piOrg[13] - piCur[13]; sum += Distortion((temp * temp) >> shift); temp = piOrg[14] - piCur[14]; sum += Distortion((temp * temp) >> shift); temp = piOrg[15] - piCur[15]; sum += Distortion((temp * temp) >> shift); temp = piOrg[16] - piCur[16]; sum += Distortion((temp * temp) >> shift); temp = piOrg[17] - piCur[17]; sum += 
Distortion((temp * temp) >> shift); temp = piOrg[18] - piCur[18]; sum += Distortion((temp * temp) >> shift); temp = piOrg[19] - piCur[19]; sum += Distortion((temp * temp) >> shift); temp = piOrg[20] - piCur[20]; sum += Distortion((temp * temp) >> shift); temp = piOrg[21] - piCur[21]; sum += Distortion((temp * temp) >> shift); temp = piOrg[22] - piCur[22]; sum += Distortion((temp * temp) >> shift); temp = piOrg[23] - piCur[23]; sum += Distortion((temp * temp) >> shift); temp = piOrg[24] - piCur[24]; sum += Distortion((temp * temp) >> shift); temp = piOrg[25] - piCur[25]; sum += Distortion((temp * temp) >> shift); temp = piOrg[26] - piCur[26]; sum += Distortion((temp * temp) >> shift); temp = piOrg[27] - piCur[27]; sum += Distortion((temp * temp) >> shift); temp = piOrg[28] - piCur[28]; sum += Distortion((temp * temp) >> shift); temp = piOrg[29] - piCur[29]; sum += Distortion((temp * temp) >> shift); temp = piOrg[30] - piCur[30]; sum += Distortion((temp * temp) >> shift); temp = piOrg[31] - piCur[31]; sum += Distortion((temp * temp) >> shift); temp = piOrg[32] - piCur[32]; sum += Distortion((temp * temp) >> shift); temp = piOrg[33] - piCur[33]; sum += Distortion((temp * temp) >> shift); temp = piOrg[34] - piCur[34]; sum += Distortion((temp * temp) >> shift); temp = piOrg[35] - piCur[35]; sum += Distortion((temp * temp) >> shift); temp = piOrg[36] - piCur[36]; sum += Distortion((temp * temp) >> shift); temp = piOrg[37] - piCur[37]; sum += Distortion((temp * temp) >> shift); temp = piOrg[38] - piCur[38]; sum += Distortion((temp * temp) >> shift); temp = piOrg[39] - piCur[39]; sum += Distortion((temp * temp) >> shift); temp = piOrg[40] - piCur[40]; sum += Distortion((temp * temp) >> shift); temp = piOrg[41] - piCur[41]; sum += Distortion((temp * temp) >> shift); temp = piOrg[42] - piCur[42]; sum += Distortion((temp * temp) >> shift); temp = piOrg[43] - piCur[43]; sum += Distortion((temp * temp) >> shift); temp = piOrg[44] - piCur[44]; sum += Distortion((temp * temp) >> 
shift); temp = piOrg[45] - piCur[45]; sum += Distortion((temp * temp) >> shift); temp = piOrg[46] - piCur[46]; sum += Distortion((temp * temp) >> shift); temp = piOrg[47] - piCur[47]; sum += Distortion((temp * temp) >> shift); temp = piOrg[48] - piCur[48]; sum += Distortion((temp * temp) >> shift); temp = piOrg[49] - piCur[49]; sum += Distortion((temp * temp) >> shift); temp = piOrg[50] - piCur[50]; sum += Distortion((temp * temp) >> shift); temp = piOrg[51] - piCur[51]; sum += Distortion((temp * temp) >> shift); temp = piOrg[52] - piCur[52]; sum += Distortion((temp * temp) >> shift); temp = piOrg[53] - piCur[53]; sum += Distortion((temp * temp) >> shift); temp = piOrg[54] - piCur[54]; sum += Distortion((temp * temp) >> shift); temp = piOrg[55] - piCur[55]; sum += Distortion((temp * temp) >> shift); temp = piOrg[56] - piCur[56]; sum += Distortion((temp * temp) >> shift); temp = piOrg[57] - piCur[57]; sum += Distortion((temp * temp) >> shift); temp = piOrg[58] - piCur[58]; sum += Distortion((temp * temp) >> shift); temp = piOrg[59] - piCur[59]; sum += Distortion((temp * temp) >> shift); temp = piOrg[60] - piCur[60]; sum += Distortion((temp * temp) >> shift); temp = piOrg[61] - piCur[61]; sum += Distortion((temp * temp) >> shift); temp = piOrg[62] - piCur[62]; sum += Distortion((temp * temp) >> shift); temp = piOrg[63] - piCur[63]; sum += Distortion((temp * temp) >> shift); piOrg += strideOrg; piCur += strideCur; } return (sum); } // -------------------------------------------------------------------------------------------------------------------- // HADAMARD with step (used in fractional search) // -------------------------------------------------------------------------------------------------------------------- Distortion RdCost::xCalcHADs2x2(const Pel *piOrg, const Pel *piCur, ptrdiff_t strideOrg, ptrdiff_t strideCur, int step) { Distortion satd = 0; TCoeff diff[4], m[4]; CHECK(step != 1, "Invalid step"); diff[0] = piOrg[0 ] - piCur[0]; diff[1] = piOrg[1 ] - 
piCur[1]; diff[2] = piOrg[strideOrg] - piCur[0 + strideCur]; diff[3] = piOrg[strideOrg + 1] - piCur[1 + strideCur]; m[0] = diff[0] + diff[2]; m[1] = diff[1] + diff[3]; m[2] = diff[0] - diff[2]; m[3] = diff[1] - diff[3]; #if JVET_R0164_MEAN_SCALED_SATD satd += abs(m[0] + m[1]) >> 2; #else satd += abs(m[0] + m[1]); #endif satd += abs(m[0] - m[1]); satd += abs(m[2] + m[3]); satd += abs(m[2] - m[3]); return satd; } Distortion RdCost::xCalcHADs4x4(const Pel *piOrg, const Pel *piCur, ptrdiff_t strideOrg, ptrdiff_t strideCur, int step) { int k; Distortion satd = 0; TCoeff diff[16], m[16], d[16]; CHECK(step != 1, "Invalid step"); for( k = 0; k < 16; k+=4 ) { diff[k+0] = piOrg[0] - piCur[0]; diff[k+1] = piOrg[1] - piCur[1]; diff[k+2] = piOrg[2] - piCur[2]; diff[k+3] = piOrg[3] - piCur[3]; piCur += strideCur; piOrg += strideOrg; } /*===== hadamard transform =====*/ m[ 0] = diff[ 0] + diff[12]; m[ 1] = diff[ 1] + diff[13]; m[ 2] = diff[ 2] + diff[14]; m[ 3] = diff[ 3] + diff[15]; m[ 4] = diff[ 4] + diff[ 8]; m[ 5] = diff[ 5] + diff[ 9]; m[ 6] = diff[ 6] + diff[10]; m[ 7] = diff[ 7] + diff[11]; m[ 8] = diff[ 4] - diff[ 8]; m[ 9] = diff[ 5] - diff[ 9]; m[10] = diff[ 6] - diff[10]; m[11] = diff[ 7] - diff[11]; m[12] = diff[ 0] - diff[12]; m[13] = diff[ 1] - diff[13]; m[14] = diff[ 2] - diff[14]; m[15] = diff[ 3] - diff[15]; d[ 0] = m[ 0] + m[ 4]; d[ 1] = m[ 1] + m[ 5]; d[ 2] = m[ 2] + m[ 6]; d[ 3] = m[ 3] + m[ 7]; d[ 4] = m[ 8] + m[12]; d[ 5] = m[ 9] + m[13]; d[ 6] = m[10] + m[14]; d[ 7] = m[11] + m[15]; d[ 8] = m[ 0] - m[ 4]; d[ 9] = m[ 1] - m[ 5]; d[10] = m[ 2] - m[ 6]; d[11] = m[ 3] - m[ 7]; d[12] = m[12] - m[ 8]; d[13] = m[13] - m[ 9]; d[14] = m[14] - m[10]; d[15] = m[15] - m[11]; m[ 0] = d[ 0] + d[ 3]; m[ 1] = d[ 1] + d[ 2]; m[ 2] = d[ 1] - d[ 2]; m[ 3] = d[ 0] - d[ 3]; m[ 4] = d[ 4] + d[ 7]; m[ 5] = d[ 5] + d[ 6]; m[ 6] = d[ 5] - d[ 6]; m[ 7] = d[ 4] - d[ 7]; m[ 8] = d[ 8] + d[11]; m[ 9] = d[ 9] + d[10]; m[10] = d[ 9] - d[10]; m[11] = d[ 8] - d[11]; m[12] = d[12] + d[15]; 
/** Sum of absolute 8x8 Hadamard-transformed differences between two blocks.
 *
 * \param piOrg      top-left sample of the original block
 * \param piCur      top-left sample of the block being compared
 * \param strideOrg  distance, in samples, between rows of piOrg
 * \param strideCur  distance, in samples, between rows of piCur
 * \param step       sample step of piCur; only 1 is accepted
 * \return sum of absolute transform coefficients, divided by 4 with rounding
 */
Distortion RdCost::xCalcHADs8x8(const Pel *piOrg, const Pel *piCur, ptrdiff_t strideOrg, ptrdiff_t strideCur, int step)
{
  int k, i, j, jj;
  Distortion sad = 0;
  // diff: row-major 8x8 residual; m1/m2/m3: scratch buffers reused between butterfly stages
  TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];
  CHECK(step != 1, "Invalid step");
  // Residual: one row of eight samples per iteration, both pointers advance by their own stride
  for( k = 0; k < 64; k += 8 )
  {
    diff[k+0] = piOrg[0] - piCur[0];
    diff[k+1] = piOrg[1] - piCur[1];
    diff[k+2] = piOrg[2] - piCur[2];
    diff[k+3] = piOrg[3] - piCur[3];
    diff[k+4] = piOrg[4] - piCur[4];
    diff[k+5] = piOrg[5] - piCur[5];
    diff[k+6] = piOrg[6] - piCur[6];
    diff[k+7] = piOrg[7] - piCur[7];

    piCur += strideCur;
    piOrg += strideOrg;
  }

  // Horizontal pass: three butterfly stages within each row (element distance 4, then 2, then 1).
  // The row result ends up in m2[j][*].
  for (j=0; j < 8; j++)
  {
    jj = j << 3; // offset of row j inside diff[]
    // stage 1: distance 4
    m2[j][0] = diff[jj  ] + diff[jj+4];
    m2[j][1] = diff[jj+1] + diff[jj+5];
    m2[j][2] = diff[jj+2] + diff[jj+6];
    m2[j][3] = diff[jj+3] + diff[jj+7];
    m2[j][4] = diff[jj  ] - diff[jj+4];
    m2[j][5] = diff[jj+1] - diff[jj+5];
    m2[j][6] = diff[jj+2] - diff[jj+6];
    m2[j][7] = diff[jj+3] - diff[jj+7];

    // stage 2: distance 2
    m1[j][0] = m2[j][0] + m2[j][2];
    m1[j][1] = m2[j][1] + m2[j][3];
    m1[j][2] = m2[j][0] - m2[j][2];
    m1[j][3] = m2[j][1] - m2[j][3];
    m1[j][4] = m2[j][4] + m2[j][6];
    m1[j][5] = m2[j][5] + m2[j][7];
    m1[j][6] = m2[j][4] - m2[j][6];
    m1[j][7] = m2[j][5] - m2[j][7];

    // stage 3: distance 1
    m2[j][0] = m1[j][0] + m1[j][1];
    m2[j][1] = m1[j][0] - m1[j][1];
    m2[j][2] = m1[j][2] + m1[j][3];
    m2[j][3] = m1[j][2] - m1[j][3];
    m2[j][4] = m1[j][4] + m1[j][5];
    m2[j][5] = m1[j][4] - m1[j][5];
    m2[j][6] = m1[j][6] + m1[j][7];
    m2[j][7] = m1[j][6] - m1[j][7];
  }

  // Vertical pass: the same three stages down each column, using m3 and m1 as scratch.
  // The final coefficients overwrite m2.
  for (i=0; i < 8; i++)
  {
    // stage 1: distance 4
    m3[0][i] = m2[0][i] + m2[4][i];
    m3[1][i] = m2[1][i] + m2[5][i];
    m3[2][i] = m2[2][i] + m2[6][i];
    m3[3][i] = m2[3][i] + m2[7][i];
    m3[4][i] = m2[0][i] - m2[4][i];
    m3[5][i] = m2[1][i] - m2[5][i];
    m3[6][i] = m2[2][i] - m2[6][i];
    m3[7][i] = m2[3][i] - m2[7][i];

    // stage 2: distance 2
    m1[0][i] = m3[0][i] + m3[2][i];
    m1[1][i] = m3[1][i] + m3[3][i];
    m1[2][i] = m3[0][i] - m3[2][i];
    m1[3][i] = m3[1][i] - m3[3][i];
    m1[4][i] = m3[4][i] + m3[6][i];
    m1[5][i] = m3[5][i] + m3[7][i];
    m1[6][i] = m3[4][i] - m3[6][i];
    m1[7][i] = m3[5][i] - m3[7][i];

    // stage 3: distance 1
    m2[0][i] = m1[0][i] + m1[1][i];
    m2[1][i] = m1[0][i] - m1[1][i];
    m2[2][i] = m1[2][i] + m1[3][i];
    m2[3][i] = m1[2][i] - m1[3][i];
    m2[4][i] = m1[4][i] + m1[5][i];
    m2[5][i] = m1[4][i] - m1[5][i];
    m2[6][i] = m1[6][i] + m1[7][i];
    m2[7][i] = m1[6][i] - m1[7][i];
  }

  // Accumulate the magnitude of all 64 coefficients.
  for (i = 0; i < 8; i++)
  {
    for (j = 0; j < 8; j++)
    {
      sad += abs(m2[i][j]);
    }
  }

#if JVET_R0164_MEAN_SCALED_SATD
  // m2[0][0] is the sum of all 64 residuals; replace its full magnitude by a quarter of it.
  sad -= abs(m2[0][0]);
  sad += abs(m2[0][0]) >> 2;
#endif
  // Divide the total by 4 with rounding.
  sad = ((sad+2)>>2);

  return sad;
}
m2[j][1] = diff[jj + 1] + diff[jj + 9]; m2[j][2] = diff[jj + 2] + diff[jj + 10]; m2[j][3] = diff[jj + 3] + diff[jj + 11]; m2[j][4] = diff[jj + 4] + diff[jj + 12]; m2[j][5] = diff[jj + 5] + diff[jj + 13]; m2[j][6] = diff[jj + 6] + diff[jj + 14]; m2[j][7] = diff[jj + 7] + diff[jj + 15]; m2[j][8] = diff[jj ] - diff[jj + 8]; m2[j][9] = diff[jj + 1] - diff[jj + 9]; m2[j][10] = diff[jj + 2] - diff[jj + 10]; m2[j][11] = diff[jj + 3] - diff[jj + 11]; m2[j][12] = diff[jj + 4] - diff[jj + 12]; m2[j][13] = diff[jj + 5] - diff[jj + 13]; m2[j][14] = diff[jj + 6] - diff[jj + 14]; m2[j][15] = diff[jj + 7] - diff[jj + 15]; m1[j][0] = m2[j][0] + m2[j][4]; m1[j][1] = m2[j][1] + m2[j][5]; m1[j][2] = m2[j][2] + m2[j][6]; m1[j][3] = m2[j][3] + m2[j][7]; m1[j][4] = m2[j][0] - m2[j][4]; m1[j][5] = m2[j][1] - m2[j][5]; m1[j][6] = m2[j][2] - m2[j][6]; m1[j][7] = m2[j][3] - m2[j][7]; m1[j][8] = m2[j][8] + m2[j][12]; m1[j][9] = m2[j][9] + m2[j][13]; m1[j][10] = m2[j][10] + m2[j][14]; m1[j][11] = m2[j][11] + m2[j][15]; m1[j][12] = m2[j][8] - m2[j][12]; m1[j][13] = m2[j][9] - m2[j][13]; m1[j][14] = m2[j][10] - m2[j][14]; m1[j][15] = m2[j][11] - m2[j][15]; m2[j][0] = m1[j][0] + m1[j][2]; m2[j][1] = m1[j][1] + m1[j][3]; m2[j][2] = m1[j][0] - m1[j][2]; m2[j][3] = m1[j][1] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][6]; m2[j][5] = m1[j][5] + m1[j][7]; m2[j][6] = m1[j][4] - m1[j][6]; m2[j][7] = m1[j][5] - m1[j][7]; m2[j][8] = m1[j][8] + m1[j][10]; m2[j][9] = m1[j][9] + m1[j][11]; m2[j][10] = m1[j][8] - m1[j][10]; m2[j][11] = m1[j][9] - m1[j][11]; m2[j][12] = m1[j][12] + m1[j][14]; m2[j][13] = m1[j][13] + m1[j][15]; m2[j][14] = m1[j][12] - m1[j][14]; m2[j][15] = m1[j][13] - m1[j][15]; m1[j][0] = m2[j][0] + m2[j][1]; m1[j][1] = m2[j][0] - m2[j][1]; m1[j][2] = m2[j][2] + m2[j][3]; m1[j][3] = m2[j][2] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][5]; m1[j][5] = m2[j][4] - m2[j][5]; m1[j][6] = m2[j][6] + m2[j][7]; m1[j][7] = m2[j][6] - m2[j][7]; m1[j][8] = m2[j][8] + m2[j][9]; m1[j][9] = m2[j][8] - m2[j][9]; 
m1[j][10] = m2[j][10] + m2[j][11]; m1[j][11] = m2[j][10] - m2[j][11]; m1[j][12] = m2[j][12] + m2[j][13]; m1[j][13] = m2[j][12] - m2[j][13]; m1[j][14] = m2[j][14] + m2[j][15]; m1[j][15] = m2[j][14] - m2[j][15]; } //vertical for( i = 0; i < 16; i++ ) { m2[0][i] = m1[0][i] + m1[4][i]; m2[1][i] = m1[1][i] + m1[5][i]; m2[2][i] = m1[2][i] + m1[6][i]; m2[3][i] = m1[3][i] + m1[7][i]; m2[4][i] = m1[0][i] - m1[4][i]; m2[5][i] = m1[1][i] - m1[5][i]; m2[6][i] = m1[2][i] - m1[6][i]; m2[7][i] = m1[3][i] - m1[7][i]; m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m1[4][i] = m2[4][i] + m2[6][i]; m1[5][i] = m2[5][i] + m2[7][i]; m1[6][i] = m2[4][i] - m2[6][i]; m1[7][i] = m2[5][i] - m2[7][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; } for( i = 0; i < 8; i++ ) { for( j = 0; j < 16; j++ ) { sad += abs( m2[i][j] ); } } #if JVET_R0164_MEAN_SCALED_SATD sad -= abs(m2[0][0]); sad += abs(m2[0][0]) >> 2; #endif sad = ( int ) ( sad / sqrt( 16.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xCalcHADs8x16(const Pel *piOrg, const Pel *piCur, ptrdiff_t strideOrg, ptrdiff_t strideCur) { int k, i, j, jj, sad = 0; int diff[128], m1[16][8], m2[16][8]; for( k = 0; k < 128; k += 8 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; diff[k + 4] = piOrg[4] - piCur[4]; diff[k + 5] = piOrg[5] - piCur[5]; diff[k + 6] = piOrg[6] - piCur[6]; diff[k + 7] = piOrg[7] - piCur[7]; piCur += strideCur; piOrg += strideOrg; } //horizontal for( j = 0; j < 16; j++ ) { jj = j << 3; m2[j][0] = diff[jj] + diff[jj + 4]; m2[j][1] = diff[jj + 1] + diff[jj + 5]; m2[j][2] = diff[jj + 2] + diff[jj + 6]; m2[j][3] = diff[jj + 3] + diff[jj + 7]; 
m2[j][4] = diff[jj] - diff[jj + 4]; m2[j][5] = diff[jj + 1] - diff[jj + 5]; m2[j][6] = diff[jj + 2] - diff[jj + 6]; m2[j][7] = diff[jj + 3] - diff[jj + 7]; m1[j][0] = m2[j][0] + m2[j][2]; m1[j][1] = m2[j][1] + m2[j][3]; m1[j][2] = m2[j][0] - m2[j][2]; m1[j][3] = m2[j][1] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][6]; m1[j][5] = m2[j][5] + m2[j][7]; m1[j][6] = m2[j][4] - m2[j][6]; m1[j][7] = m2[j][5] - m2[j][7]; m2[j][0] = m1[j][0] + m1[j][1]; m2[j][1] = m1[j][0] - m1[j][1]; m2[j][2] = m1[j][2] + m1[j][3]; m2[j][3] = m1[j][2] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][5]; m2[j][5] = m1[j][4] - m1[j][5]; m2[j][6] = m1[j][6] + m1[j][7]; m2[j][7] = m1[j][6] - m1[j][7]; } //vertical for( i = 0; i < 8; i++ ) { m1[0][i] = m2[0][i] + m2[8][i]; m1[1][i] = m2[1][i] + m2[9][i]; m1[2][i] = m2[2][i] + m2[10][i]; m1[3][i] = m2[3][i] + m2[11][i]; m1[4][i] = m2[4][i] + m2[12][i]; m1[5][i] = m2[5][i] + m2[13][i]; m1[6][i] = m2[6][i] + m2[14][i]; m1[7][i] = m2[7][i] + m2[15][i]; m1[8][i] = m2[0][i] - m2[8][i]; m1[9][i] = m2[1][i] - m2[9][i]; m1[10][i] = m2[2][i] - m2[10][i]; m1[11][i] = m2[3][i] - m2[11][i]; m1[12][i] = m2[4][i] - m2[12][i]; m1[13][i] = m2[5][i] - m2[13][i]; m1[14][i] = m2[6][i] - m2[14][i]; m1[15][i] = m2[7][i] - m2[15][i]; m2[0][i] = m1[0][i] + m1[4][i]; m2[1][i] = m1[1][i] + m1[5][i]; m2[2][i] = m1[2][i] + m1[6][i]; m2[3][i] = m1[3][i] + m1[7][i]; m2[4][i] = m1[0][i] - m1[4][i]; m2[5][i] = m1[1][i] - m1[5][i]; m2[6][i] = m1[2][i] - m1[6][i]; m2[7][i] = m1[3][i] - m1[7][i]; m2[8][i] = m1[8][i] + m1[12][i]; m2[9][i] = m1[9][i] + m1[13][i]; m2[10][i] = m1[10][i] + m1[14][i]; m2[11][i] = m1[11][i] + m1[15][i]; m2[12][i] = m1[8][i] - m1[12][i]; m2[13][i] = m1[9][i] - m1[13][i]; m2[14][i] = m1[10][i] - m1[14][i]; m2[15][i] = m1[11][i] - m1[15][i]; m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m1[4][i] = m2[4][i] + m2[6][i]; m1[5][i] = m2[5][i] + m2[7][i]; m1[6][i] = m2[4][i] - m2[6][i]; 
m1[7][i] = m2[5][i] - m2[7][i]; m1[8][i] = m2[8][i] + m2[10][i]; m1[9][i] = m2[9][i] + m2[11][i]; m1[10][i] = m2[8][i] - m2[10][i]; m1[11][i] = m2[9][i] - m2[11][i]; m1[12][i] = m2[12][i] + m2[14][i]; m1[13][i] = m2[13][i] + m2[15][i]; m1[14][i] = m2[12][i] - m2[14][i]; m1[15][i] = m2[13][i] - m2[15][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; m2[8][i] = m1[8][i] + m1[9][i]; m2[9][i] = m1[8][i] - m1[9][i]; m2[10][i] = m1[10][i] + m1[11][i]; m2[11][i] = m1[10][i] - m1[11][i]; m2[12][i] = m1[12][i] + m1[13][i]; m2[13][i] = m1[12][i] - m1[13][i]; m2[14][i] = m1[14][i] + m1[15][i]; m2[15][i] = m1[14][i] - m1[15][i]; } for( i = 0; i < 16; i++ ) { for( j = 0; j < 8; j++ ) { sad += abs( m2[i][j] ); } } #if JVET_R0164_MEAN_SCALED_SATD sad -= abs(m2[0][0]); sad += abs(m2[0][0]) >> 2; #endif sad = ( int ) ( sad / sqrt( 16.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xCalcHADs4x8(const Pel *piOrg, const Pel *piCur, ptrdiff_t strideOrg, ptrdiff_t strideCur) { int k, i, j, jj, sad = 0; int diff[32], m1[8][4], m2[8][4]; for( k = 0; k < 32; k += 4 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; piCur += strideCur; piOrg += strideOrg; } //horizontal for( j = 0; j < 8; j++ ) { jj = j << 2; m2[j][0] = diff[jj] + diff[jj + 2]; m2[j][1] = diff[jj + 1] + diff[jj + 3]; m2[j][2] = diff[jj] - diff[jj + 2]; m2[j][3] = diff[jj + 1] - diff[jj + 3]; m1[j][0] = m2[j][0] + m2[j][1]; m1[j][1] = m2[j][0] - m2[j][1]; m1[j][2] = m2[j][2] + m2[j][3]; m1[j][3] = m2[j][2] - m2[j][3]; } //vertical for( i = 0; i < 4; i++ ) { m2[0][i] = m1[0][i] + m1[4][i]; m2[1][i] = m1[1][i] + m1[5][i]; m2[2][i] = m1[2][i] + m1[6][i]; m2[3][i] = m1[3][i] + m1[7][i]; m2[4][i] = m1[0][i] - m1[4][i]; m2[5][i] = 
m1[1][i] - m1[5][i]; m2[6][i] = m1[2][i] - m1[6][i]; m2[7][i] = m1[3][i] - m1[7][i]; m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m1[4][i] = m2[4][i] + m2[6][i]; m1[5][i] = m2[5][i] + m2[7][i]; m1[6][i] = m2[4][i] - m2[6][i]; m1[7][i] = m2[5][i] - m2[7][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; m2[4][i] = m1[4][i] + m1[5][i]; m2[5][i] = m1[4][i] - m1[5][i]; m2[6][i] = m1[6][i] + m1[7][i]; m2[7][i] = m1[6][i] - m1[7][i]; } for( i = 0; i < 8; i++ ) { for( j = 0; j < 4; j++ ) { sad += abs( m2[i][j] ); } } #if JVET_R0164_MEAN_SCALED_SATD sad -= abs(m2[0][0]); sad += abs(m2[0][0]) >> 2; #endif sad = ( int ) ( sad / sqrt( 4.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xCalcHADs8x4(const Pel *piOrg, const Pel *piCur, ptrdiff_t strideOrg, ptrdiff_t strideCur) { int k, i, j, jj, sad = 0; int diff[32], m1[4][8], m2[4][8]; for( k = 0; k < 32; k += 8 ) { diff[k + 0] = piOrg[0] - piCur[0]; diff[k + 1] = piOrg[1] - piCur[1]; diff[k + 2] = piOrg[2] - piCur[2]; diff[k + 3] = piOrg[3] - piCur[3]; diff[k + 4] = piOrg[4] - piCur[4]; diff[k + 5] = piOrg[5] - piCur[5]; diff[k + 6] = piOrg[6] - piCur[6]; diff[k + 7] = piOrg[7] - piCur[7]; piCur += strideCur; piOrg += strideOrg; } //horizontal for( j = 0; j < 4; j++ ) { jj = j << 3; m2[j][0] = diff[jj] + diff[jj + 4]; m2[j][1] = diff[jj + 1] + diff[jj + 5]; m2[j][2] = diff[jj + 2] + diff[jj + 6]; m2[j][3] = diff[jj + 3] + diff[jj + 7]; m2[j][4] = diff[jj] - diff[jj + 4]; m2[j][5] = diff[jj + 1] - diff[jj + 5]; m2[j][6] = diff[jj + 2] - diff[jj + 6]; m2[j][7] = diff[jj + 3] - diff[jj + 7]; m1[j][0] = m2[j][0] + m2[j][2]; m1[j][1] = m2[j][1] + m2[j][3]; m1[j][2] = m2[j][0] - m2[j][2]; m1[j][3] = m2[j][1] - m2[j][3]; m1[j][4] = m2[j][4] + m2[j][6]; m1[j][5] = m2[j][5] + m2[j][7]; m1[j][6] = m2[j][4] - m2[j][6]; m1[j][7] = m2[j][5] - m2[j][7]; m2[j][0] = m1[j][0] + 
m1[j][1]; m2[j][1] = m1[j][0] - m1[j][1]; m2[j][2] = m1[j][2] + m1[j][3]; m2[j][3] = m1[j][2] - m1[j][3]; m2[j][4] = m1[j][4] + m1[j][5]; m2[j][5] = m1[j][4] - m1[j][5]; m2[j][6] = m1[j][6] + m1[j][7]; m2[j][7] = m1[j][6] - m1[j][7]; } //vertical for( i = 0; i < 8; i++ ) { m1[0][i] = m2[0][i] + m2[2][i]; m1[1][i] = m2[1][i] + m2[3][i]; m1[2][i] = m2[0][i] - m2[2][i]; m1[3][i] = m2[1][i] - m2[3][i]; m2[0][i] = m1[0][i] + m1[1][i]; m2[1][i] = m1[0][i] - m1[1][i]; m2[2][i] = m1[2][i] + m1[3][i]; m2[3][i] = m1[2][i] - m1[3][i]; } for( i = 0; i < 4; i++ ) { for( j = 0; j < 8; j++ ) { sad += abs( m2[i][j] ); } } #if JVET_R0164_MEAN_SCALED_SATD sad -= abs(m2[0][0]); sad += abs(m2[0][0]) >> 2; #endif sad = ( int ) ( sad / sqrt( 4.0 * 8 ) * 2 ); return sad; } Distortion RdCost::xGetHADs( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetHADsw( rcDtParam ); } const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int rows = rcDtParam.org.height; const int cols = rcDtParam.org.width; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const int step = rcDtParam.step; int x = 0, y = 0; Distortion sum = 0; if (cols > rows && (rows & 7) == 0 && (cols & 15) == 0) { for (y = 0; y < rows; y += 8) { for (x = 0; x < cols; x += 16) { sum += xCalcHADs16x8(&piOrg[x], &piCur[x], strideOrg, strideCur); } piOrg += strideOrg * 8; piCur += strideCur * 8; } } else if (cols < rows && (cols & 7) == 0 && (rows & 15) == 0) { for (y = 0; y < rows; y += 16) { for (x = 0; x < cols; x += 8) { sum += xCalcHADs8x16(&piOrg[x], &piCur[x], strideOrg, strideCur); } piOrg += strideOrg * 16; piCur += strideCur * 16; } } else if (cols > rows && (rows & 3) == 0 && (cols & 7) == 0) { for (y = 0; y < rows; y += 4) { for (x = 0; x < cols; x += 8) { sum += xCalcHADs8x4(&piOrg[x], &piCur[x], strideOrg, strideCur); } piOrg += strideOrg * 4; piCur += strideCur * 4; } } else if (cols < rows && (cols 
& 3) == 0 && (rows & 7) == 0) { for (y = 0; y < rows; y += 8) { for (x = 0; x < cols; x += 4) { sum += xCalcHADs4x8(&piOrg[x], &piCur[x], strideOrg, strideCur); } piOrg += strideOrg * 8; piCur += strideCur * 8; } } else if ((rows % 8 == 0) && (cols % 8 == 0)) { ptrdiff_t offsetOrg = strideOrg << 3; ptrdiff_t offsetCur = strideCur << 3; for (y = 0; y < rows; y += 8) { for (x = 0; x < cols; x += 8) { sum += xCalcHADs8x8(&piOrg[x], &piCur[x * step], strideOrg, strideCur, step); } piOrg += offsetOrg; piCur += offsetCur; } } else if ((rows % 4 == 0) && (cols % 4 == 0)) { ptrdiff_t offsetOrg = strideOrg << 2; ptrdiff_t offsetCur = strideCur << 2; for (y = 0; y < rows; y += 4) { for (x = 0; x < cols; x += 4) { sum += xCalcHADs4x4(&piOrg[x], &piCur[x * step], strideOrg, strideCur, step); } piOrg += offsetOrg; piCur += offsetCur; } } else if ((rows % 2 == 0) && (cols % 2 == 0)) { ptrdiff_t offsetOrg = strideOrg << 1; ptrdiff_t offsetCur = strideCur << 1; for (y = 0; y < rows; y += 2) { for (x = 0; x < cols; x += 2) { sum += xCalcHADs2x2(&piOrg[x], &piCur[x * step], strideOrg, strideCur, step); } piOrg += offsetOrg; piCur += offsetCur; } } else { THROW( "Invalid size" ); } return (sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); } #if WCG_EXT uint32_t RdCost::m_signalType = RESHAPE_SIGNAL_NULL; int32_t RdCost::m_chromaWeight = MSE_WEIGHT_ONE; int RdCost::m_lumaBD = 10; std::vector RdCost::m_reshapeLumaLevelToWeightPLUT; std::vector RdCost::m_lumaLevelToWeightPLUT; void RdCost::saveUnadjustedLambda() { m_dLambda_unadjusted = m_dLambda; m_distScaleUnadjusted = m_distScale; } void RdCost::initLumaLevelToWeightTable(int bitDepth) { int lutSize = 1 << bitDepth; if (m_lumaLevelToWeightPLUT.empty()) { m_lumaLevelToWeightPLUT.resize(lutSize, 1.0); } for (int i = 0; i < lutSize; i++) { double x = bitDepth < 10 ? i << (10 - bitDepth) : bitDepth > 10 ? 
i >> (bitDepth - 10) : i; double y; y = 0.015 * x - 1.5 - 6; // this is the Equation used to derive the luma qp LUT for HDR in MPEG HDR anchor3.2 (JCTCX-X1020) y = y < -3 ? -3 : (y > 6 ? 6 : y); m_lumaLevelToWeightPLUT[i] = pow(2.0, y / 3.0); // or power(10, dQp/10) they are almost equal } } void RdCost::initLumaLevelToWeightTableReshape() { int lutSize = 1 << m_lumaBD; if (m_reshapeLumaLevelToWeightPLUT.empty()) { m_reshapeLumaLevelToWeightPLUT.resize(lutSize, MSE_WEIGHT_ONE); } if (m_lumaLevelToWeightPLUT.empty()) { m_lumaLevelToWeightPLUT.resize(lutSize, 1.0); } if (m_signalType == RESHAPE_SIGNAL_PQ) { for (int i = 0; i < (1 << m_lumaBD); i++) { double x = m_lumaBD < 10 ? i << (10 - m_lumaBD) : m_lumaBD > 10 ? i >> (m_lumaBD - 10) : i; double y; y = 0.015*x - 1.5 - 6; y = y < -3 ? -3 : (y > 6 ? 6 : y); const double weight = pow(2.0, y / 3.0); m_reshapeLumaLevelToWeightPLUT[i] = (int) (weight * MSE_WEIGHT_ONE); m_lumaLevelToWeightPLUT[i] = weight; } } } void RdCost::updateReshapeLumaLevelToWeightTableChromaMD(std::vector& ILUT) { for (int i = 0; i < (1 << m_lumaBD); i++) { m_reshapeLumaLevelToWeightPLUT[i] = (int) (m_lumaLevelToWeightPLUT[ILUT[i]] * MSE_WEIGHT_ONE); } } void RdCost::restoreReshapeLumaLevelToWeightTable() { for (int i = 0; i < (1 << m_lumaBD); i++) { m_reshapeLumaLevelToWeightPLUT.at(i) = (int) (m_lumaLevelToWeightPLUT.at(i) * MSE_WEIGHT_ONE); } } void RdCost::updateReshapeLumaLevelToWeightTable(SliceReshapeInfo &sliceReshape, Pel *wtTable, double cwt) { if (m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG) { if (sliceReshape.getSliceReshapeModelPresentFlag()) { double wBin = 1.0; double weight = 1.0; int histLens = (1 << m_lumaBD) / PIC_CODE_CW_BINS; for (int i = 0; i < PIC_CODE_CW_BINS; i++) { if ((i < sliceReshape.reshaperModelMinBinIdx) || (i > sliceReshape.reshaperModelMaxBinIdx)) { weight = 1.0; } else { if (sliceReshape.reshaperModelBinCWDelta[i] == 1 || (sliceReshape.reshaperModelBinCWDelta[i] == -1 * histLens)) { 
weight = wBin; } else { weight = (double)wtTable[i] / (double)histLens; weight = weight*weight; } } for (int j = 0; j < histLens; j++) { int ii = i*histLens + j; m_reshapeLumaLevelToWeightPLUT[ii] = (int) (weight * MSE_WEIGHT_ONE); } } m_chromaWeight = (int) (cwt * MSE_WEIGHT_ONE); } else { THROW("updateReshapeLumaLevelToWeightTable ERROR!!"); } } else { THROW("updateReshapeLumaLevelToWeightTable not support other signal types!!"); } } Distortion RdCost::getWeightedMSE(int compIdx, const Pel org, const Pel cur, const uint32_t shift, const Pel orgLuma) { CHECKD(org < 0, "Sample value must be positive"); if (compIdx == COMPONENT_Y) { CHECKD(org != orgLuma, "Luma sample values must be equal to each other"); } Pel diff = org - cur; // use luma to get weight int64_t weight = MSE_WEIGHT_ONE; if (m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG) { if (compIdx == COMPONENT_Y) { weight = m_reshapeLumaLevelToWeightPLUT[orgLuma]; } else { weight = m_chromaWeight; } } else { weight = m_reshapeLumaLevelToWeightPLUT[orgLuma]; } return (weight * (diff * diff) + (1 << MSE_WEIGHT_FRAC_BITS >> 1)) >> (MSE_WEIGHT_FRAC_BITS + shift); } Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); // ignore it for now } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int cols = rcDtParam.org.width; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const ptrdiff_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { for (int n = 0; n < cols; n++) { sum += getWeightedMSE(rcDtParam.compID, piOrg[n], piCur[n], shift, piOrgLuma[n << 
cShift]); } piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE2_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 2, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); // ignore it for now } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { sum += getWeightedMSE( rcDtParam.compID, piOrg[0], piCur[0], shift, piOrgLuma[size_t(0) << cShift]); // piOrg[0] - piCur[0]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[1], piCur[1], shift, piOrgLuma[size_t(1) << cShift]); // piOrg[1] - piCur[1]; sum += Distortion(( temp * temp ) >> shift); piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE4_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 4, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); // ignore it for now } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { 
sum += getWeightedMSE( rcDtParam.compID, piOrg[0], piCur[0], shift, piOrgLuma[size_t(0) << cShift]); // piOrg[0] - piCur[0]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[1], piCur[1], shift, piOrgLuma[size_t(1) << cShift]); // piOrg[1] - piCur[1]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[2], piCur[2], shift, piOrgLuma[size_t(2) << cShift]); // piOrg[2] - piCur[2]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[3], piCur[3], shift, piOrgLuma[size_t(3) << cShift]); // piOrg[3] - piCur[3]; sum += Distortion(( temp * temp ) >> shift); piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE8_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 8, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { sum += getWeightedMSE(rcDtParam.compID, piOrg[0], piCur[0], shift, piOrgLuma[0]); // piOrg[0] - piCur[0]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[1], piCur[1], shift, piOrgLuma[size_t(1) << cShift]); // piOrg[1] - piCur[1]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[2], piCur[2], shift, piOrgLuma[size_t(2) << cShift]); // piOrg[2] - piCur[2]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( 
rcDtParam.compID, piOrg[3], piCur[3], shift, piOrgLuma[size_t(3) << cShift]); // piOrg[3] - piCur[3]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[4], piCur[4], shift, piOrgLuma[size_t(4) << cShift]); // piOrg[4] - piCur[4]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[5], piCur[5], shift, piOrgLuma[size_t(5) << cShift]); // piOrg[5] - piCur[5]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[6], piCur[6], shift, piOrgLuma[size_t(6) << cShift]); // piOrg[6] - piCur[6]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[7], piCur[7], shift, piOrgLuma[size_t(7) << cShift]); // piOrg[7] - piCur[7]; sum += Distortion(( temp * temp ) >> shift); piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE16_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 16, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { sum += getWeightedMSE(rcDtParam.compID, piOrg[0], piCur[0], shift, piOrgLuma[0]); // piOrg[ 0] - piCur[ 0]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[1], piCur[1], shift, piOrgLuma[size_t(1) << cShift]); // piOrg[ 1] - piCur[ 1]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, 
piOrg[2], piCur[2], shift, piOrgLuma[size_t(2) << cShift]); // piOrg[ 2] - piCur[ 2]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[3], piCur[3], shift, piOrgLuma[size_t(3) << cShift]); // piOrg[ 3] - piCur[ 3]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[4], piCur[4], shift, piOrgLuma[size_t(4) << cShift]); // piOrg[ 4] - piCur[ 4]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[5], piCur[5], shift, piOrgLuma[size_t(5) << cShift]); // piOrg[ 5] - piCur[ 5]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[6], piCur[6], shift, piOrgLuma[size_t(6) << cShift]); // piOrg[ 6] - piCur[ 6]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[7], piCur[7], shift, piOrgLuma[size_t(7) << cShift]); // piOrg[ 7] - piCur[ 7]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[8], piCur[8], shift, piOrgLuma[size_t(8) << cShift]); // piOrg[ 8] - piCur[ 8]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[9], piCur[9], shift, piOrgLuma[size_t(9) << cShift]); // piOrg[ 9] - piCur[ 9]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[10], piCur[10], shift, piOrgLuma[size_t(10) << cShift]); // piOrg[10] - piCur[10]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[11], piCur[11], shift, piOrgLuma[size_t(11) << cShift]); // piOrg[11] - piCur[11]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[12], piCur[12], shift, piOrgLuma[size_t(12) << cShift]); // piOrg[12] - piCur[12]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[13], piCur[13], shift, piOrgLuma[size_t(13) << cShift]); // piOrg[13] - piCur[13]; sum += 
Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[14], piCur[14], shift, piOrgLuma[size_t(14) << cShift]); // piOrg[14] - piCur[14]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE( rcDtParam.compID, piOrg[15], piCur[15], shift, piOrgLuma[size_t(15) << cShift]); // piOrg[15] - piCur[15]; sum += Distortion(( temp * temp ) >> shift); piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE16N_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const int cols = rcDtParam.org.width; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { for (int n = 0; n < cols; n += 16) { sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 0], piCur[n + 0], shift, piOrgLuma[size_t(n + 0) << cShift]); // temp = piOrg[n+ 0] - piCur[n+ 0]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 1], piCur[n + 1], shift, piOrgLuma[size_t(n + 1) << cShift]); // temp = piOrg[n+ 1] - piCur[n+ 1]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 2], piCur[n + 2], shift, piOrgLuma[size_t(n + 2) << cShift]); // temp = piOrg[n+ 2] - piCur[n+ 2]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 3], piCur[n + 3], shift, piOrgLuma[size_t(n + 3) << cShift]); // temp = piOrg[n+ 3] - piCur[n+ 3]; sum += // Distortion(( temp * 
temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 4], piCur[n + 4], shift, piOrgLuma[size_t(n + 4) << cShift]); // temp = piOrg[n+ 4] - piCur[n+ 4]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 5], piCur[n + 5], shift, piOrgLuma[size_t(n + 5) << cShift]); // temp = piOrg[n+ 5] - piCur[n+ 5]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 6], piCur[n + 6], shift, piOrgLuma[size_t(n + 6) << cShift]); // temp = piOrg[n+ 6] - piCur[n+ 6]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 7], piCur[n + 7], shift, piOrgLuma[size_t(n + 7) << cShift]); // temp = piOrg[n+ 7] - piCur[n+ 7]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 8], piCur[n + 8], shift, piOrgLuma[size_t(n + 8) << cShift]); // temp = piOrg[n+ 8] - piCur[n+ 8]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 9], piCur[n + 9], shift, piOrgLuma[size_t(n + 9) << cShift]); // temp = piOrg[n+ 9] - piCur[n+ 9]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 10], piCur[n + 10], shift, piOrgLuma[size_t(n + 10) << cShift]); // temp = piOrg[n+10] - piCur[n+10]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 11], piCur[n + 11], shift, piOrgLuma[size_t(n + 11) << cShift]); // temp = piOrg[n+11] - piCur[n+11]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 12], piCur[n + 12], shift, piOrgLuma[size_t(n + 12) << cShift]); // temp = piOrg[n+12] - piCur[n+12]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 13], piCur[n + 13], shift, piOrgLuma[size_t(n + 13) << cShift]); // temp = piOrg[n+13] - piCur[n+13]; sum += // Distortion(( temp * temp ) >> shift); 
sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 14], piCur[n + 14], shift, piOrgLuma[size_t(n + 14) << cShift]); // temp = piOrg[n+14] - piCur[n+14]; sum += // Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[n + 15], piCur[n + 15], shift, piOrgLuma[size_t(n + 15) << cShift]); // temp = piOrg[n+15] - piCur[n+15]; sum += // Distortion(( temp * temp ) >> shift); } piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE32_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 32, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; for (; rows != 0; rows--) { sum += getWeightedMSE( rcDtParam.compID, piOrg[0], piCur[0], shift, piOrgLuma[size_t(0)]); // temp = piOrg[ 0] - piCur[ 0]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[1], piCur[1], shift, piOrgLuma[size_t(1) << cShift]); // temp = piOrg[ 1] - piCur[ 1]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[2], piCur[2], shift, piOrgLuma[size_t(2) << cShift]); // temp = piOrg[ 2] - piCur[ 2]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[3], piCur[3], shift, piOrgLuma[size_t(3) << cShift]); // temp = piOrg[ 3] - piCur[ 3]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[4], piCur[4], shift, 
piOrgLuma[size_t(4) << cShift]); // temp = piOrg[ 4] - piCur[ 4]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[5], piCur[5], shift, piOrgLuma[size_t(5) << cShift]); // temp = piOrg[ 5] - piCur[ 5]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[6], piCur[6], shift, piOrgLuma[size_t(6) << cShift]); // temp = piOrg[ 6] - piCur[ 6]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[7], piCur[7], shift, piOrgLuma[size_t(7) << cShift]); // temp = piOrg[ 7] - piCur[ 7]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[8], piCur[8], shift, piOrgLuma[size_t(8) << cShift]); // temp = piOrg[ 8] - piCur[ 8]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[9], piCur[9], shift, piOrgLuma[size_t(9) << cShift]); // temp = piOrg[ 9] - piCur[ 9]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[10], piCur[10], shift, piOrgLuma[size_t(10) << cShift]); // temp = piOrg[10] - piCur[10]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[11], piCur[11], shift, piOrgLuma[size_t(11) << cShift]); // temp = piOrg[11] - piCur[11]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[12], piCur[12], shift, piOrgLuma[size_t(12) << cShift]); // temp = piOrg[12] - piCur[12]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[13], piCur[13], shift, piOrgLuma[size_t(13) << cShift]); // temp = piOrg[13] - piCur[13]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[14], piCur[14], shift, piOrgLuma[size_t(14) << cShift]); // temp = piOrg[14] - piCur[14]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[15], 
piCur[15], shift, piOrgLuma[size_t(15) << cShift]); // temp = piOrg[15] - piCur[15]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[16], piCur[16], shift, piOrgLuma[size_t(16) << cShift]); // temp = piOrg[16] - piCur[16]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[17], piCur[17], shift, piOrgLuma[size_t(17) << cShift]); // temp = piOrg[17] - piCur[17]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[18], piCur[18], shift, piOrgLuma[size_t(18) << cShift]); // temp = piOrg[18] - piCur[18]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[19], piCur[19], shift, piOrgLuma[size_t(19) << cShift]); // temp = piOrg[19] - piCur[19]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[20], piCur[20], shift, piOrgLuma[size_t(20) << cShift]); // temp = piOrg[20] - piCur[20]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[21], piCur[21], shift, piOrgLuma[size_t(21) << cShift]); // temp = piOrg[21] - piCur[21]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[22], piCur[22], shift, piOrgLuma[size_t(22) << cShift]); // temp = piOrg[22] - piCur[22]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[23], piCur[23], shift, piOrgLuma[size_t(23) << cShift]); // temp = piOrg[23] - piCur[23]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[24], piCur[24], shift, piOrgLuma[size_t(24) << cShift]); // temp = piOrg[24] - piCur[24]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[25], piCur[25], shift, piOrgLuma[size_t(25) << cShift]); // temp = piOrg[25] - piCur[25]; sum += Distortion(( // temp * temp ) >> shift); sum += 
getWeightedMSE(rcDtParam.compID, piOrg[26], piCur[26], shift, piOrgLuma[size_t(26) << cShift]); // temp = piOrg[26] - piCur[26]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[27], piCur[27], shift, piOrgLuma[size_t(27) << cShift]); // temp = piOrg[27] - piCur[27]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[28], piCur[28], shift, piOrgLuma[size_t(28) << cShift]); // temp = piOrg[28] - piCur[28]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[29], piCur[29], shift, piOrgLuma[size_t(29) << cShift]); // temp = piOrg[29] - piCur[29]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[30], piCur[30], shift, piOrgLuma[size_t(30) << cShift]); // temp = piOrg[30] - piCur[30]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[31], piCur[31], shift, piOrgLuma[size_t(31) << cShift]); // temp = piOrg[31] - piCur[31]; sum += Distortion(( // temp * temp ) >> shift); piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } Distortion RdCost::xGetSSE64_WTD( const DistParam &rcDtParam ) { if( rcDtParam.applyWeight ) { CHECK( rcDtParam.org.width != 64, "" ); return RdCostWeightPrediction::xGetSSEw( rcDtParam ); } int rows = rcDtParam.org.height; const Pel* piOrg = rcDtParam.org.buf; const Pel* piCur = rcDtParam.cur.buf; const ptrdiff_t strideCur = rcDtParam.cur.stride; const ptrdiff_t strideOrg = rcDtParam.org.stride; const Pel* piOrgLuma = rcDtParam.orgLuma.buf; const size_t strideOrgLuma = rcDtParam.orgLuma.stride; const size_t cShift = rcDtParam.cShiftX; const size_t cShiftY = rcDtParam.cShiftY; Distortion sum = 0; uint32_t shift = DISTORTION_PRECISION_ADJUSTMENT((rcDtParam.bitDepth)) << 1; for (; rows != 0; rows--) { sum += getWeightedMSE( rcDtParam.compID, piOrg[0], piCur[0], shift, piOrgLuma[size_t(0)]); // 
temp = piOrg[ 0] - piCur[ 0]; sum += Distortion(( temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[1], piCur[1], shift, piOrgLuma[size_t(1) << cShift]); // temp = piOrg[ 1] - piCur[ 1]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[2], piCur[2], shift, piOrgLuma[size_t(2) << cShift]); // temp = piOrg[ 2] - piCur[ 2]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[3], piCur[3], shift, piOrgLuma[size_t(3) << cShift]); // temp = piOrg[ 3] - piCur[ 3]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[4], piCur[4], shift, piOrgLuma[size_t(4) << cShift]); // temp = piOrg[ 4] - piCur[ 4]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[5], piCur[5], shift, piOrgLuma[size_t(5) << cShift]); // temp = piOrg[ 5] - piCur[ 5]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[6], piCur[6], shift, piOrgLuma[size_t(6) << cShift]); // temp = piOrg[ 6] - piCur[ 6]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[7], piCur[7], shift, piOrgLuma[size_t(7) << cShift]); // temp = piOrg[ 7] - piCur[ 7]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[8], piCur[8], shift, piOrgLuma[size_t(8) << cShift]); // temp = piOrg[ 8] - piCur[ 8]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[9], piCur[9], shift, piOrgLuma[size_t(9) << cShift]); // temp = piOrg[ 9] - piCur[ 9]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[10], piCur[10], shift, piOrgLuma[size_t(10) << cShift]); // temp = piOrg[10] - piCur[10]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[11], piCur[11], shift, piOrgLuma[size_t(11) << cShift]); // temp 
= piOrg[11] - piCur[11]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[12], piCur[12], shift, piOrgLuma[size_t(12) << cShift]); // temp = piOrg[12] - piCur[12]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[13], piCur[13], shift, piOrgLuma[size_t(13) << cShift]); // temp = piOrg[13] - piCur[13]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[14], piCur[14], shift, piOrgLuma[size_t(14) << cShift]); // temp = piOrg[14] - piCur[14]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[15], piCur[15], shift, piOrgLuma[size_t(15) << cShift]); // temp = piOrg[15] - piCur[15]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[16], piCur[16], shift, piOrgLuma[size_t(16) << cShift]); // temp = piOrg[16] - piCur[16]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[17], piCur[17], shift, piOrgLuma[size_t(17) << cShift]); // temp = piOrg[17] - piCur[17]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[18], piCur[18], shift, piOrgLuma[size_t(18) << cShift]); // temp = piOrg[18] - piCur[18]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[19], piCur[19], shift, piOrgLuma[size_t(19) << cShift]); // temp = piOrg[19] - piCur[19]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[20], piCur[20], shift, piOrgLuma[size_t(20) << cShift]); // temp = piOrg[20] - piCur[20]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[21], piCur[21], shift, piOrgLuma[size_t(21) << cShift]); // temp = piOrg[21] - piCur[21]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[22], piCur[22], shift, 
piOrgLuma[size_t(22) << cShift]); // temp = piOrg[22] - piCur[22]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[23], piCur[23], shift, piOrgLuma[size_t(23) << cShift]); // temp = piOrg[23] - piCur[23]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[24], piCur[24], shift, piOrgLuma[size_t(24) << cShift]); // temp = piOrg[24] - piCur[24]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[25], piCur[25], shift, piOrgLuma[size_t(25) << cShift]); // temp = piOrg[25] - piCur[25]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[26], piCur[26], shift, piOrgLuma[size_t(26) << cShift]); // temp = piOrg[26] - piCur[26]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[27], piCur[27], shift, piOrgLuma[size_t(27) << cShift]); // temp = piOrg[27] - piCur[27]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[28], piCur[28], shift, piOrgLuma[size_t(28) << cShift]); // temp = piOrg[28] - piCur[28]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[29], piCur[29], shift, piOrgLuma[size_t(29) << cShift]); // temp = piOrg[29] - piCur[29]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[30], piCur[30], shift, piOrgLuma[size_t(30) << cShift]); // temp = piOrg[30] - piCur[30]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[31], piCur[31], shift, piOrgLuma[size_t(31) << cShift]); // temp = piOrg[31] - piCur[31]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[32], piCur[32], shift, piOrgLuma[size_t(32) << cShift]); // temp = piOrg[32] - piCur[32]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, 
piOrg[33], piCur[33], shift, piOrgLuma[size_t(33) << cShift]); // temp = piOrg[33] - piCur[33]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[34], piCur[34], shift, piOrgLuma[size_t(34) << cShift]); // temp = piOrg[34] - piCur[34]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[35], piCur[35], shift, piOrgLuma[size_t(35) << cShift]); // temp = piOrg[35] - piCur[35]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[36], piCur[36], shift, piOrgLuma[size_t(36) << cShift]); // temp = piOrg[36] - piCur[36]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[37], piCur[37], shift, piOrgLuma[size_t(37) << cShift]); // temp = piOrg[37] - piCur[37]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[38], piCur[38], shift, piOrgLuma[size_t(38) << cShift]); // temp = piOrg[38] - piCur[38]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[39], piCur[39], shift, piOrgLuma[size_t(39) << cShift]); // temp = piOrg[39] - piCur[39]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[40], piCur[40], shift, piOrgLuma[size_t(40) << cShift]); // temp = piOrg[40] - piCur[40]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[41], piCur[41], shift, piOrgLuma[size_t(41) << cShift]); // temp = piOrg[41] - piCur[41]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[42], piCur[42], shift, piOrgLuma[size_t(42) << cShift]); // temp = piOrg[42] - piCur[42]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[43], piCur[43], shift, piOrgLuma[size_t(43) << cShift]); // temp = piOrg[43] - piCur[43]; sum += Distortion(( // temp * temp ) >> shift); sum += 
getWeightedMSE(rcDtParam.compID, piOrg[44], piCur[44], shift, piOrgLuma[size_t(44) << cShift]); // temp = piOrg[44] - piCur[44]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[45], piCur[45], shift, piOrgLuma[size_t(45) << cShift]); // temp = piOrg[45] - piCur[45]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[46], piCur[46], shift, piOrgLuma[size_t(46) << cShift]); // temp = piOrg[46] - piCur[46]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[47], piCur[47], shift, piOrgLuma[size_t(47) << cShift]); // temp = piOrg[47] - piCur[47]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[48], piCur[48], shift, piOrgLuma[size_t(48) << cShift]); // temp = piOrg[48] - piCur[48]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[49], piCur[49], shift, piOrgLuma[size_t(49) << cShift]); // temp = piOrg[49] - piCur[49]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[50], piCur[50], shift, piOrgLuma[size_t(50) << cShift]); // temp = piOrg[50] - piCur[50]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[51], piCur[51], shift, piOrgLuma[size_t(51) << cShift]); // temp = piOrg[51] - piCur[51]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[52], piCur[52], shift, piOrgLuma[size_t(52) << cShift]); // temp = piOrg[52] - piCur[52]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[53], piCur[53], shift, piOrgLuma[size_t(53) << cShift]); // temp = piOrg[53] - piCur[53]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[54], piCur[54], shift, piOrgLuma[size_t(54) << cShift]); // temp = piOrg[54] - piCur[54]; sum += Distortion(( // temp * 
temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[55], piCur[55], shift, piOrgLuma[size_t(55) << cShift]); // temp = piOrg[55] - piCur[55]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[56], piCur[56], shift, piOrgLuma[size_t(56) << cShift]); // temp = piOrg[56] - piCur[56]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[57], piCur[57], shift, piOrgLuma[size_t(57) << cShift]); // temp = piOrg[57] - piCur[57]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[58], piCur[58], shift, piOrgLuma[size_t(58) << cShift]); // temp = piOrg[58] - piCur[58]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[59], piCur[59], shift, piOrgLuma[size_t(59) << cShift]); // temp = piOrg[59] - piCur[59]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[60], piCur[60], shift, piOrgLuma[size_t(60) << cShift]); // temp = piOrg[60] - piCur[60]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[61], piCur[61], shift, piOrgLuma[size_t(61) << cShift]); // temp = piOrg[61] - piCur[61]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[62], piCur[62], shift, piOrgLuma[size_t(62) << cShift]); // temp = piOrg[62] - piCur[62]; sum += Distortion(( // temp * temp ) >> shift); sum += getWeightedMSE(rcDtParam.compID, piOrg[63], piCur[63], shift, piOrgLuma[size_t(63) << cShift]); // temp = piOrg[63] - piCur[63]; sum += Distortion(( // temp * temp ) >> shift); piOrg += strideOrg; piCur += strideCur; piOrgLuma += strideOrgLuma << cShiftY; } return (sum); } #endif Pel orgCopy[MAX_CU_SIZE * MAX_CU_SIZE]; #if _OPENMP #pragma omp threadprivate(orgCopy) #endif Distortion RdCost::xGetMRHADs( const DistParam &rcDtParam ) { const Pel offset = rcDtParam.org.meanDiff( rcDtParam.cur ); 
PelBuf modOrg( orgCopy, rcDtParam.org ); modOrg.copyFrom( rcDtParam.org ); modOrg.subtract( offset ); DistParam modDistParam = rcDtParam; modDistParam.org = modOrg; return m_distortionFunc[DFunc::HAD](modDistParam); } void RdCost::setDistParam(DistParam &rcDP, const CPelBuf &org, const Pel *piRefY, ptrdiff_t iRefStride, const Pel *mask, ptrdiff_t iMaskStride, int stepX, ptrdiff_t iMaskStride2, int bitDepth, ComponentID compID) { rcDP.bitDepth = bitDepth; rcDP.compID = compID; // set Original & Curr Pointer / Stride rcDP.org = org; rcDP.cur.buf = piRefY; rcDP.cur.stride = iRefStride; // set Mask rcDP.mask = mask; rcDP.maskStride = iMaskStride; rcDP.stepX = stepX; rcDP.maskStride2 = iMaskStride2; // set Block Width / Height rcDP.cur.width = org.width; rcDP.cur.height = org.height; rcDP.maximumDistortionForEarlyExit = std::numeric_limits::max(); // set Cost function for motion estimation with Mask rcDP.distFunc = m_distortionFunc[DFunc::SAD_WITH_MASK]; } Distortion RdCost::xGetSADwMask( const DistParam& rcDtParam ) { if ( rcDtParam.applyWeight ) { return RdCostWeightPrediction::xGetSADw( rcDtParam ); } const Pel* org = rcDtParam.org.buf; const Pel* cur = rcDtParam.cur.buf; const Pel* mask = rcDtParam.mask; const int cols = rcDtParam.org.width; int rows = rcDtParam.org.height; const int subShift = rcDtParam.subShift; const int subStep = ( 1 << subShift); const ptrdiff_t strideCur = rcDtParam.cur.stride * subStep; const ptrdiff_t strideOrg = rcDtParam.org.stride * subStep; const ptrdiff_t strideMask = rcDtParam.maskStride * subStep; const int stepX = rcDtParam.stepX; const ptrdiff_t strideMask2 = rcDtParam.maskStride2; const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); Distortion sum = 0; for (; rows != 0; rows -= subStep) { for (int n = 0; n < cols; n++) { sum += abs(org[n] - cur[n]) * *mask; mask += stepX; } org += strideOrg; cur += strideCur; mask += strideMask; mask += strideMask2; } sum <<= subShift; return (sum >> distortionShift 
); } //! \}