/* The copyright in this software is being made available under the BSD * License, included below. This software may be subject to other third party * and contributor rights, including patent rights, and no such rights are * granted under this license. * * Copyright (c) 2010-2023, ITU/ISO/IEC * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /** \file QuantRDOQ.cpp \brief transform and quantization class */ #include "QuantRDOQ.h" #include "UnitTools.h" #include "ContextModelling.h" #include "CodingStructure.h" #include "dtrace_next.h" #include "dtrace_buffer.h" #include #include #include struct coeffGroupRDStats { int iNNZbeforePos0; double d64CodedLevelandDist; // distortion and level cost only double d64UncodedDist; // all zero coded block distortion double d64SigCost; double d64SigCost_0; int iNumSbbCtxBins; }; //! \ingroup CommonLib //! \{ // ==================================================================================================================== // Constants // ==================================================================================================================== // ==================================================================================================================== // Static functions // ==================================================================================================================== // ==================================================================================================================== // QuantRDOQ class member functions // ==================================================================================================================== QuantRDOQ::QuantRDOQ( const Quant* other ) : Quant( other ) { const QuantRDOQ *rdoq = dynamic_cast( other ); CHECK( other && !rdoq, "The RDOQ cast must be successfull!" ); xInitScalingList( rdoq ); } QuantRDOQ::~QuantRDOQ() { xDestroyScalingList(); } /** Get the best level in RD sense * * \returns best quantized transform level for given scan position * * This method calculates the best quantized transform level for a given scan position. */ inline uint32_t QuantRDOQ::xGetCodedLevel( double& rd64CodedCost, double& rd64CodedCost0, double& rd64CodedCostSig, Intermediate_Int lLevelDouble, uint32_t uiMaxAbsLevel, const BinFracBits* fracBitsSig, const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, const int remRegBins, unsigned goRiceZero, uint16_t ui16AbsGoRice, int iQBits, double errorScale, bool bLast, bool useLimitedPrefixLength, const int maxLog2TrDynamicRange ) const { double dCurrCostSig = 0; uint32_t uiBestAbsLevel = 0; if( !bLast && uiMaxAbsLevel < 3 ) { rd64CodedCostSig = xGetRateSigCoef( *fracBitsSig, 0 ); rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig; if( uiMaxAbsLevel == 0 ) { return uiBestAbsLevel; } } else { rd64CodedCost = MAX_DOUBLE; } if( !bLast ) { dCurrCostSig = xGetRateSigCoef( *fracBitsSig, 1 ); } uint32_t uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 ); for( int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- ) { double dErr = double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) ); double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, true, maxLog2TrDynamicRange ) ); dCurrCost += dCurrCostSig; if( dCurrCost < rd64CodedCost ) { uiBestAbsLevel = uiAbsLevel; rd64CodedCost = dCurrCost; rd64CodedCostSig = dCurrCostSig; } } return uiBestAbsLevel; } /** Calculates the cost for specific absolute transform level * \param uiAbsLevel scaled quantized level * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC) * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC) * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3 * \param c1Idx * \param c2Idx * \param useLimitedPrefixLength * \param maxLog2TrDynamicRange * \returns cost of given absolute transform level */ inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, const BinFracBits& fracBitsPar, const BinFracBits& fracBitsGt1, const BinFracBits& fracBitsGt2, const int remRegBins, unsigned goRiceZero, const uint16_t ui16AbsGoRice, const bool useLimitedPrefixLength, const int maxLog2TrDynamicRange ) const { if( remRegBins < 4 ) { int iRate = int( xGetIEPRate() ); // cost of sign bit uint32_t symbol = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel ); uint32_t length; const int threshold = COEF_REMAIN_BIN_REDUCTION; if( symbol < ( threshold << ui16AbsGoRice ) ) { length = symbol >> ui16AbsGoRice; iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; } else if( useLimitedPrefixLength ) { const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) ); uint32_t prefixLength = 0; uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION; while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) ) { prefixLength++; } const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ ); iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS; } else { length = ui16AbsGoRice; symbol = symbol - ( threshold << ui16AbsGoRice ); while( symbol >= ( 1 << length ) ) { symbol -= ( 1 << ( length++ ) ); } iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; } return iRate; } int iRate = int( xGetIEPRate() ); // cost of sign bit const uint32_t cthres = 4; if( uiAbsLevel >= cthres ) { uint32_t symbol = ( uiAbsLevel - cthres ) >> 1; uint32_t length; const int threshold = COEF_REMAIN_BIN_REDUCTION; if( symbol < ( threshold << ui16AbsGoRice ) ) { length = symbol >> ui16AbsGoRice; iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; } else if( useLimitedPrefixLength ) { const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) ); uint32_t prefixLength = 0; uint32_t suffix = ( symbol >> ui16AbsGoRice ) - COEF_REMAIN_BIN_REDUCTION; while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) ) { prefixLength++; } const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ui16AbsGoRice ) : ( prefixLength + 1/*separator*/ ); iRate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice ) << SCALE_BITS; } else { length = ui16AbsGoRice; symbol = symbol - ( threshold << ui16AbsGoRice ); while( symbol >= ( 1 << length ) ) { symbol -= ( 1 << ( length++ ) ); } iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; } iRate += fracBitsGt1.intBits[1]; iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1]; iRate += fracBitsGt2.intBits[1]; } else if( uiAbsLevel == 1 ) { iRate += fracBitsGt1.intBits[0]; } else if( uiAbsLevel == 2 ) { iRate += fracBitsGt1.intBits[1]; iRate += fracBitsPar.intBits[0]; iRate += fracBitsGt2.intBits[0]; } else if( uiAbsLevel == 3 ) { iRate += fracBitsGt1.intBits[1]; iRate += fracBitsPar.intBits[1]; iRate += fracBitsGt2.intBits[0]; } else { iRate = 0; } return iRate; } inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const { return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] ); } /** Calculates the cost of signaling the last significant coefficient in the block * \param uiPosX X coordinate of the last significant coefficient * \param uiPosY Y coordinate of the last significant coefficient * \param component colour component ID * \returns cost of last significant coefficient */ /* * \param uiWidth width of the transform unit (TU) */ inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const { uint32_t ctxX = g_groupIdx[PosX]; uint32_t ctxY = g_groupIdx[PosY]; double cost = lastBitsX[ctxX] + lastBitsY[ctxY]; if (ctxX > 3) { cost += xGetIEPRate() * ((ctxX - 2) >> 1); } if (ctxY > 3) { cost += xGetIEPRate() * ((ctxY - 2) >> 1); } return xGetICost(cost); } inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const { return xGetICost( fracBitsSig.intBits[uiSignificance] ); } /** Get the cost for a specific rate * \param dRate rate of a bit * \returns cost at the specific rate */ inline double QuantRDOQ::xGetICost ( double dRate ) const { return m_dLambda * dRate; } /** Get the cost of an equal probable bit * \returns cost of equal probable bit */ inline double QuantRDOQ::xGetIEPRate ( ) const { return 32768; } /** set quantized matrix coefficient for encode * \param scalingList quantized matrix address * \param format chroma format * \param maxLog2TrDynamicRange * \param bitDepths reference to bit depth array for all channels */ void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths) { Quant::setScalingList( scalingList, maxLog2TrDynamicRange, bitDepths ); const int minimumQp = 0; const int maximumQp = SCALING_LIST_REM_NUM; for(uint32_t size = 0; size < SCALING_LIST_SIZE_NUM; size++) { for(uint32_t list = 0; list < SCALING_LIST_NUM; list++) { for(int qp = minimumQp; qp < maximumQp; qp++) { // xSetScalingListEnc(scalingList,list,size,qp); // xSetScalingListDec(*scalingList,list,size,qp); xSetErrScaleCoeff(list,size, size,qp,maxLog2TrDynamicRange, bitDepths); } } } } double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false) { const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); double dErrScale = (double)(1 << SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function double dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0); dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift)); // Compensate for scaling through forward transform const int QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp]; double finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1)); return finalErrScale; } /** set error scale coefficients * \param list list ID * \param size * \param qp quantization parameter * \param maxLog2TrDynamicRange * \param bitDepths reference to bit depth array for all channels */ void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths ) { const int width = g_scalingListSizeX[sizeX]; const int height = g_scalingListSizeX[sizeY]; const ChannelType channelType = ((list == 0) || (list == MAX_NUM_COMPONENT)) ? ChannelType::LUMA : ChannelType::CHROMA; const int channelBitDepth = bitDepths[channelType]; const int iTransformShift = getTransformShift( channelBitDepth, Size(g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY]), maxLog2TrDynamicRange[to_underlying(channelType)]); // Represents scaling through forward transform uint32_t i, uiMaxNumCoeff = width * height; int *piQuantcoeff; double *pdErrScale; piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY ); pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp); double dErrScale = (double)( 1 << SCALE_BITS ); // Compensate for scaling of bitcount in Lagrange cost function const bool needsSqrt2 = ((floorLog2(width) + floorLog2(height)) & 1) == 1; double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 ); dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) ); // Compensate for scaling through forward transform for( i = 0; i < uiMaxNumCoeff; i++ ) { pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1)); } int QStep = g_quantScales[needsSqrt2][qp]; xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths.recon[channelType]) << 1)); } /** set flat matrix value to quantized coefficient */ void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CHANNEL_TYPE], const BitDepths &bitDepths) { Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths ); const int minimumQp = 0; const int maximumQp = SCALING_LIST_REM_NUM; for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++) { for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++) { for(uint32_t list = 0; list < SCALING_LIST_NUM; list++) { for(int qp = minimumQp; qp < maximumQp; qp++) { xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths ); } } } } } /** initialization process of scaling list array */ void QuantRDOQ::xInitScalingList( const QuantRDOQ* other ) { m_isErrScaleListOwner = other == nullptr; size_t numElements = 0; for (uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) { for (uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) { for (uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) { for (uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) { numElements += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; } } } } if (m_isErrScaleListOwner) { m_errScale[0][0][0][0] = new double[numElements]; } size_t offset = 0; for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) { for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) { for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) { for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) { if( m_isErrScaleListOwner ) { m_errScale[sizeIdX][sizeIdY][listId][qp] = m_errScale[0][0][0][0] + offset; offset += g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]; } else { m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp]; } } // listID loop } } } } /** destroy quantization matrix array */ void QuantRDOQ::xDestroyScalingList() { if (!m_isErrScaleListOwner) { return; } delete[] m_errScale[0][0][0][0]; } void QuantRDOQ::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &absSum, const QpParam &cQP, const Ctx &ctx) { const CompArea &rect = tu.blocks[compID]; const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; const CCoeffBuf &piCoef = pSrc; CoeffBuf piQCoef = tu.getCoeffs(compID); const bool useTransformSkip = (tu.mtsIdx[compID] == MtsType::SKIP); bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ; if (tu.cu->ispMode == ISPType::NONE || !isLuma(compID)) { useRDOQ &= uiWidth > 2; useRDOQ &= uiHeight > 2; } if (tu.cs->slice->isLossless()) { useRDOQ = false; } if (useRDOQ && (isLuma(compID) || RDOQ_CHROMA)) { if (!m_useSelectiveRDOQ || xNeedRDOQ(tu, compID, piCoef, cQP)) { if( useTransformSkip ) { if (tu.cu->getBdpcmMode(compID) != BdpcmMode::NONE) { forwardBDPCM(tu, compID, pSrc, absSum, cQP, ctx); } else { xRateDistOptQuantTS(tu, compID, pSrc, absSum, cQP, ctx); } } else { xRateDistOptQuant(tu, compID, pSrc, absSum, cQP, ctx); } } else { piQCoef.fill(0); absSum = 0; } } else { Quant::quant(tu, compID, pSrc, absSum, cQP, ctx); } } void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &absSum, const QpParam &cQP, const Ctx &ctx) { const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; const uint32_t uiWidth = rect.width; const uint32_t uiHeight = rect.height; const ChannelType chType = toChannelType(compID); const int channelBitDepth = sps.getBitDepth( chType ); const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); const bool useIntraSubPartitions = tu.cu->ispMode != ISPType::NONE && isLuma(compID); /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result */ // Represents scaling through forward transform int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); if (tu.mtsIdx[compID] == MtsType::SKIP && extendedPrecision) { iTransformShift = std::max(0, iTransformShift); } double d64BlockUncodedCost = 0; const uint32_t uiLog2BlockWidth = floorLog2(uiWidth); const uint32_t uiLog2BlockHeight = floorLog2(uiHeight); const uint32_t uiMaxNumCoeff = rect.area(); CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); int scalingListType = getScalingListType(tu.cu->predMode, compID); CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); const TCoeff *plSrcCoeff = pSrc.buf; TCoeff *piDstCoeff = tu.getCoeffs(compID).buf; double *pdCostCoeff = m_pdCostCoeff; double *pdCostSig = m_pdCostSig; double *pdCostCoeff0 = m_pdCostCoeff0; int *rateIncUp = m_rateIncUp; int *rateIncDown = m_rateIncDown; int *sigRateDelta = m_sigRateDelta; TCoeff *deltaU = m_deltaU; memset(piDstCoeff, 0, sizeof(*piDstCoeff) * uiMaxNumCoeff); memset( m_pdCostCoeff, 0, sizeof( double ) * uiMaxNumCoeff ); memset( m_pdCostSig, 0, sizeof( double ) * uiMaxNumCoeff ); memset( m_rateIncUp, 0, sizeof( int ) * uiMaxNumCoeff ); memset( m_rateIncDown, 0, sizeof( int ) * uiMaxNumCoeff ); memset( m_sigRateDelta, 0, sizeof( int ) * uiMaxNumCoeff ); memset( m_deltaU, 0, sizeof( TCoeff ) * uiMaxNumCoeff ); const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID ); const bool isTransformSkip = tu.mtsIdx[compID] == MtsType::SKIP; const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip)); const int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight); const bool disableSMForLFNST = tu.cs->slice->getExplicitScalingListUsed() ? tu.cs->slice->getSPS()->getDisableScalingMatrixForLfnstBlks() : false; const bool isLfnstApplied = tu.cu->lfnstIdx > 0 && (tu.cu->isSepTree() ? true : isLuma(compID)); const bool disableSMForACT = tu.cs->slice->getSPS()->getScalingMatrixForAlternativeColourSpaceDisabledFlag() && (tu.cs->slice->getSPS()->getScalingMatrixDesignatedColourSpaceFlag() == tu.cu->colorTransform); const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied, disableSMForLFNST, disableSMForACT); const int defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)]; const double defaultErrorScale = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip)); const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag(), tu.cu->getBdpcmMode(compID)); int baseLevel = cctx.getBaseLevel(); if (tu.cs->slice->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag()) { unsigned riceStats = ctx.getGRAdaptStats((unsigned)compID); TCoeff historyValue = (TCoeff)1 << riceStats; cctx.setHistValue(historyValue); } else { cctx.setHistValue(0); } const int iCGSizeM1 = (1 << cctx.log2CGSize()) - 1; int iCGLastScanPos = -1; double d64BaseCost = 0; int iLastScanPos = -1; int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; int remRegBins = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4; uint32_t goRiceParam = 0; double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig; memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); int iScanPos; coeffGroupRDStats rdStats; #if ENABLE_TRACING DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID ); #endif const uint32_t lfnstIdx = tu.cu->lfnstIdx; const int iCGNum = lfnstIdx > 0 ? 1 : getNonzeroTuSize(uiWidth) * getNonzeroTuSize(uiHeight) >> cctx.log2CGSize(); for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--) { cctx.initSubblock( subSetId ); uint32_t maxNonZeroPosInCG = iCGSizeM1; if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) { maxNonZeroPosInCG = 7; } memset( &rdStats, 0, sizeof (coeffGroupRDStats)); for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- ) { iScanPos = cctx.minSubPos() + iScanPosinCG; uint32_t blkPos = cctx.blockPos( iScanPos ); piDstCoeff[ blkPos ] = 0; } for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) { iScanPos = cctx.minSubPos() + iScanPosinCG; //===== quantization ===== uint32_t uiBlkPos = cctx.blockPos(iScanPos); // set coeff const int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient; const double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale; const int64_t tmpLevel = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient; const Intermediate_Int lLevelDouble = (Intermediate_Int)std::min(tmpLevel, std::numeric_limits::max() - (Intermediate_Int(1) << (iQBits - 1))); uint32_t uiMaxAbsLevel = std::min(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits)); const double dErr = double( lLevelDouble ); pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale; d64BlockUncodedCost += pdCostCoeff0[ iScanPos ]; piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel; if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 ) { iLastScanPos = iScanPos; iCGLastScanPos = cctx.subSetId(); } if ( iLastScanPos >= 0 ) { #if ENABLE_TRACING uint32_t uiCGPosY = cctx.cgPosX(); uint32_t uiCGPosX = cctx.cgPosY(); uint32_t uiPosY = cctx.posY( iScanPos ); uint32_t uiPosX = cctx.posX( iScanPos ); DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY ); #endif //===== coefficient level estimation ===== unsigned ctxIdSig = 0; if( iScanPos != iLastScanPos ) { ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 ); } uint32_t uiLevel; uint8_t ctxOffset = cctx.ctxOffsetAbs (); uint32_t uiParCtx = cctx.parityCtxIdAbs ( ctxOffset ); uint32_t uiGt1Ctx = cctx.greater1CtxIdAbs ( ctxOffset ); uint32_t uiGt2Ctx = cctx.greater2CtxIdAbs ( ctxOffset ); uint32_t goRiceZero = 0; if( remRegBins < 4 ) { goRiceParam = (cctx.*(cctx.deriveRiceRRC))(iScanPos, piDstCoeff, 0); goRiceZero = g_goRicePosCoeff0(0, goRiceParam); } const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx ); const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx ); const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx ); if( iScanPos == iLastScanPos ) { uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, extendedPrecision, maxLog2TrDynamicRange ); } else { DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig ); const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, extendedPrecision, maxLog2TrDynamicRange ); sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] ); } DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel ); DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) ); DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) ); deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8)); if( uiLevel > 0 ) { int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; } else // uiLevel == 0 { if( remRegBins < 4 ) { int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ); rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, extendedPrecision, maxLog2TrDynamicRange ) - rateNow; } else { rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ]; } } piDstCoeff[ uiBlkPos ] = uiLevel; d64BaseCost += pdCostCoeff [ iScanPos ]; if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) ) { goRiceParam = 0; } else if( remRegBins >= 4 ) { goRiceParam = (cctx.*(cctx.deriveRiceRRC))(iScanPos, piDstCoeff, baseLevel); remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos); } } else { d64BaseCost += pdCostCoeff0[ iScanPos ]; } rdStats.d64SigCost += pdCostSig[ iScanPos ]; if (iScanPosinCG == 0 ) { rdStats.d64SigCost_0 = pdCostSig[ iScanPos ]; } if (piDstCoeff[ uiBlkPos ] ) { cctx.setSigGroup(); rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ]; rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ]; if ( iScanPosinCG != 0 ) { rdStats.iNNZbeforePos0++; } } } //end for (iScanPosinCG) if (iCGLastScanPos >= 0) { if( cctx.subSetId() ) { if( !cctx.isSigGroup() ) { const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() ); d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost; pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); } else { if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below. { if ( rdStats.iNNZbeforePos0 == 0 ) { d64BaseCost -= rdStats.d64SigCost_0; rdStats.d64SigCost -= rdStats.d64SigCost_0; } // rd-cost if SigCoeffGroupFlag = 0, initialization double d64CostZeroCG = d64BaseCost; const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() ); if (cctx.subSetId() < iCGLastScanPos) { d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup,1); d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0); pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1); } // try to convert the current coeff group from non-zero to all-zero d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels // if we can save cost, change this block to all-zero block if ( d64CostZeroCG < d64BaseCost ) { cctx.resetSigGroup(); d64BaseCost = d64CostZeroCG; if (cctx.subSetId() < iCGLastScanPos) { pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0); } // reset coeffs to 0 in this block for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) { iScanPos = cctx.minSubPos() + iScanPosinCG; uint32_t uiBlkPos = cctx.blockPos( iScanPos ); if (piDstCoeff[ uiBlkPos ]) { piDstCoeff [ uiBlkPos ] = 0; pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ]; pdCostSig [ iScanPos ] = 0; } } } // end if ( d64CostAllZeros < d64BaseCost ) } } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0) } else { cctx.setSigGroup(); } } } //end for (cctx.subSetId) //===== estimate last position ===== if ( iLastScanPos < 0 ) { return; } double d64BestCost = 0; int iBestLastIdxP1 = 0; if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 ) { const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() ); d64BestCost = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] ); d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] ); } else { bool previousCbf = tu.cbf[COMPONENT_Cb]; bool lastCbfIsInferred = false; if( useIntraSubPartitions ) { bool rootCbfSoFar = false; bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID); uint32_t nTus = tu.cu->ispMode == ISPType::HOR ? tu.cu->lheight() >> floorLog2(tu.lheight()) : tu.cu->lwidth() >> floorLog2(tu.lwidth()); if( isLastSubPartition ) { TransformUnit* tuPointer = tu.cu->firstTU; for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ ) { rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMPONENT_Y, tu.depth); tuPointer = tuPointer->next; } if( !rootCbfSoFar ) { lastCbfIsInferred = true; } } if( !lastCbfIsInferred ) { previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth); } } BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) ); if( !lastCbfIsInferred ) { d64BestCost = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]); d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]); } else { d64BestCost = d64BlockUncodedCost; } } int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 }; int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 }; { int dim1 = getNonzeroTuSize(uiWidth); int dim2 = getNonzeroTuSize(uiHeight); int bitsX = 0; int bitsY = 0; int ctxId; //X-coordinate for (ctxId = 0; ctxId < g_groupIdx[dim1 - 1]; ctxId++) { const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) ); lastBitsX[ ctxId ] = bitsX + fB.intBits[ 0 ]; bitsX += fB.intBits[ 1 ]; } lastBitsX[ctxId] = bitsX; //Y-coordinate for (ctxId = 0; ctxId < g_groupIdx[dim2 - 1]; ctxId++) { const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) ); lastBitsY[ ctxId ] = bitsY + fB.intBits[ 0 ]; bitsY += fB.intBits[ 1 ]; } lastBitsY[ctxId] = bitsY; } unsigned zoTbWdith = getNonzeroTuSize(cctx.width()); unsigned zoTbHeight = getNonzeroTuSize(cctx.height()); if (tu.cs->sps->getMtsEnabled() && tu.cu->sbtInfo != 0 && tu.blocks[compID].width <= 32 && tu.blocks[compID].height <= 32 && compID == COMPONENT_Y) { zoTbWdith = (tu.blocks[compID].width == 32) ? 16 : zoTbWdith; zoTbHeight = (tu.blocks[compID].height == 32) ? 16 : zoTbHeight; } bool bFoundLast = false; for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--) { d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; if (cctx.isSigGroup( iCGScanPos ) ) { uint32_t maxNonZeroPosInCG = iCGSizeM1; if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) { maxNonZeroPosInCG = 7; } for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) { iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG; if (iScanPos > iLastScanPos) { continue; } uint32_t uiBlkPos = cctx.blockPos( iScanPos ); if( piDstCoeff[ uiBlkPos ] ) { uint32_t uiPosY = uiBlkPos >> uiLog2BlockWidth; uint32_t uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth ); if (tu.cu->slice->getReverseLastSigCoeffFlag()) { uiPosX = zoTbWdith - 1 - uiPosX; uiPosY = zoTbHeight - 1 - uiPosY; } double d64CostLast = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY ); double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ]; if( totalCost < d64BestCost ) { iBestLastIdxP1 = iScanPos + 1; d64BestCost = totalCost; } if( piDstCoeff[ uiBlkPos ] > 1 ) { bFoundLast = true; break; } d64BaseCost -= pdCostCoeff[ iScanPos ]; d64BaseCost += pdCostCoeff0[ iScanPos ]; } else { d64BaseCost -= pdCostSig[ iScanPos ]; } } //end for if (bFoundLast) { break; } } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ]) DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID ); DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) ); DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded =%d\n", (int64_t)( d64BaseCost ) ); } // end for for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ ) { int blkPos = cctx.blockPos( scanPos ); TCoeff level = piDstCoeff[ blkPos ]; absSum += level; piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level; } //===== clean uncoded coefficients ===== for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ ) { piDstCoeff[ cctx.blockPos( scanPos ) ] = 0; } if (cctx.signHiding() && absSum >= 2) { const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]); int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth))) + 0.5); int lastCG = -1; int absSum = 0 ; int n ; for (int subSet = iCGNum - 1; subSet >= 0; subSet--) { int subPos = subSet << cctx.log2CGSize(); int firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1; absSum = 0 ; for( n = iCGSizeM1; n >= 0; --n ) { if( piDstCoeff[ cctx.blockPos( n + subPos )] ) { lastNZPosInCG = n; break; } } for( n = 0; n <= iCGSizeM1; n++ ) { if( piDstCoeff[ cctx.blockPos( n + subPos )] ) { firstNZPosInCG = n; break; } } for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ ) { absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]); } if(lastNZPosInCG>=0 && lastCG==-1) { lastCG = 1; } if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD ) { uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1); if( signbit!=(absSum&0x1) ) // hide but need tune { // calculate the cost int64_t minCostInc = std::numeric_limits::max(), curCost = std::numeric_limits::max(); int minPos = -1, finalChange = 0, curChange = 0; for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n ) { uint32_t uiBlkPos = cctx.blockPos( n + subPos ); if(piDstCoeff[ uiBlkPos ] != 0 ) { int64_t costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos]; int64_t costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos] - ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0); if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1) { costDown -= (4<::max(); } else { curCost = costDown; } } } else { curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<=0?0:1); if(thissignbit != signbit ) { curCost = std::numeric_limits::max(); } } } if( curCost=0) { piDstCoeff[minPos] += finalChange ; } else { piDstCoeff[minPos] -= finalChange ; } } } if(lastCG==1) { lastCG=0 ; } } } } void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx ) { const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; const uint32_t width = rect.width; const uint32_t height = rect.height; const ChannelType chType = toChannelType(compID); const int channelBitDepth = sps.getBitDepth( chType ); const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange ); if( extendedPrecision ) { transformShift = std::max( 0, transformShift ); } const uint32_t maxNumCoeff = rect.area(); CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" ); int scalingListType = getScalingListType( tu.cu->predMode, compID ); CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" ); const TCoeff *srcCoeff = coeffs.buf; TCoeff *dstCoeff = tu.getCoeffs( compID ).buf; double *costCoeff = m_pdCostCoeff; double *costSig = m_pdCostSig; double *costCoeff0 = m_pdCostCoeff0; memset( m_pdCostCoeff, 0, sizeof( double ) * maxNumCoeff ); memset( m_pdCostSig, 0, sizeof( double ) * maxNumCoeff ); m_bdpcm = BdpcmMode::NONE; const bool needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID ); // should always be false - transform-skipped blocks don't require sqrt(2) compensation. const bool isTransformSkip = tu.mtsIdx[compID] == MtsType::SKIP; const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits const int quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)]; const double errorScale = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; uint32_t coeffLevels[3]; double coeffLevelError[4]; CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag(), m_bdpcm); const int sbSizeM1 = ( 1 << cctx.log2CGSize() ) - 1; double baseCost = 0; uint32_t goRiceParam = 0; double *costSigSubBlock = m_pdCostCoeffGroupSig; memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); const int sbNum = width * height >> cctx.log2CGSize(); int scanPos; coeffGroupRDStats rdStats; bool anySigCG = false; int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; cctx.setNumCtxBins(maxCtxBins); for( int sbId = 0; sbId < sbNum; sbId++ ) { cctx.initSubblock( sbId ); int noCoeffCoded = 0; baseCost = 0.0; memset( &rdStats, 0, sizeof (coeffGroupRDStats)); rdStats.iNumSbbCtxBins = 0; for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ ) { int lastPosCoded = sbSizeM1; scanPos = cctx.minSubPos() + scanPosInSB; //===== quantization ===== uint32_t blkPos = cctx.blockPos( scanPos ); // set coeff const int64_t tmpLevel = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient; const Intermediate_Int levelDouble = (Intermediate_Int)std::min( tmpLevel, std::numeric_limits::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) ); uint32_t roundAbsLevel = std::min(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits)); uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); uint32_t downAbsLevel = std::min(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits)); uint32_t upAbsLevel = std::min(uint32_t(entropyCodingMaximum), downAbsLevel + 1); m_testedLevels = 0; coeffLevels[m_testedLevels++] = roundAbsLevel; if (minAbsLevel != roundAbsLevel) { coeffLevels[m_testedLevels++] = minAbsLevel; } int rightPixel, belowPixel, predPixel; cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff); predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, false); if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1) { coeffLevels[m_testedLevels++] = upAbsLevel; } double dErr = double(levelDouble); coeffLevelError[0] = dErr * dErr * errorScale; costCoeff0[scanPos] = coeffLevelError[0]; dstCoeff[blkPos] = coeffLevels[0]; //===== coefficient level estimation ===== unsigned ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff ); uint32_t cLevel; const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() ); goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff ); if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MtsType::SKIP) { goRiceParam = goRiceParam + tu.cu->slice->getTsrcIndex(); } unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, m_bdpcm); const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign); const uint8_t sign = srcCoeff[ blkPos ] < 0 ? 1 : 0; DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig ); unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, m_bdpcm); const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId); const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); bool lastCoeff = false; // if (scanPosInSB == lastPosCoded && noCoeffCoded == 0) { lastCoeff = true; } int numUsedCtxBins = 0; cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError, &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins); cctx.decimateNumCtxBins(numUsedCtxBins); rdStats.iNumSbbCtxBins += numUsedCtxBins; if (cLevel > 0) { noCoeffCoded++; } TCoeff level = cLevel; dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level; baseCost += costCoeff[ scanPos ]; rdStats.d64SigCost += costSig[ scanPos ]; if( dstCoeff[ blkPos ] ) { cctx.setSigGroup(); rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ]; rdStats.d64UncodedDist += costCoeff0[ scanPos ]; } } //end for (iScanPosinCG) if( !cctx.isSigGroup() ) { const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) ); baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost; costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block } else if( sbId != sbNum - 1 || anySigCG ) { // rd-cost if SigCoeffGroupFlag = 0, initialization double costZeroSB = baseCost; const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) ); baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 ); costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 ); costZeroSB += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels costZeroSB -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels costZeroSB -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels if( costZeroSB < baseCost ) { cctx.resetSigGroup(); baseCost = costZeroSB; costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ ) { scanPos = cctx.minSubPos() + scanPosInSB; uint32_t blkPos = cctx.blockPos( scanPos ); if( dstCoeff[ blkPos ] ) { dstCoeff[ blkPos ] = 0; costCoeff[ scanPos ] = costCoeff0[ scanPos ]; costSig[ scanPos] = 0; } } } else { anySigCG = true; } } } //===== estimate last position ===== for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ ) { int blkPos = cctx.blockPos( scanPos ); TCoeff level = dstCoeff[ blkPos ]; absSum += abs(level); } } void QuantRDOQ::forwardBDPCM(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &coeffs, TCoeff &absSum, const QpParam &qp, const Ctx &ctx) { const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); const SPS &sps = *tu.cs->sps; const CompArea &rect = tu.blocks[compID]; const uint32_t width = rect.width; const uint32_t height = rect.height; const ChannelType chType = toChannelType(compID); const int channelBitDepth = sps.getBitDepth(chType); const bool extendedPrecision = sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag(); const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType); const BdpcmMode dirMode = tu.cu->getBdpcmMode(compID); int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); if (extendedPrecision) { transformShift = std::max(0, transformShift); } const uint32_t maxNumCoeff = rect.area(); CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); int scalingListType = getScalingListType(tu.cu->predMode, compID); CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); const TCoeff *srcCoeff = coeffs.buf; TCoeff *dstCoeff = tu.getCoeffs(compID).buf; double *costCoeff = m_pdCostCoeff; double *costSig = m_pdCostSig; double *costCoeff0 = m_pdCostCoeff0; memset(m_pdCostCoeff, 0, sizeof(double) * maxNumCoeff); memset(m_pdCostSig, 0, sizeof(double) * maxNumCoeff); memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff); m_bdpcm = dirMode; const bool needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID); // should always be false - transform-skipped blocks don't require sqrt(2) compensation. const bool isTransformSkip = tu.mtsIdx[compID] == MtsType::SKIP; const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits const int quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); TrQuantParams trQuantParams; trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip))); trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; uint32_t coeffLevels[3]; double coeffLevelError[4]; CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag(), dirMode); const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1; double baseCost = 0; uint32_t goRiceParam = 0; double *costSigSubBlock = m_pdCostCoeffGroupSig; memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double)); const int sbNum = width * height >> cctx.log2CGSize(); int scanPos; coeffGroupRDStats rdStats; bool anySigCG = false; int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; cctx.setNumCtxBins(maxCtxBins); for (int sbId = 0; sbId < sbNum; sbId++) { cctx.initSubblock(sbId); int noCoeffCoded = 0; baseCost = 0.0; memset(&rdStats, 0, sizeof(coeffGroupRDStats)); rdStats.iNumSbbCtxBins = 0; for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++) { int lastPosCoded = sbSizeM1; scanPos = cctx.minSubPos() + scanPosInSB; //===== quantization ===== uint32_t blkPos = cctx.blockPos(scanPos); const int posX = cctx.posX(scanPos); const int posY = cctx.posY(scanPos); const int posS = (BdpcmMode::HOR == dirMode) ? posX : posY; const ptrdiff_t posNb = (BdpcmMode::HOR == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0; // set coeff const int64_t tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient; const Intermediate_Int levelDouble = (Intermediate_Int)std::min(tmpLevel, std::numeric_limits::max() - (Intermediate_Int(1) << (qBits - 1))); uint32_t roundAbsLevel = std::min(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits)); uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); m_testedLevels = 0; coeffLevels[m_testedLevels++] = roundAbsLevel; if (minAbsLevel != roundAbsLevel) { coeffLevels[m_testedLevels++] = minAbsLevel; } double dErr = double(levelDouble); coeffLevelError[0] = dErr * dErr * errorScale; costCoeff0[scanPos] = coeffLevelError[0]; dstCoeff[blkPos] = coeffLevels[0]; //===== coefficient level estimation ===== unsigned ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff); uint32_t cLevel; const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS()); goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff); if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MtsType::SKIP) { goRiceParam = goRiceParam + tu.cu->slice->getTsrcIndex(); } unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode); const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign); const uint8_t sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0; unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode); const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId); DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig); const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig); bool lastCoeff = false; // if (scanPosInSB == lastPosCoded && noCoeffCoded == 0) { lastCoeff = true; } int rightPixel, belowPixel; cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff); int numUsedCtxBins = 0; cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError, &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, extendedPrecision, maxLog2TrDynamicRange, numUsedCtxBins); cctx.decimateNumCtxBins(numUsedCtxBins); rdStats.iNumSbbCtxBins += numUsedCtxBins; if (cLevel > 0) { noCoeffCoded++; } dstCoeff[blkPos] = cLevel; if (sign) { dstCoeff[blkPos] = -dstCoeff[blkPos]; } xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams ); m_fullCoeff[blkPos] += predCoeff; baseCost += costCoeff[scanPos]; rdStats.d64SigCost += costSig[scanPos]; if (dstCoeff[blkPos]) { cctx.setSigGroup(); rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos]; rdStats.d64UncodedDist += costCoeff0[scanPos]; } } //end for (iScanPosinCG) if (!cctx.isSigGroup()) { const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true)); baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost; costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block } else if (sbId != sbNum - 1 || anySigCG) { // rd-cost if SigCoeffGroupFlag = 0, initialization double costZeroSB = baseCost; const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true)); baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1); costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0); costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1); costZeroSB += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels costZeroSB -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels costZeroSB -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels if (costZeroSB < baseCost) { cctx.resetSigGroup(); baseCost = costZeroSB; costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); cctx.increaseNumCtxBins(rdStats.iNumSbbCtxBins); // skip sub-block for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++) { scanPos = cctx.minSubPos() + scanPosInSB; uint32_t blkPos = cctx.blockPos(scanPos); const int posX = cctx.posX(scanPos); const int posY = cctx.posY(scanPos); const int posS = (BdpcmMode::HOR == dirMode) ? posX : posY; const ptrdiff_t posNb = (BdpcmMode::HOR == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; m_fullCoeff[scanPos] = (0 != posS) ? m_fullCoeff[posNb] : 0; if (dstCoeff[blkPos]) { dstCoeff[blkPos] = 0; costCoeff[scanPos] = costCoeff0[scanPos]; costSig[scanPos] = 0; } } } else { anySigCG = true; } } } //===== estimate last position ===== for (int scanPos = 0; scanPos < maxNumCoeff; scanPos++) { int blkPos = cctx.blockPos(scanPos); TCoeff level = dstCoeff[blkPos]; absSum += abs(level); } } void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeff& coeff, const TrQuantParams& trQuantParams) { // xDequant if (trQuantParams.rightShift > 0) { const Intermediate_Int qAdd = Intermediate_Int(1) << (trQuantParams.rightShift - 1); pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale + qAdd) >> trQuantParams.rightShift); } else { pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale) << -trQuantParams.rightShift); } } inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double& rd64CodedCost, double& rd64CodedCost0, double& rd64CodedCostSig, Intermediate_Int levelDouble, int qBits, double errorScale, uint32_t coeffLevels[], double coeffLevelError[], const BinFracBits* fracBitsSig, const BinFracBits& fracBitsPar, CoeffCodingContext& cctx, const FracBitsAccess& fracBitsAccess, const BinFracBits& fracBitsSign, const BinFracBits& fracBitsGt1, const uint8_t sign, int rightPixel, int belowPixel, uint16_t ricePar, bool isLast, bool useLimitedPrefixLength, const int maxLog2TrDynamicRange , int& numUsedCtxBins ) const { double currCostSig = 0; uint32_t bestAbsLevel = 0; numUsedCtxBins = 0; int numBestCtxBin = 0; if (!isLast && coeffLevels[0] < 3) { if (cctx.numCtxBins() >= 4) { rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0); } else { rd64CodedCostSig = xGetICost(1 << SCALE_BITS); } rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig; if (cctx.numCtxBins() >= 4) { numUsedCtxBins++; } if (coeffLevels[0] == 0) { return bestAbsLevel; } } else { rd64CodedCost = MAX_DOUBLE; } if (!isLast) { if (cctx.numCtxBins() >= 4) { currCostSig = xGetRateSigCoef(*fracBitsSig, 1); } else { currCostSig = xGetICost(1 << SCALE_BITS); } if (coeffLevels[0] >= 3 && cctx.numCtxBins() >= 4) { numUsedCtxBins++; } } for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++) { int absLevel = coeffLevels[errorInd - 1]; double dErr = 0.0; dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits)); coeffLevelError[errorInd] = dErr * dErr * errorScale; int modAbsLevel = absLevel; if (cctx.numCtxBins() >= 4) { modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm != BdpcmMode::NONE); } int numCtxBins = 0; double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, useLimitedPrefixLength, maxLog2TrDynamicRange)); if (cctx.numCtxBins() >= 4) dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more if (dCurrCost < rd64CodedCost) { bestAbsLevel = absLevel; rd64CodedCost = dCurrCost; rd64CodedCostSig = currCostSig; numBestCtxBin = numCtxBins; } } numUsedCtxBins += numBestCtxBin; return bestAbsLevel; } inline int QuantRDOQ::xGetICRateTS( const uint32_t absLevel, const BinFracBits& fracBitsPar, const CoeffCodingContext& cctx, const FracBitsAccess& fracBitsAccess, const BinFracBits& fracBitsSign, const BinFracBits& fracBitsGt1, int& numCtxBins, const uint8_t sign, const uint16_t ricePar, const bool useLimitedPrefixLength, const int maxLog2TrDynamicRange ) const { if (cctx.numCtxBins() < 4) // Full by-pass coding { int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero uint32_t symbol = absLevel; uint32_t length; const int threshold = COEF_REMAIN_BIN_REDUCTION; if (symbol < (threshold << ricePar)) { length = symbol >> ricePar; rate += (length + 1 + ricePar) << SCALE_BITS; } else if (useLimitedPrefixLength) { const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange)); uint32_t prefixLength = 0; uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION; while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2))) { prefixLength++; } const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/); rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS; } else { length = ricePar; symbol = symbol - (threshold << ricePar); while (symbol >= (1 << length)) { symbol -= (1 << (length++)); } rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS; } return rate; } else if (cctx.numCtxBins() >= 4 && cctx.numCtxBins() < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here) { int rate = fracBitsSign.intBits[sign]; // sign bits if (absLevel) { numCtxBins++; } if (absLevel > 1) { rate += fracBitsGt1.intBits[1]; rate += fracBitsPar.intBits[(absLevel - 2) & 1]; numCtxBins += 2; int cutoffVal = 2; if (absLevel >= cutoffVal) { uint32_t symbol = (absLevel - cutoffVal) >> 1; uint32_t length; const int threshold = COEF_REMAIN_BIN_REDUCTION; if (symbol < (threshold << ricePar)) { length = symbol >> ricePar; rate += (length + 1 + ricePar) << SCALE_BITS; } else if (useLimitedPrefixLength) { const uint32_t maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange)); uint32_t prefixLength = 0; uint32_t suffix = (symbol >> ricePar) - COEF_REMAIN_BIN_REDUCTION; while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2))) { prefixLength++; } const uint32_t suffixLength = (prefixLength == maximumPrefixLength) ? (maxLog2TrDynamicRange - ricePar) : (prefixLength + 1/*separator*/); rate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar) << SCALE_BITS; } else { length = ricePar; symbol = symbol - (threshold << ricePar); while (symbol >= (1 << length)) { symbol -= (1 << (length++)); } rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS; } } } else if (absLevel == 1) { rate += fracBitsGt1.intBits[0]; numCtxBins++; } else { rate = 0; } return rate; } int rate = fracBitsSign.intBits[sign]; if (absLevel) { numCtxBins++; } if( absLevel > 1 ) { rate += fracBitsGt1.intBits[1]; rate += fracBitsPar.intBits[( absLevel - 2 ) & 1]; numCtxBins += 2; int cutoffVal = 2; const int numGtBins = 4; for( int i = 0; i < numGtBins; i++ ) { if( absLevel >= cutoffVal ) { const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 ); const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX ); unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) ); rate += fracBitsGtX.intBits[gtX]; numCtxBins++; } cutoffVal += 2; } if( absLevel >= cutoffVal ) { uint32_t symbol = ( absLevel - cutoffVal ) >> 1; uint32_t length; const int threshold = COEF_REMAIN_BIN_REDUCTION; if( symbol < ( threshold << ricePar ) ) { length = symbol >> ricePar; rate += ( length + 1 + ricePar ) << SCALE_BITS; } else if( useLimitedPrefixLength ) { const uint32_t maximumPrefixLength = ( 32 - ( COEF_REMAIN_BIN_REDUCTION + maxLog2TrDynamicRange ) ); uint32_t prefixLength = 0; uint32_t suffix = ( symbol >> ricePar ) - COEF_REMAIN_BIN_REDUCTION; while( ( prefixLength < maximumPrefixLength ) && ( suffix > ( ( 2 << prefixLength ) - 2 ) ) ) { prefixLength++; } const uint32_t suffixLength = ( prefixLength == maximumPrefixLength ) ? ( maxLog2TrDynamicRange - ricePar ) : ( prefixLength + 1/*separator*/ ); rate += ( COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ricePar ) << SCALE_BITS; } else { length = ricePar; symbol = symbol - ( threshold << ricePar ); while( symbol >= ( 1 << length ) ) { symbol -= ( 1 << ( length++ ) ); } rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS; } } } else if( absLevel == 1 ) { rate += fracBitsGt1.intBits[0]; numCtxBins++; } else { rate = 0; } return rate; } //! \}