Logo Search packages:      
Sourcecode: p7zip version File versions  Download package

BZip2Encoder.cpp

// BZip2Encoder.cpp

#include "StdAfx.h"

#include "BZip2Encoder.h"

#include "../../../Common/Alloc.h"
#include "../BWT/Mtf8.h"
#include "BZip2CRC.h"

namespace NCompress {
namespace NBZip2 {

static const UInt32 kBufferSize = (1 << 17);
static const int kNumHuffPasses = 4;

CEncoder::CEncoder(): 
  m_Block(0), 
  m_NeedHuffmanCreate(true), 
  m_NumPasses(1), 
  m_OptimizeNumTables(false),
  m_BlockSizeMult(kBlockSizeMultMax)
{}

CEncoder::~CEncoder()
{
  ::BigFree(m_Block);
}

UInt32 CEncoder::ReadRleBlock(Byte *buffer)
{
  UInt32 i = 0;
  Byte prevByte;
  if (m_InStream.ReadByte(prevByte))
  {
    UInt32 blockSize = m_BlockSizeMult * kBlockSizeStep - 1;
    int numReps = 1;
    buffer[i++] = prevByte;
    while (i < blockSize) // "- 1" to support RLE
    {
      Byte b;
      if (!m_InStream.ReadByte(b))
        break;
      if (b != prevByte)
      {
        if (numReps >= kRleModeRepSize)
          buffer[i++] = numReps - kRleModeRepSize;
        buffer[i++] = b;
        numReps = 1;
        prevByte = b;
        continue;
      }
      numReps++;
      if (numReps <= kRleModeRepSize)
        buffer[i++] = b;
      else if (numReps == kRleModeRepSize + 255)
      {
        buffer[i++] = numReps - kRleModeRepSize;
        numReps = 0;
      }
    }
    // it's to support original BZip2 decoder
    if (numReps >= kRleModeRepSize)
      buffer[i++] = numReps - kRleModeRepSize;
  }
  return i;
}

void CEncoder::WriteBits2(UInt32 value, UInt32 numBits)
  { m_OutStreamCurrent->WriteBits(value, numBits); }
void CEncoder::WriteByte2(Byte b) { WriteBits2(b , 8); }
void CEncoder::WriteBit2(bool v) { WriteBits2((v ? 1 : 0), 1); }
void CEncoder::WriteCRC2(UInt32 v) 
{ 
  for (int i = 0; i < 4; i++)
    WriteByte2(((Byte)(v >> (24 - i * 8))));
}

void CEncoder::WriteBits(UInt32 value, UInt32 numBits)
  { m_OutStream.WriteBits(value, numBits); }
void CEncoder::WriteByte(Byte b) { WriteBits(b , 8); }
void CEncoder::WriteBit(bool v) { WriteBits((v ? 1 : 0), 1); }
void CEncoder::WriteCRC(UInt32 v) 
{ 
  for (int i = 0; i < 4; i++)
    WriteByte(((Byte)(v >> (24 - i * 8))));
}


// blockSize > 0
void CEncoder::EncodeBlock(Byte *block, UInt32 blockSize)
{
  WriteBit2(false); // Randomised = false
  
  {
    UInt32 origPtr = m_BlockSorter.Sort(block, blockSize);
    WriteBits2(origPtr, kNumOrigBits);
  }

  CMtf8Encoder mtf;
  int numInUse = 0;
  {
    bool inUse[256];
    bool inUse16[16];
    UInt32 i;
    for (i = 0; i < 256; i++) 
      inUse[i] = false;
    for (i = 0; i < 16; i++) 
      inUse16[i] = false;
    for (i = 0; i < blockSize; i++)
      inUse[block[i]] = true;
    for (i = 0; i < 256; i++) 
      if (inUse[i])
      {
        inUse16[i >> 4] = true;
        mtf.Buffer[numInUse++] = (Byte)i;
      }
    for (i = 0; i < 16; i++) 
      WriteBit2(inUse16[i]);
    for (i = 0; i < 256; i++) 
      if (inUse16[i >> 4])
        WriteBit2(inUse[i]);
  }
  int alphaSize = numInUse + 2;

  Byte *mtfs = m_MtfArray;
  UInt32 mtfArraySize = 0;
  UInt32 symbolCounts[kMaxAlphaSize];
  {
    for (int i = 0; i < kMaxAlphaSize; i++)
      symbolCounts[i] = 0;
  }

  {
    UInt32 rleSize = 0;
    UInt32 i = 0;
    do 
    {
      UInt32 index = m_BlockSorter.Indices[i];
      if (index == 0)
        index = blockSize - 1;
      else
        index--;
      int pos = mtf.FindAndMove(block[index]);
      if (pos == 0)
        rleSize++;
      else
      {
        while (rleSize != 0)
        {
          rleSize--;
          mtfs[mtfArraySize++] = (rleSize & 1);
          symbolCounts[rleSize & 1]++;
          rleSize >>= 1;
        }
        if (pos >= 0xFE)
        {
          mtfs[mtfArraySize++] = 0xFF;
          mtfs[mtfArraySize++] = pos - 0xFE;
        }
        else
          mtfs[mtfArraySize++] = pos + 1;
        symbolCounts[pos + 1]++;
      }
    }
    while (++i < blockSize);

    while (rleSize != 0)
    {
      rleSize--;
      mtfs[mtfArraySize++] = (rleSize & 1);
      symbolCounts[rleSize & 1]++;
      rleSize >>= 1;
    }

    if (alphaSize < 256)
      mtfs[mtfArraySize++] = (Byte)(alphaSize - 1);
    else
    {
      mtfs[mtfArraySize++] = 0xFF;
      mtfs[mtfArraySize++] = (Byte)(alphaSize - 256);
    }
    symbolCounts[alphaSize - 1]++;
  }

  UInt32 numSymbols = 0;
  {
    for (int i = 0; i < kMaxAlphaSize; i++)
      numSymbols += symbolCounts[i];
  }

  int bestNumTables = kNumTablesMin;
  UInt32 bestPrice = 0xFFFFFFFF;
  UInt32 startPos = m_OutStreamCurrent->GetPos();
  UInt32 startCurByte = m_OutStreamCurrent->GetCurByte();
  for (int nt = kNumTablesMin; nt <= kNumTablesMax + 1; nt++)
  {
    int numTables;

    if(m_OptimizeNumTables)
    {
      m_OutStreamCurrent->SetPos(startPos);
      m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte);
      if (nt <= kNumTablesMax)
        numTables = nt;
      else
        numTables = bestNumTables;
    }
    else
    {
      if (numSymbols < 200)  numTables = 2; 
      else if (numSymbols < 600) numTables = 3; 
      else if (numSymbols < 1200) numTables = 4; 
      else if (numSymbols < 2400) numTables = 5; 
      else numTables = 6;
    }

    WriteBits2(numTables, kNumTablesBits);
    
    UInt32 numSelectors = (numSymbols + kGroupSize - 1) / kGroupSize;
    WriteBits2(numSelectors, kNumSelectorsBits);
    
    {
      UInt32 remFreq = numSymbols;
      int gs = 0;
      int t = numTables;
      do
      {
        UInt32 tFreq = remFreq / t;
        int ge = gs;
        UInt32 aFreq = 0;
        while (aFreq < tFreq) //  && ge < alphaSize) 
          aFreq += symbolCounts[ge++];
        
        if (ge - 1 > gs && t != numTables && t != 1 && (((numTables - t) & 1) == 1)) 
          aFreq -= symbolCounts[--ge];
        
        NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[t - 1];
        int i = 0;
        do
          huffEncoder.m_Items[i].Len = (i >= gs && i < ge) ? 0 : 1;
        while (++i < alphaSize);
        gs = ge;
        remFreq -= aFreq;
      }
      while(--t != 0);
    }
    
    
    for (int pass = 0; pass < kNumHuffPasses; pass++)
    {
      {
        int t = 0;
        do
          m_HuffEncoders[t].StartNewBlock();
        while(++t < numTables);
      }
      
      {
        UInt32 mtfPos = 0;
        UInt32 g = 0;
        do 
        {
          UInt32 symbols[kGroupSize];
          int i = 0;
          do
          {
            UInt32 symbol = mtfs[mtfPos++];
            if (symbol >= 0xFF)
              symbol += mtfs[mtfPos++];
            symbols[i] = symbol;
          }
          while (++i < kGroupSize && mtfPos < mtfArraySize);
          
          UInt32 bestPrice = 0xFFFFFFFF;
          int t = 0;
          do
          {
            NCompression::NHuffman::CItem *items = m_HuffEncoders[t].m_Items;
            UInt32 price = 0;
            int j = 0;
            do
              price += items[symbols[j]].Len;
            while (++j < i);
            if (price < bestPrice)
            {
              m_Selectors[g] = (Byte)t;
              bestPrice = price;
            }
          }
          while(++t < numTables);
          NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[m_Selectors[g++]];
          int j = 0;
          do
            huffEncoder.AddSymbol(symbols[j]);
          while (++j < i);
        }
        while (mtfPos < mtfArraySize);
      }
      
      int t = 0;
      do
      {
        NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[t];
        int i = 0;
        do
          if (huffEncoder.m_Items[i].Freq == 0)
            huffEncoder.m_Items[i].Freq = 1;
        while(++i < alphaSize);
        Byte levels[kMaxAlphaSize];
        huffEncoder.BuildTree(levels);
      }
      while(++t < numTables);
    }
    
    {
      Byte mtfSel[kNumTablesMax];
      {
        int t = 0;
        do
          mtfSel[t] = (Byte)t;
        while(++t < numTables);
      }
      
      UInt32 i = 0;
      do
      {
        Byte sel = m_Selectors[i];
        int pos;
        for (pos = 0; mtfSel[pos] != sel; pos++)
          WriteBit2(true);
        WriteBit2(false);
        for (; pos > 0; pos--)
          mtfSel[pos] = mtfSel[pos - 1];
        mtfSel[0] = sel;
      }
      while(++i < numSelectors);
    }
    
    {
      int t = 0;
      do
      {
        NCompression::NHuffman::CItem *items = m_HuffEncoders[t].m_Items;
        UInt32 len = items[0].Len;
        WriteBits2(len, kNumLevelsBits);
        int i = 0;
        do
        {
          UInt32 level = items[i].Len;
          while (len != level)
          {
            WriteBit2(true);
            if (len < level)
            {
              WriteBit2(false);
              len++;
            }
            else 
            {
              WriteBit2(true);
              len--;
            }
          }
          WriteBit2(false);
        }
        while (++i < alphaSize);
      }
      while(++t < numTables);
    }
    
    {
      UInt32 groupSize = 0;
      UInt32 groupIndex = 0;
      NCompression::NHuffman::CEncoder *huffEncoder = 0;
      UInt32 mtfPos = 0;
      do
      {
        UInt32 symbol = mtfs[mtfPos++];
        if (symbol >= 0xFF)
          symbol += mtfs[mtfPos++];
        if (groupSize == 0) 
        {
          groupSize = kGroupSize;
          huffEncoder = &m_HuffEncoders[m_Selectors[groupIndex++]];
        }
        groupSize--;                                    \
        huffEncoder->CodeOneValue(m_OutStreamCurrent, symbol);
      }
      while (mtfPos < mtfArraySize);
    }

    if (!m_OptimizeNumTables)
      break;
    UInt32 price = m_OutStreamCurrent->GetPos() - startPos;
    if (price <= bestPrice)
    {
      if (nt == kNumTablesMax)
        break;
      bestPrice = price;
      bestNumTables = nt;
    }
  }
}

// blockSize > 0
UInt32 CEncoder::EncodeBlockWithHeaders(Byte *block, UInt32 blockSize)
{
  WriteByte2(kBlockSig0);
  WriteByte2(kBlockSig1);
  WriteByte2(kBlockSig2);
  WriteByte2(kBlockSig3);
  WriteByte2(kBlockSig4);
  WriteByte2(kBlockSig5);

  CBZip2CRC crc;
  int numReps = 0;
  Byte prevByte = block[0];
  UInt32 i = 0;
  do 
  {
    Byte b = block[i];
    if (numReps == kRleModeRepSize)
    {
      for (; b > 0; b--)
        crc.UpdateByte(prevByte);
      numReps = 0;
      continue;
    }
    if (prevByte == b)
      numReps++;
    else
    {
      numReps = 1;
      prevByte = b;
    }
    crc.UpdateByte(b);
  }
  while (++i < blockSize);
  UInt32 crcRes = crc.GetDigest();
  WriteCRC2(crcRes);
  EncodeBlock(block, blockSize);
  return crcRes;
}

void CEncoder::EncodeBlock2(CBZip2CombinedCRC &combinedCRC, 
    Byte *block, UInt32 blockSize, UInt32 numPasses)
{
  bool needCompare = false;
  CBZip2CombinedCRC specCombinedCRC = combinedCRC;

  UInt32 startBytePos = m_OutStreamCurrent->GetBytePos();
  UInt32 startPos = m_OutStreamCurrent->GetPos();
  UInt32 startCurByte = m_OutStreamCurrent->GetCurByte();
  UInt32 endCurByte;
  UInt32 endPos;
  if (numPasses > 1 && blockSize >= (1 << 10))
  {
    UInt32 blockSize0 = blockSize / 2;
    for (;(block[blockSize0] == block[blockSize0 - 1] ||
          block[blockSize0 - 1] == block[blockSize0 - 2]) && 
          blockSize0 < blockSize; blockSize0++);
    if (blockSize0 < blockSize)
    {
      EncodeBlock2(specCombinedCRC, block, blockSize0, numPasses - 1);
      EncodeBlock2(specCombinedCRC, block + blockSize0, blockSize - blockSize0, 
          numPasses - 1);
      endPos = m_OutStreamCurrent->GetPos();
      endCurByte = m_OutStreamCurrent->GetCurByte();
      if ((endPos & 7) > 0)
        WriteBits2(0, 8 - (endPos & 7));
      m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte);
      needCompare = true;
    }
  }

  UInt32 startBytePos2 = m_OutStreamCurrent->GetBytePos();
  UInt32 startPos2 = m_OutStreamCurrent->GetPos();
  UInt32 crcVal = EncodeBlockWithHeaders(block, blockSize);
  UInt32 endPos2 = m_OutStreamCurrent->GetPos();

  combinedCRC.Update(crcVal);

  if (needCompare)
  {
    UInt32 size2 = endPos2 - startPos2;
    if (size2 < endPos - startPos)
    { 
      UInt32 numBytes = m_OutStreamCurrent->GetBytePos() - startBytePos2;
      Byte *buffer = m_OutStreamCurrent->GetStream();
      for (UInt32 i = 0; i < numBytes; i++)
        buffer[startBytePos + i] = buffer[startBytePos2 + i];
      m_OutStreamCurrent->SetPos(startPos + endPos2 - startPos2);
    }
    else
    {
      m_OutStreamCurrent->SetPos(endPos);
      m_OutStreamCurrent->SetCurState((endPos & 7), endCurByte);
      combinedCRC = specCombinedCRC;
    }
  }
}

void CEncoder::EncodeBlock3(CBZip2CombinedCRC &combinedCRC, UInt32 blockSize)
{
  CMsbfEncoderTemp outStreamTemp;
  outStreamTemp.SetStream(m_TempArray);
  outStreamTemp.Init();
  m_OutStreamCurrent = &outStreamTemp;

  EncodeBlock2(combinedCRC, m_Block, blockSize, m_NumPasses);

  UInt32 size = outStreamTemp.GetPos();
  UInt32 bytesSize = (size / 8);
  for (UInt32 i = 0; i < bytesSize; i++)
    m_OutStream.WriteBits(m_TempArray[i], 8);
  WriteBits(outStreamTemp.GetCurByte(), (size & 7));
}

HRESULT CEncoder::CodeReal(ISequentialInStream *inStream,
    ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize,
    ICompressProgressInfo *progress)
{
  if (!m_BlockSorter.Create(kBlockSizeMax))
    return E_OUTOFMEMORY;
  
  if (m_Block == 0)
  {
    m_Block = (Byte *)BigAlloc(kBlockSizeMax * 5 + kBlockSizeMax / 10 + (20 << 10));
    if (m_Block == 0)
      return E_OUTOFMEMORY;
    m_MtfArray = m_Block + kBlockSizeMax;
    m_TempArray = m_MtfArray + kBlockSizeMax * 2 + 2;
  }
  if (!m_InStream.Create(kBufferSize))
    return E_OUTOFMEMORY;
  if (!m_OutStream.Create(kBufferSize))
    return E_OUTOFMEMORY;

  if (m_NeedHuffmanCreate)
  {
    for (int i = 0; i < kNumTablesMax; i++)
      if (!m_HuffEncoders[i].Create(kMaxAlphaSize, 0, 0, kMaxHuffmanLen))
        return E_OUTOFMEMORY;
    m_NeedHuffmanCreate = false;
  }

  m_InStream.SetStream(inStream);
  m_InStream.Init();

  m_OutStream.SetStream(outStream);
  m_OutStream.Init();

  CFlusher flusher(this);

  CBZip2CombinedCRC combinedCRC;

  WriteByte(kArSig0);
  WriteByte(kArSig1);
  WriteByte(kArSig2);
  WriteByte((Byte)(kArSig3 + m_BlockSizeMult));

  while (true)
  {
    UInt32 blockSize = ReadRleBlock(m_Block);
    if (blockSize == 0)
      break;
    EncodeBlock3(combinedCRC, blockSize);
    if (progress)
    {
      UInt64 packSize = m_InStream.GetProcessedSize();
      UInt64 unpackSize = m_OutStream.GetProcessedSize();
      RINOK(progress->SetRatioInfo(&packSize, &unpackSize));
    }
  }
  WriteByte(kFinSig0);
  WriteByte(kFinSig1);
  WriteByte(kFinSig2);
  WriteByte(kFinSig3);
  WriteByte(kFinSig4);
  WriteByte(kFinSig5);

  WriteCRC(combinedCRC.GetDigest());
  return S_OK;
}

STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
    ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize,
    ICompressProgressInfo *progress)
{
  try { return CodeReal(inStream, outStream, inSize, outSize, progress); }
  catch(...) { return S_FALSE; }
}

HRESULT CEncoder::SetCoderProperties(const PROPID *propIDs, 
    const PROPVARIANT *properties, UInt32 numProperties)
{
  for(UInt32 i = 0; i < numProperties; i++)
  {
    const PROPVARIANT &property = properties[i]; 
    switch(propIDs[i])
    {
      case NCoderPropID::kNumPasses:
      {
        if (property.vt != VT_UI4)
          return E_INVALIDARG;
        UInt32 numPasses = property.ulVal;
        if(numPasses == 0 || numPasses > 10)
          return E_INVALIDARG;
        m_NumPasses = numPasses;
        m_OptimizeNumTables = (m_NumPasses > 1);
        break;
      }
      case NCoderPropID::kDictionarySize:
      {
        if (property.vt != VT_UI4)
          return E_INVALIDARG;
        UInt32 dictionary = property.ulVal / kBlockSizeStep;
        if (dictionary < kBlockSizeMultMin)
          dictionary = kBlockSizeMultMin;
        else if (dictionary > kBlockSizeMultMax)
          dictionary = kBlockSizeMultMax;
        m_BlockSizeMult = dictionary;
        break;
      }
      default:
        return E_INVALIDARG;
    }
  }
  return S_OK;
}

}}

Generated by  Doxygen 1.6.0   Back to index