naiveproxy/net/tools/huffman_trie/huffman/huffman_builder.h
2018-12-09 21:59:24 -05:00

86 lines
3.0 KiB
C++

// Copyright (c) 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_
#define NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_
#include <stdint.h>
#include <map>
#include <memory>
#include <vector>
namespace net {
namespace huffman_trie {
namespace {
class HuffmanNode;
} // namespace
struct HuffmanRepresentation {
uint32_t bits;
uint32_t number_of_bits;
};
// A HuffmanRepresentationTable maps the original characters to their Huffman
// representation. The Huffman representation consists of the number of bits
// needed to represent the character and the actual bits.
using HuffmanRepresentationTable = std::map<uint8_t, HuffmanRepresentation>;
using HuffmanRepresentationPair = std::pair<uint8_t, HuffmanRepresentation>;
// This class tracks the number of times each character is used and calculates
// a space efficient way to represent all tracked characters by constructing a
// Huffman tree based on the number of times each character is seen.
class HuffmanBuilder {
public:
HuffmanBuilder();
~HuffmanBuilder();
// Will increase the count for |character| by one, indicating it has been
// used. |character| must be in the range 0-127.
void RecordUsage(uint8_t character);
// Returns a HuffmanRepresentationTable based on the usage data collected
// through RecordUsage().
HuffmanRepresentationTable ToTable();
// Outputs the Huffman representation as a vector of uint8_t's in a format
// Chromium can use to reconstruct the tree.
//
// The nodes of the tree are pairs of uint8s. The last node in the array is
// the root of the tree. Each pair is two uint8_t values, the first is "left"
// and the second is "right". If a uint8_t value has the MSB set then it
// represents a literal leaf value. Otherwise it's a pointer to the n'th
// element of the array.
std::vector<uint8_t> ToVector();
private:
// Determines the Huffman representation of the characters under |node| and
// inserts them into |*table|. |bits| and |number_of_bits| are used as a
// prefix.
void TreeToTable(HuffmanNode* node,
uint32_t bits,
uint32_t number_of_bits,
HuffmanRepresentationTable* table);
// Converts the tree under |*node| into a byte representation in |*vector|.
// See ToVector() for more information on the format.
uint32_t WriteToVector(HuffmanNode* node, std::vector<uint8_t>* vector);
// Constructs a Huffman tree based on |counts_|. Appends additional nodes to
// the tree until it contains at least 2 leafs.
std::unique_ptr<HuffmanNode> BuildTree();
// Holds usage information for the tracked characters. Maps the character to
// the number of times its usage has been recorded through RecordUsage().
std::map<uint8_t, uint32_t> counts_;
};
} // namespace huffman_trie
} // namespace net
#endif // NET_TOOLS_HUFFMAN_TRIE_HUFFMAN_HUFFMAN_BUILDER_H_