Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files: browse the repository at this point in the history.
Co-authored-by: Eugene Kliuchnikov <[email protected]>
  • Loading branch information
heshpdx and eustas committed Nov 19, 2024
1 parent 1054ecc commit 782aadd
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
7 changes: 4 additions & 3 deletions c/enc/block_splitter_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
size_t insert_cost_ix = symbol * num_histograms;
double min_cost = 1e99;
double block_switch_cost = block_switch_bitcost;
static const double threshold = 0.07 / 2000.0;
static const size_t prologue_length = 2000;
static const double multiplier = 0.07 / 2000;
size_t k;
for (k = 0; k < num_histograms; ++k) {
/* We are coding the symbol with entropy code k. */
Expand All @@ -129,8 +130,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
}
}
/* More blocks for the beginning. */
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + threshold * (double)byte_ix;
if (byte_ix < prologue_length) {
block_switch_cost *= 0.77 + multiplier * (double)byte_ix;
}
for (k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost;
Expand Down
7 changes: 4 additions & 3 deletions c/enc/literal_cost.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
static const double threshold = 0.35 / 2000.0;
static const size_t prologue_length = 2000;
static const double multiplier = 0.35 / 2000;
double lit_cost;
if (histo == 0) {
histo = 1;
Expand All @@ -121,8 +122,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
if (i < 2000) {
lit_cost += 0.7 - ((double)(2000 - i) * threshold);
if (i < prologue_length) {
lit_cost += 0.35 + multiplier * (double)i;
}
cost[i] = (float)lit_cost;
}
Expand Down

0 comments on commit 782aadd

Please sign in to comment.