From 26b82983bcb9847d8749a23257589372d766833d Mon Sep 17 00:00:00 2001 From: Evgenii Kliuchnikov Date: Thu, 20 Jul 2023 07:07:39 -0700 Subject: [PATCH] speedup encoder on q5-9 / 1MB+ files PiperOrigin-RevId: 549619999 --- c/enc/backward_references.c | 2 ++ c/enc/hash.h | 3 +-- c/enc/hash_longest_match64_inc.h | 22 +++++++++---------- c/enc/quality.h | 37 ++++++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 13 deletions(-) diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c index ff5b7becf..f600e6452 100644 --- a/c/enc/backward_references.c +++ b/c/enc/backward_references.c @@ -181,6 +181,7 @@ void BrotliCreateBackwardReferences(size_t num_bytes, CASE_(65) #undef CASE_ default: + BROTLI_DCHECK(false); break; } } @@ -196,6 +197,7 @@ void BrotliCreateBackwardReferences(size_t num_bytes, FOR_GENERIC_HASHERS(CASE_) #undef CASE_ default: + BROTLI_DCHECK(false); break; } } diff --git a/c/enc/hash.h b/c/enc/hash.h index fc6e33400..5677d8292 100644 --- a/c/enc/hash.h +++ b/c/enc/hash.h @@ -78,8 +78,7 @@ typedef struct HasherSearchResult { for this use. * The number has been tuned heuristically against compression benchmarks. */ static const uint32_t kHashMul32 = 0x1E35A7BD; -static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); -static const uint64_t kHashMul64Long = +static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h index da75949ea..ea5a8310b 100644 --- a/c/enc/hash_longest_match64_inc.h +++ b/c/enc/hash_longest_match64_inc.h @@ -20,13 +20,12 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; } static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; } /* HashBytes is the function that chooses the bucket to place the address in. */ -static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data, - const int shift) { - const uint64_t mask = (~((uint64_t)0U)) >> 24; /* Use only 5 bytes. */ - const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long; +static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data, + uint64_t hash_mul) { + const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul; /* The higher bits contain more mixture from the multiplication, so we take our results from there. */ - return (uint32_t)(h >> shift); + return (size_t)(h >> (64 - 15)); } typedef struct HashLongestMatch { @@ -35,8 +34,8 @@ typedef struct HashLongestMatch { /* Only block_size_ newest backward references are kept, and the older are forgotten. */ size_t block_size_; - /* Left-shift for computing hash bucket index from hash value. */ - int hash_shift_; + /* Hash multiplier tuned to match length. */ + uint64_t hash_mul_; /* Mask for accessing entries in a block (in a ring-buffer manner). */ uint32_t block_mask_; @@ -61,7 +60,8 @@ static void FN(Initialize)( self->common_ = common; BROTLI_UNUSED(params); - self->hash_shift_ = 64 - common->params.bucket_bits; + self->hash_mul_ = kHashMul64 << (64 - 5 * 8); + BROTLI_DCHECK(common->params.bucket_bits == 15); self->bucket_size_ = (size_t)1 << common->params.bucket_bits; self->block_bits_ = common->params.block_bits; self->block_size_ = (size_t)1 << common->params.block_bits; @@ -81,7 +81,7 @@ static void FN(Prepare)( if (one_shot && input_size <= partial_prepare_threshold) { size_t i; for (i = 0; i < input_size; ++i) { - const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_); + const size_t key = FN(HashBytes)(&data[i], self->hash_mul_); num[key] = 0; } } else { @@ -107,7 +107,7 @@ static BROTLI_INLINE void FN(Store)( const size_t mask, const size_t ix) { uint16_t* BROTLI_RESTRICT num = self->num_; uint32_t* BROTLI_RESTRICT buckets = self->buckets_; - const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_); + const size_t key = FN(HashBytes)(&data[ix & mask], self->hash_mul_); const size_t minor_ix = num[key] & self->block_mask_; const size_t offset = minor_ix + (key << self->block_bits_); ++num[key]; @@ -212,7 +212,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)( } } { - const uint32_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_); + const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_); uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_]; const size_t down = (num[key] > self->block_size_) ? diff --git a/c/enc/quality.h b/c/enc/quality.h index 4415a54ff..ffdfd72fb 100644 --- a/c/enc/quality.h +++ b/c/enc/quality.h @@ -119,6 +119,41 @@ static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch( return params->quality < 9 ? 64 : 512; } +/* Quality to hasher mapping: + + - q02: h02 (longest_match_quickly), b16, l5 + + - q03: h03 (longest_match_quickly), b17, l5 + + - q04: h04 (longest_match_quickly), b17, l5 + - q04: h54 (longest_match_quickly), b20, l7 | for large files + + - q05: h05 (longest_match ), b14, l4 + - q05: h06 (longest_match64 ), b15, l5 | for large files + - q05: h40 (forgetful_chain ), b15, l4 | for small window + + - q06: h05 (longest_match ), b14, l4 + - q06: h06 (longest_match64 ), b15, l5 | for large files + - q06: h40 (forgetful_chain ), b15, l4 | for small window + + - q07: h05 (longest_match ), b15, l4 + - q07: h06 (longest_match64 ), b15, l5 | for large files + - q07: h41 (forgetful_chain ), b15, l4 | for small window + + - q08: h05 (longest_match ), b15, l4 + - q08: h06 (longest_match64 ), b15, l5 | for large files + - q08: h41 (forgetful_chain ), b15, l4 | for small window + + - q09: h05 (longest_match ), b15, l4 + - q09: h06 (longest_match64 ), b15, l5 | for large files + - q09: h42 (forgetful_chain ), b15, l4 | for small window + + - q10: t10 (to_binary_tree ), b17, l128 + + - q11: t10 (to_binary_tree ), b17, l128 + + Where "q" is quality, "h" is hasher type, "b" is bucket bits, + "l" is source len. */ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params, BrotliHasherParams* hparams) { if (params->quality > 9) { @@ -136,6 +171,8 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params, hparams->num_last_distances_to_check = params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16; } else { + /* TODO(eustas): often previous setting (H6) is faster and denser; consider + adding an option to use it. */ hparams->type = 5; hparams->block_bits = params->quality - 1; hparams->bucket_bits = params->quality < 7 ? 14 : 15;