diff --git a/PrimeCPP/solution_2/PrimeCPP_array.cpp b/PrimeCPP/solution_2/PrimeCPP_array.cpp index 4edd24af0..7aca088ce 100644 --- a/PrimeCPP/solution_2/PrimeCPP_array.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_array.cpp @@ -1,7 +1,5 @@ // --------------------------------------------------------------------------- -// PrimeCPP.cpp : Pol Marcet's Modified version of Dave's Garage Prime Sieve -// Some great ideas taken from Rust's implementation from Michael Barber -// @mike-barber https://www.github.com/mike-barber (bit-storage-rotate) +// Optimized PrimeCPP.cpp // --------------------------------------------------------------------------- #include @@ -23,192 +21,169 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; -class BitArray { - uint8_t *array; - size_t logicalSize; +class BitArray +{ + uint8_t* _byteArray; + size_t _numberOfBits; + size_t _numberOfIndices; // Number of indices (odd numbers up to _numberOfBits) - static constexpr size_t arraySize(size_t size) + static constexpr size_t arraySizeInBytes(size_t numberOfBits) { - return (size >> 3) + ((size & 7) > 0); + return (numberOfBits >> 3) + ((numberOfBits & 7) > 0); } - static constexpr size_t index(size_t n) + static constexpr size_t byteIndexOfBit(size_t n) { return (n >> 3); } public: - explicit BitArray(size_t size) : logicalSize(size) + explicit BitArray(size_t size) + : _numberOfBits(size), _numberOfIndices((size + 1) / 2) { - auto arrSize = (size + 1) / 2; // Only store bits for odd numbers - array = new uint8_t[arraySize(arrSize)]; - std::memset(array, 0x00, arraySize(arrSize)); + size_t arrSizeInBytes = arraySizeInBytes(_numberOfIndices); + _byteArray = new uint8_t[arrSizeInBytes]; + std::memset(_byteArray, 0x00, arrSizeInBytes); } - ~BitArray() { delete[] array; } + ~BitArray() + { + delete[] _byteArray; + } - constexpr bool get(size_t n) const + // Methods that take index directly + inline bool get_index(size_t index) const { - if (n % 2 == 0) - return false; // Even numbers > 2 are not prime - n = n / 2; // Map the actual number to the index in the array - return !(array[index(n)] & (uint8_t(1) << (n % 8))); + return !(_byteArray[byteIndexOfBit(index)] & (uint8_t(1) << (index % 8))); } - void set(size_t n) + inline void set_index(size_t index) { - n = n / 2; // Map the actual number to the index in the array - array[index(n)] |= (uint8_t(1) << (n % 8)); + _byteArray[byteIndexOfBit(index)] |= (uint8_t(1) << (index % 8)); } - constexpr size_t size() const + size_t numberOfBits() const { - return logicalSize; + return _numberOfBits; } -}; - -// prime_sieve -// -// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) -// and includes the code needed to eliminate non-primes from its array by calling runSieve. + size_t numberOfIndices() const + { + return _numberOfIndices; + } +}; class prime_sieve { - private: - - BitArray Bits; // Sieve data, where 0==prime, 1==not - - public: - - prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default - { - } - - ~prime_sieve() - { - } - - // runSieve - // - // Scan the array for the next factor (>2) that hasn't yet been eliminated from the array, and then - // walk through the array crossing off every multiple of that factor. - - void runSieve() - { - uint64_t factor = 3; - uint64_t q = (int) sqrt(Bits.size()); - - while (factor <= q) - { - // Find the next prime number - for (; factor <= q; factor += 2) - { - if (Bits.get(factor)) - { - break; - } - } - - // Mark multiples of the prime number as not prime - uint64_t start = factor * factor; - for (uint64_t num = start; num <= Bits.size(); num += factor * 2) - { - Bits.set(num); - } - - factor += 2; - } - } - - // countPrimes - // - // Can be called after runSieve to determine how many primes were found in total - - size_t countPrimes() const - { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (uint64_t num = 3; num <= Bits.size(); num += 2) - if (Bits.get(num)) - count++; - return count; - } - - // isPrime - // - // Can be called after runSieve to determine whether a given number is prime. - - bool isPrime(uint64_t n) const - { - if (n == 2) - return true; - if (n < 2 || n % 2 == 0) - return false; - return Bits.get(n); - } - - // validateResults - // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the - // sieve processing at all, only to sanity check that the results are right when done. - - bool validateResults() const - { - const std::map resultsDictionary = - { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 - { 1'000LLU, 168 }, - { 10'000LLU, 1229 }, - { 100'000LLU, 9592 }, - { 1'000'000LLU, 78498 }, - { 10'000'000LLU, 664579 }, - { 100'000'000LLU, 5761455 }, - { 1'000'000'000LLU, 50847534 }, - { 10'000'000'000LLU, 455052511 }, - }; - if (resultsDictionary.end() == resultsDictionary.find(Bits.size())) - return false; - return resultsDictionary.find(Bits.size())->second == countPrimes(); - } - - // printResults - // - // Displays stats about what was found as well as (optionally) the primes themselves - - void printResults(bool showResults, double duration, size_t passes, size_t threads) const - { - if (showResults) - cout << "2, "; - - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num += 2) - { - if (Bits.get(num)) - { - if (showResults) - cout << num << ", "; - count++; - } - } - - if (showResults) - cout << "\n"; - - cout << "Passes: " << passes << ", " - << "Threads: " << threads << ", " - << "Time: " << duration << ", " - << "Average: " << duration/passes << ", " - << "Limit: " << Bits.size() << ", " - << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid: " << (validateResults() ? "Pass" : "FAIL!") - << "\n"; - - // Following 2 lines added by rbergen to conform to drag race output format - cout << "\n"; - cout << "davepl_array;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; - } - +private: + BitArray Bits; // Sieve data, where 0 == prime, 1 == not + +public: + prime_sieve(uint64_t n) : Bits(n) {} + + void runSieve() + { + size_t q = (size_t)std::sqrt(Bits.numberOfBits()); + size_t q_index = q / 2; + + size_t factor_index = 1; // Index for number 3 + size_t factor = factor_index * 2 + 1; + + while (factor_index <= q_index) + { + // If the number at factor_index is prime + if (Bits.get_index(factor_index)) + { + factor = factor_index * 2 + 1; + size_t start_index = (factor * factor) / 2; + + for (size_t num_index = start_index; num_index < Bits.numberOfIndices(); num_index += factor) + { + Bits.set_index(num_index); + } + } + ++factor_index; + } + } + + size_t countPrimes() const + { + size_t count = (Bits.numberOfBits() >= 2) ? 1 : 0; // Count 2 as prime if within range + for (size_t index = 1; index < Bits.numberOfIndices(); ++index) + { + if (Bits.get_index(index)) + ++count; + } + return count; + } + + bool isPrime(uint64_t n) const + { + if (n == 2) + return true; + if (n < 2 || n % 2 == 0) + return false; + size_t index = n / 2; + if (index < Bits.numberOfIndices()) + return Bits.get_index(index); + else + return false; + } + + bool validateResults() const + { + const std::map resultsDictionary = + { + { 10LLU, 4 }, + { 100LLU, 25 }, + { 1'000LLU, 168 }, + { 10'000LLU, 1229 }, + { 100'000LLU, 9592 }, + { 1'000'000LLU, 78498 }, + { 10'000'000LLU, 664579 }, + { 100'000'000LLU, 5761455 }, + { 1'000'000'000LLU, 50847534 }, + { 10'000'000'000LLU, 455052511 }, + }; + auto it = resultsDictionary.find(Bits.numberOfBits()); + if (it != resultsDictionary.end()) + return it->second == countPrimes(); + else + return false; + } + + void printResults(bool showResults, double duration, size_t passes, size_t threads) const + { + if (showResults && Bits.numberOfBits() >= 2) + std::cout << "2, "; + + for (size_t index = 1; index < Bits.numberOfIndices(); ++index) + { + if (Bits.get_index(index)) + { + if (showResults) + std::cout << (index * 2 + 1) << ", "; + } + } + + if (showResults) + std::cout << "\n"; + + size_t count = countPrimes(); + + std::cout << "Passes: " << passes << ", " + << "Threads: " << threads << ", " + << "Time: " << duration << ", " + << "Average: " << duration / passes << ", " + << "Limit: " << Bits.numberOfBits() << ", " + << "Counts: " << count << "/" << count << ", " + << "Valid: " << (validateResults() ? "Pass" : "FAIL!") + << "\n"; + + // Output format conforming to drag race output format + std::cout << "\n"; + std::cout << "davepl_array;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; + } }; // custom_atoll @@ -229,22 +204,23 @@ long long custom_atoll(const std::string& value_str) { } char last_char = input_str.back(); - if (suffixes.find(last_char) != suffixes.end()) { + if (suffixes.find(last_char) != suffixes.end()) + { long long multiplier = suffixes.at(last_char); std::string numeric_part = input_str.substr(0, input_str.size() - 1); std::istringstream iss(numeric_part); double numeric_value; - if (!(iss >> numeric_value)) { + if (!(iss >> numeric_value)) throw std::invalid_argument("Invalid numeric part: " + numeric_part); - } + return static_cast(numeric_value * multiplier); } std::istringstream iss(input_str); long long result; - if (!(iss >> result)) { + if (!(iss >> result)) throw std::invalid_argument("Invalid input format"); - } + return result; } @@ -279,7 +255,7 @@ int main(int argc, char **argv) else if (*i == "-l" || *i == "--limit") { i++; - ullLimitRequested = (i == args.end()) ? 0LL : max((long long)1, custom_atoll(i->c_str())); + ullLimitRequested = (i == args.end()) ? 0LL : max((long long)1, custom_atoll(*i)); } else if (*i == "-1" || *i == "--oneshot") { @@ -346,15 +322,18 @@ int main(int argc, char **argv) std::vector threads(cThreads); std::vector l_passes(cThreads); for (unsigned int i = 0; i < cThreads; i++) - threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) + threads[i] = std::thread([i, &l_passes, &tStart, cSeconds](size_t llUpperLimit) { l_passes[i] = 0; - while (duration_cast(steady_clock::now() - tStart).count() < 5) { - prime_sieve(llUpperLimit).runSieve(); + while (duration_cast(steady_clock::now() - tStart).count() < cSeconds) + { + prime_sieve sieve(llUpperLimit); + sieve.runSieve(); ++l_passes[i]; } }, llUpperLimit); - for (auto i = 0; i < cThreads; i++) { + for (auto i = 0; i < cThreads; i++) + { threads[i].join(); cPasses += l_passes[i]; } @@ -362,10 +341,9 @@ int main(int argc, char **argv) duration = duration_cast(tEnd).count()/1000000.0; } - if (bOneshot) { - cPasses = 1.0 / duration * 5; + cPasses = static_cast(1.0 / duration * 5); duration = 5.0; } diff --git a/PrimeCPP/solution_2/PrimeCPP_mask.cpp b/PrimeCPP/solution_2/PrimeCPP_mask.cpp index 90a871c3d..ba334d25c 100644 --- a/PrimeCPP/solution_2/PrimeCPP_mask.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_mask.cpp @@ -24,99 +24,104 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint32_t *array; - size_t logicalSize; + uint64_t *_wordArray; + size_t _numberOfBits; - inline static size_t arraySize(size_t size) - { - return (size >> 5) + ((size & 31) > 0); + inline static size_t arraySize(size_t size) { + return (size >> 6) + ((size & 63) > 0); // Adjust for 64-bit words } - inline static size_t index(size_t n) - { - return (n >> 5); + inline static size_t index(size_t n) { + return (n >> 6); // Adjust for 64-bit words } - inline static uint32_t getSubindex(size_t n, uint32_t d) - { - return d & uint32_t(0x01) << (n % 32); + inline static uint64_t getSubindex(size_t n, uint64_t d) { + return d & (uint64_t(0x01) << (n % 64)); } - inline void setFalseSubindex(size_t n, uint32_t &d) - { - d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); + inline void setFalseSubindex(size_t n, uint64_t &d) { + d &= ~(uint64_t(0x01) << (n % 64)); } public: - explicit BitArray(size_t size) : logicalSize(size) - { - array = new uint32_t[arraySize(size)]; - std::memset(array, 0xFF, arraySize(size) * sizeof(uint32_t)); + explicit BitArray(size_t size) : _numberOfBits(size) { + _wordArray = new uint64_t[arraySize(size)]; + std::memset(_wordArray, 0xFF, arraySize(size) * sizeof(uint64_t)); } - ~BitArray() {delete [] array;} + ~BitArray() { delete[] _wordArray; } - bool get(size_t n) const - { - return getSubindex(n, array[index(n)]); + inline size_t size() const { + return _numberOfBits; } - static constexpr uint32_t rol(uint32_t x, uint32_t n) - { - return (x<>(32-n)); + bool get(size_t n) const { + return getSubindex(n, _wordArray[index(n)]); } - static constexpr uint32_t buildSkipMask(size_t skip, size_t offset) - { - uint32_t mask = 0; - for (size_t i = offset; i < 32; i += skip) { - mask |= (1u << i); + static constexpr uint64_t rol(uint64_t x, uint64_t n) { + return (x << n) | (x >> (64 - n)); + } + + static constexpr uint64_t buildSkipMask(size_t skip, size_t offset) { + uint64_t mask = 0; + for (size_t i = offset; i < 64; i += skip) { + mask |= (1ULL << i); } return ~mask; } void setFlagsFalse(size_t n, size_t skip) { - if (skip <= 12) { - // For small skips, use pre-built mask approach - size_t word_idx = index(n); - size_t bit_pos = n % 32; - size_t curr_n = n; - - while (curr_n < size()) - { - // Build mask for current word starting at bit_pos - uint32_t mask = buildSkipMask(skip, bit_pos); - - // Apply mask to current word - array[word_idx] &= mask; - - // Move to next word - size_t bits_remaining = 32 - bit_pos; - curr_n += ((bits_remaining + skip - 1) / skip) * skip; - - if (curr_n >= size()) break; - - word_idx = index(curr_n); - bit_pos = curr_n % 32; - } - } - else + if (skip <= 32) { - // Original implementation for larger skips - auto rolling_mask = ~uint32_t(1 << (n % 32)); - auto roll_bits = skip % 32; + constexpr size_t BITS_PER_WORD = 64; + uint64_t* ptr = _wordArray; + + // Initial calculations + size_t current_index = n / BITS_PER_WORD; + size_t bit_position = n % BITS_PER_WORD; + + // Clear individual bits until reaching the next word boundary + while (bit_position < BITS_PER_WORD && n < size()) { + ptr[current_index] &= ~(1ULL << bit_position); // Clear specific bit + n += skip; + current_index = n / BITS_PER_WORD; + bit_position = n % BITS_PER_WORD; + } + + // Define a mask that clears every `skip`-th bit in a full 64-bit word + uint64_t mask = 0; + for (size_t i = 0; i < BITS_PER_WORD; i += skip) + mask |= (1ULL << i); + mask = ~mask; + + // Apply the mask to full words between boundaries + while (n + BITS_PER_WORD <= size()) { + ptr[current_index] &= mask; + n += skip * BITS_PER_WORD / skip; // Move by the number of bits cleared per word + current_index = n / BITS_PER_WORD; + } + + // Handle any remaining bits individually + bit_position = n % BITS_PER_WORD; while (n < size()) { - array[index(n)] &= rolling_mask; + ptr[current_index] &= ~(1ULL << bit_position); // Clear specific bit n += skip; - rolling_mask = rol(rolling_mask, roll_bits); + current_index = n / BITS_PER_WORD; + bit_position = n % BITS_PER_WORD; } + return; } - } - inline size_t size() const - { - return logicalSize; + // Original implementation for larger skips + auto rolling_mask = ~uint64_t(1ULL << (n % 64)); + auto roll_bits = skip % 64; + while (n < size()) { + _wordArray[index(n)] &= rolling_mask; + n += skip; + rolling_mask = rol(rolling_mask, roll_bits); + } } };