An implementation of the Sieve of Eratosthenes for finding prime numbers, fully focusing on performance.
The biggest performance gains come from forcing data set manipluation loops to be performed in the C-level code inside Python and not inside loops within the source code of the sieve. The places where this is used in particular are:
- Striking out compounds in the sieve using the list[start::skip] = list syntax
- Creating the final list of primes by using compress() to filter primes from the sieve
from math import sqrt, ceil
from itertools import compress
def sieve(until):
if until < 2:
return []
store_len = until >> 1
is_prime = [True] * store_len
for offset, number in enumerate(range(3, int(sqrt(until)) + 1, 2)):
if is_prime[offset]:
first_compound_at = (number*number-3) >> 1
nr_of_compounds = int(ceil((store_len-first_compound_at) / number))
is_prime[first_compound_at::number] = [False] * nr_of_compounds
return [2] + list(compress(range(3, until + 1, 2), is_prime))
from time import time
Test.describe("Below 2, no primes exist")
Test.assert_equals([], sieve(-1))
Test.assert_equals([], sieve(0))
Test.assert_equals([], sieve(1))
Test.describe("Low primes")
Test.assert_equals([2], sieve(2))
Test.assert_equals([2,3], sieve(3))
Test.assert_equals([2,3], sieve(4))
Test.assert_equals([2,3,5], sieve(5))
Test.assert_equals([2,3,5], sieve(6))
Test.assert_equals([2,3,5,7], sieve(7))
Test.assert_equals([2,3,5,7], sieve(8))
Test.assert_equals([2,3,5,7], sieve(9))
Test.assert_equals([2,3,5,7], sieve(10))
Test.assert_equals([2,3,5,7,11], sieve(11))
Test.assert_equals([2,3,5,7,11], sieve(12))
Test.assert_equals([2,3,5,7,11,13], sieve(13))
Test.describe("A big sieve")
t1 = time()
s = set(sieve(50000000))
t2 = time()
print("The big sieve until 50,000,000 was created in", t2-t1, "seconds")
Test.assert_equals(3001134, len(s), "The sieve must contain 3001134 primes")
# On codewars, I get timings around 2 seconds, but lets give the timing some slack here.
Test.expect(t2-t1 < 5, "The sieve must be created within 5 seconds")
some_primes = {2, 3, 131, 4700939, 11833931, 15418759, 19818061, 25227973, 36341861, 44811667, 49999991}
Test.expect(some_primes.issubset(s))
This is my implementation of the Baillie-PSW prime number test. It is my response to a kumite title that claimed a fast prime test, while the implemented check was not actually that fast.
The response as provided by the isPrime() function is not simply a boolean, but instead a tuple providing two values: a boolean indicating probably primality and a string describing the test to lead to the provided conclusing.
Knowing the actual test that triggered the outcome has proven a very valuable tool during development. If this were production code, I'd most likely make the isPrime() function return a boolean directly.
It was great fun to puzzle together all the required pieces and to translate mathmatical notation into working programming code! I learned a lot of new things about modular arithmatics.
from math import sqrt, ceil from itertools import compress TRIAL_DIVISION_UNTIL=1000 def isPrime(n): """This primality check implements the composite Baillie-PSW test. See: https://en.wikipedia.org/wiki/Baillie%E2%80%93PSW_primality_test Input: n: the number to check Output: a tuple containing two elements: - True if (probable) prime, False otherwise - The name of the test that lead to that conclusion """ if not isInteger(n): return (False, "not an integer") if isBelowLowestPrime(n): return (False, "below lowest prime") if isLowPrime(n): return (True, "low prime") if isTrialDivisionCompound(n): return (False, "trial division") if not isRabinMillerProbablePrime(n): return (False, "rabin-miller") if isPerfectSquare(n): return (False, "perfect square") if not isLucasProbablePrime(n): return (False, "lucas") return (True, "passed all tests") def isInteger(n): return int(n) == n def isBelowLowestPrime(n): return n < 2 def isLowPrime(n): return ( n <= TRIAL_DIVISION_UNTIL and n in lowPrimes ) def isTrialDivisionCompound(n): return any( n % prime == 0 for prime in lowPrimes if prime < n ) def isRabinMillerProbablePrime(n): """Based on pseudo code for the Rabin-Miller algorithm. See: https://en.wikipedia.org/wiki/Miller-Rabin_primality_test This simple version implements only a single pass using base 2, as prescribed by the Baillie-PSW specification.""" (factor, exponent) = factorizeBase2(n - 1) x = pow(2, factor, n) if x == 1 or x == n - 1: return True for _ in range(exponent - 1): x = pow(x, 2, n) if x == 1: return False elif x == n - 1: return True return False def isPerfectSquare(n): """A quick check to make sure that no perfect squares are fed to the Lucas probable prime check.""" return isqrt(n) ** 2 == n def isqrt(n): """Integer square root, see: https://en.wikipedia.org/wiki/Integer_square_root""" x = n y = (x + 1) >> 1 while y < x: x = y y = (x + n // x) >> 1 return x def isLucasProbablePrime(n): """Code based on "Implementing a Lucas probable prime test" to be found at: https://en.wikipedia.org/wiki/Lucas_pseudoprime""" def getLucasSequenceInputAsSuggestedBySelfridge(): seq = ((-1 if i&1 else 1) * (i*2 + 5) for i in range(0, n>>1)) D = next((D for D in seq if jacobi(D, n) == -1)) Q = (1 - D) // 4 P = 1 return D, Q, P D, Q, P = getLucasSequenceInputAsSuggestedBySelfridge() def computeLucasValue(subscript): U = V = k = 0 for digit in format(subscript, "b"): U, V, k = doubleK(U, V, k) if digit == "1": U, V, k = incrementK(U, V, k) return U, V, subscript def doubleK(U, V, subscript): return (U*V) % n, (pow(V, 2, n) - 2 * pow(Q, subscript, n)) % n, subscript << 1 def incrementK(U, V, subscript): U, V = (P*U + V), (D*U + P*V) makeEven = lambda x: x+n if x&1 else x div2modn = lambda x: (x >> 1) % n return (div2modn(makeEven(U)), div2modn(makeEven(V)), subscript+1) # Weak check. U, V, k = computeLucasValue(n+1) if U != 0: return False # Strong check. (factor, exponent) = factorizeBase2(n + 1) U, V, k = computeLucasValue(factor) if U == 0 or V == 0: return True for r in range(exponent-1): U, V, k = doubleK(U, V, k) if V == 0: return True def factorizeBase2(factor, exponent=0): """Factor out 2 from a number. For example factorizeBase2(80) returns (5, 4), which indicates that 2 ** 5 + 4 == 80.""" if factor&1: return (factor, exponent) return factorizeBase2(factor >> 1, exponent + 1) def jacobi(a, n): """Compute the Jacobi symbol. The code uses ideas from: https://www.academia.edu/1012630/A_binary_algorithm_for_the_Jacobi_symbol""" t = 1 while a: if a < 0: a = -a if n&3 == 3: t = -t while not a&1: a = a >> 1 if n&7 == 3 or n&7 == 5: t = -t if (a < n): a, n = n, a if a&3 == 3 and n&3 == 3: t = -t a = (a - n) >> 1 if n&7 == 3 or n&7 == 5: t = -t return t if n == 1 else 0 def sieve(until): """This is an optimized sieve-algorithm that I created for an exercism.io assignment. On my system, it is able to produce primes until 10,000,000 in half a second. See: http://exercism.io/submissions/a6085623b76a6747d300420e""" if until < 2: return [] store_len = until >> 1 is_prime = [True] * store_len for offset, number in enumerate(range(3, int(sqrt(until)) + 1, 2)): if is_prime[offset]: first_compound_at = (number*number-3) >> 1 nr_of_compounds = int(ceil((store_len-first_compound_at) / number)) is_prime[first_compound_at::number] = [False] * nr_of_compounds return [2] + list(compress(range(3, until + 1, 2), is_prime)) lowPrimes = set(sieve(TRIAL_DIVISION_UNTIL))
import mathdef isprime(num):if num<2 or int(num)!=num: return Falseif not num%2 and num>2: return Falsefor n in range(3,math.ceil((num-1)**.5)+1,2):if not num%n:return Falsereturn True- from math import sqrt, ceil
- from itertools import compress
- TRIAL_DIVISION_UNTIL=1000
- def isPrime(n):
- """This primality check implements the composite Baillie-PSW test.
- See: https://en.wikipedia.org/wiki/Baillie%E2%80%93PSW_primality_test
- Input:
- n: the number to check
- Output:
- a tuple containing two elements:
- - True if (probable) prime, False otherwise
- - The name of the test that lead to that conclusion
- """
- if not isInteger(n):
- return (False, "not an integer")
- if isBelowLowestPrime(n):
- return (False, "below lowest prime")
- if isLowPrime(n):
- return (True, "low prime")
- if isTrialDivisionCompound(n):
- return (False, "trial division")
- if not isRabinMillerProbablePrime(n):
- return (False, "rabin-miller")
- if isPerfectSquare(n):
- return (False, "perfect square")
- if not isLucasProbablePrime(n):
- return (False, "lucas")
- return (True, "passed all tests")
- def isInteger(n):
- return int(n) == n
- def isBelowLowestPrime(n):
- return n < 2
- def isLowPrime(n):
- return (
- n <= TRIAL_DIVISION_UNTIL and
- n in lowPrimes
- )
- def isTrialDivisionCompound(n):
- return any(
- n % prime == 0
- for prime in lowPrimes
- if prime < n
- )
- def isRabinMillerProbablePrime(n):
- """Based on pseudo code for the Rabin-Miller algorithm.
- See: https://en.wikipedia.org/wiki/Miller-Rabin_primality_test
- This simple version implements only a single pass using base 2,
- as prescribed by the Baillie-PSW specification."""
- (factor, exponent) = factorizeBase2(n - 1)
- x = pow(2, factor, n)
- if x == 1 or x == n - 1:
- return True
- for _ in range(exponent - 1):
- x = pow(x, 2, n)
- if x == 1:
- return False
- elif x == n - 1:
- return True
- return False
- def isPerfectSquare(n):
- """A quick check to make sure that no perfect squares are fed to the
- Lucas probable prime check."""
- return isqrt(n) ** 2 == n
- def isqrt(n):
- """Integer square root, see:
- https://en.wikipedia.org/wiki/Integer_square_root"""
- x = n
- y = (x + 1) >> 1
- while y < x:
- x = y
- y = (x + n // x) >> 1
- return x
- def isLucasProbablePrime(n):
- """Code based on "Implementing a Lucas probable prime test" to be found at:
- https://en.wikipedia.org/wiki/Lucas_pseudoprime"""
- def getLucasSequenceInputAsSuggestedBySelfridge():
- seq = ((-1 if i&1 else 1) * (i*2 + 5) for i in range(0, n>>1))
- D = next((D for D in seq if jacobi(D, n) == -1))
- Q = (1 - D) // 4
- P = 1
- return D, Q, P
- D, Q, P = getLucasSequenceInputAsSuggestedBySelfridge()
- def computeLucasValue(subscript):
- U = V = k = 0
- for digit in format(subscript, "b"):
- U, V, k = doubleK(U, V, k)
- if digit == "1":
- U, V, k = incrementK(U, V, k)
- return U, V, subscript
- def doubleK(U, V, subscript):
- return (U*V) % n, (pow(V, 2, n) - 2 * pow(Q, subscript, n)) % n, subscript << 1
- def incrementK(U, V, subscript):
- U, V = (P*U + V), (D*U + P*V)
- makeEven = lambda x: x+n if x&1 else x
- div2modn = lambda x: (x >> 1) % n
- return (div2modn(makeEven(U)), div2modn(makeEven(V)), subscript+1)
- # Weak check.
- U, V, k = computeLucasValue(n+1)
- if U != 0:
- return False
- # Strong check.
- (factor, exponent) = factorizeBase2(n + 1)
- U, V, k = computeLucasValue(factor)
- if U == 0 or V == 0:
- return True
- for r in range(exponent-1):
- U, V, k = doubleK(U, V, k)
- if V == 0:
- return True
- def factorizeBase2(factor, exponent=0):
- """Factor out 2 from a number. For example factorizeBase2(80) returns (5, 4),
- which indicates that 2 ** 5 + 4 == 80."""
- if factor&1:
- return (factor, exponent)
- return factorizeBase2(factor >> 1, exponent + 1)
- def jacobi(a, n):
- """Compute the Jacobi symbol. The code uses ideas from:
- https://www.academia.edu/1012630/A_binary_algorithm_for_the_Jacobi_symbol"""
- t = 1
- while a:
- if a < 0:
- a = -a
- if n&3 == 3: t = -t
- while not a&1:
- a = a >> 1
- if n&7 == 3 or n&7 == 5: t = -t
- if (a < n):
- a, n = n, a
- if a&3 == 3 and n&3 == 3: t = -t
- a = (a - n) >> 1
- if n&7 == 3 or n&7 == 5: t = -t
- return t if n == 1 else 0
- def sieve(until):
- """This is an optimized sieve-algorithm that I created for an exercism.io assignment.
- On my system, it is able to produce primes until 10,000,000 in half a second.
- See: http://exercism.io/submissions/a6085623b76a6747d300420e"""
- if until < 2:
- return []
- store_len = until >> 1
- is_prime = [True] * store_len
- for offset, number in enumerate(range(3, int(sqrt(until)) + 1, 2)):
- if is_prime[offset]:
- first_compound_at = (number*number-3) >> 1
- nr_of_compounds = int(ceil((store_len-first_compound_at) / number))
- is_prime[first_compound_at::number] = [False] * nr_of_compounds
- return [2] + list(compress(range(3, until + 1, 2), is_prime))
- lowPrimes = set(sieve(TRIAL_DIVISION_UNTIL))
# You can use Test.describe and Test.it to write BDD style test groupings Test.describe("Checks whether or not a number is prime") Test.expect(not isPrime(-1)[0],"negative numbers can't be prime") Test.expect(not isPrime(5.5)[0],"decimals can't be prime") Test.expect(not isPrime(0)[0],"0 is not prime") Test.expect(not isPrime(1)[0],"1 is not prime") Test.expect(isPrime(2)[0],"2 is prime") Test.expect(isPrime(3)[0],"3 is prime") Test.expect(not isPrime(4)[0],"4 is not prime") Test.expect(isPrime(7)[0],"7 is prime") Test.expect(not isPrime(9)[0],"9 is not prime") Test.expect(not isPrime(49)[0],"49 is prime") Test.expect(isPrime(192837474746574737273627201)[0], "the huge number is a prime") Test.expect(isPrime(1928374747465747372736227262524253547337)[0], "the even huger number is a prime") Test.expect(not isPrime(1928374747465747372736227262524253547339)[0], "the other huger number is not a prime") Test.describe("Checking for a prime must be fast for large numbers as well") Test.expect(isPrime(1734822086675357441)[0],"1734822086675357441 is prime") Test.expect(not isPrime(1734822086675357442)[0],"1734822086675357441 is prime") Test.expect(not isPrime(1734822086675357400000043)[0], "1734822086675357400000043 is not prime") Test.describe("Test the Jacobi symbol function") # The data that is tested against was retrieved from a thirs party web page that # listed a large grid of jacobi symbols. results = [] for n in range(1, 60, 2): for m in range(1, 31): results.append(jacobi(m, n)) Test.assert_equals( "1111111111111111111111111111111-101-101-101-101-101-101-101-101-101-101-1-1101-1-1101-1-1101-1-1101-1-1101-1-11011-11-1-1011-11-1-1011-11-1-1011-11-1-10111101101101101101101101101101101-1111-1-1-11-101-1111-1-1-11-101-1111-1-1-11-111-1-1-1-111-1101-111-1-1-1-111-1101-111110100-1100-10-1-10110100-1100-10-1-1011-11-1-1-111-1-1-11-111011-11-1-1-111-1-1-111-1-11111-11-11-1-1-1-111-101-1-11111-11-111-101100-10-1-10-100110-1101-101100-101111-11-111-1-111-1-11-11-1-1-1-101111-11-11111011110111101111011110111101-101-101-101-101-101-101-101-101-101-101-1-11111-11-1-1-11-1-11-1-1-11-11111-1-110111-111-11111-1-1-11-11-1111-1-1-1-11-1-11-1-11101-10-110-100-1-10110-1-100-101-10-1101-1110-10-1101110011-1-100-1-1-10-110101-111-1-11-11111-1-1-11-1-1-1-11-1-1-11111-11110110-1101100-101-10-1101-10100-1-1011-111-1-1111-1-1-1-1-11-11-111-11-11-1-1-1-1-11-1-11-11-1-1111-111111-1-1-11-1111-1-1-1-1-11-10100-1-10010-1101-10100-1-10010-1101111-11111-1-11-11-1111-1-11-1-111-111-1-11111110111111011111101111110111-10110-1-10-110110100110-1101-10-1101-1-11-111-1111-11-1111-1-1-1-1-1-111-1-111-111-110-111100-1110111-10-10-1-101-11-101101-10110-1-10-1101-100-10-1-101-101101-1111-11-11-1-11-1-1111-11111-1-111111-1", "".join(map(repr, results))) Test.describe("Test the pseudo prime numbers as generated by Rabin-Miller and Lucas") # By checking these, we can gain a lot of trust about the quality # of the implementation of those checks. s = set(sieve(10000000)); pseudo = [] missing = [] for i in range(21, 100000, 2): if not isPerfectSquare(i): if isLucasProbablePrime(i): if not i in s: pseudo.append("L" + str(i)) elif i in s: missing.append("L" + str(i)) if isRabinMillerProbablePrime(i): if not i in s: pseudo.append("R" + str(i)) elif i in s: missing.append("R" + str(i)) Test.assert_equals([], missing) Test.assert_equals( ['R2047', 'R3277', 'R4033', 'R4681', 'L5459', 'L5777', 'R8321', 'L10877', 'R15841', 'L16109', 'L18971', 'L22499', 'L24569', 'L25199', 'R29341', 'L40309', 'R42799', 'R49141', 'R52633', 'L58519', 'R65281', 'R74665', 'L75077', 'R80581', 'R85489', 'R88357', 'R90751', 'L97439'], pseudo)
# TODO: Replace examples and use TDD development by writing your own tests# These are some of the methods available:# test.expect(boolean, [optional] message)# test.assert_equals(actual, expected, [optional] message)# test.assert_not_equals(actual, expected, [optional] message)- # You can use Test.describe and Test.it to write BDD style test groupings
Test.describe("Gets whether a number is prime")Test.it("takes the square root")Test.expect(not isprime(-1),"negative numbers can't be prime")Test.expect(not isprime(5.5),"decimals can't be prime")Test.expect(not isprime(0),"0 is not prime")Test.expect(not isprime(1),"1 is not prime")Test.expect(isprime(2),"2 is prime")Test.expect(isprime(3),"3 is prime")Test.expect(not isprime(4),"4 is not prime")Test.expect(isprime(7),"7 is prime")Test.expect(not isprime(9),"9 is not prime")Test.expect(not isprime(49),"49 is prime")- Test.describe("Checks whether or not a number is prime")
- Test.expect(not isPrime(-1)[0],"negative numbers can't be prime")
- Test.expect(not isPrime(5.5)[0],"decimals can't be prime")
- Test.expect(not isPrime(0)[0],"0 is not prime")
- Test.expect(not isPrime(1)[0],"1 is not prime")
- Test.expect(isPrime(2)[0],"2 is prime")
- Test.expect(isPrime(3)[0],"3 is prime")
- Test.expect(not isPrime(4)[0],"4 is not prime")
- Test.expect(isPrime(7)[0],"7 is prime")
- Test.expect(not isPrime(9)[0],"9 is not prime")
- Test.expect(not isPrime(49)[0],"49 is prime")
- Test.expect(isPrime(192837474746574737273627201)[0], "the huge number is a prime")
- Test.expect(isPrime(1928374747465747372736227262524253547337)[0], "the even huger number is a prime")
- Test.expect(not isPrime(1928374747465747372736227262524253547339)[0], "the other huger number is not a prime")
- Test.describe("Checking for a prime must be fast for large numbers as well")
- Test.expect(isPrime(1734822086675357441)[0],"1734822086675357441 is prime")
- Test.expect(not isPrime(1734822086675357442)[0],"1734822086675357441 is prime")
- Test.expect(not isPrime(1734822086675357400000043)[0], "1734822086675357400000043 is not prime")
- Test.describe("Test the Jacobi symbol function")
- # The data that is tested against was retrieved from a thirs party web page that
- # listed a large grid of jacobi symbols.
- results = []
- for n in range(1, 60, 2):
- for m in range(1, 31):
- results.append(jacobi(m, n))
- Test.assert_equals(
- "1111111111111111111111111111111-101-101-101-101-101-101-101-101-101-101-1-1101-1-1101-1-1101-1-1101-1-1101-1-11011-11-1-1011-11-1-1011-11-1-1011-11-1-10111101101101101101101101101101101-1111-1-1-11-101-1111-1-1-11-101-1111-1-1-11-111-1-1-1-111-1101-111-1-1-1-111-1101-111110100-1100-10-1-10110100-1100-10-1-1011-11-1-1-111-1-1-11-111011-11-1-1-111-1-1-111-1-11111-11-11-1-1-1-111-101-1-11111-11-111-101100-10-1-10-100110-1101-101100-101111-11-111-1-111-1-11-11-1-1-1-101111-11-11111011110111101111011110111101-101-101-101-101-101-101-101-101-101-101-1-11111-11-1-1-11-1-11-1-1-11-11111-1-110111-111-11111-1-1-11-11-1111-1-1-1-11-1-11-1-11101-10-110-100-1-10110-1-100-101-10-1101-1110-10-1101110011-1-100-1-1-10-110101-111-1-11-11111-1-1-11-1-1-1-11-1-1-11111-11110110-1101100-101-10-1101-10100-1-1011-111-1-1111-1-1-1-1-11-11-111-11-11-1-1-1-1-11-1-11-11-1-1111-111111-1-1-11-1111-1-1-1-1-11-10100-1-10010-1101-10100-1-10010-1101111-11111-1-11-11-1111-1-11-1-111-111-1-11111110111111011111101111110111-10110-1-10-110110100110-1101-10-1101-1-11-111-1111-11-1111-1-1-1-1-1-111-1-111-111-110-111100-1110111-10-10-1-101-11-101101-10110-1-10-1101-100-10-1-101-101101-1111-11-11-1-11-1-1111-11111-1-111111-1",
- "".join(map(repr, results)))
- Test.describe("Test the pseudo prime numbers as generated by Rabin-Miller and Lucas")
- # By checking these, we can gain a lot of trust about the quality
- # of the implementation of those checks.
- s = set(sieve(10000000));
- pseudo = []
- missing = []
- for i in range(21, 100000, 2):
- if not isPerfectSquare(i):
- if isLucasProbablePrime(i):
- if not i in s:
- pseudo.append("L" + str(i))
- elif i in s:
- missing.append("L" + str(i))
- if isRabinMillerProbablePrime(i):
- if not i in s:
- pseudo.append("R" + str(i))
- elif i in s:
- missing.append("R" + str(i))
- Test.assert_equals([], missing)
- Test.assert_equals(
- ['R2047', 'R3277', 'R4033', 'R4681', 'L5459', 'L5777', 'R8321',
- 'L10877', 'R15841', 'L16109', 'L18971', 'L22499', 'L24569',
- 'L25199', 'R29341', 'L40309', 'R42799', 'R49141', 'R52633',
- 'L58519', 'R65281', 'R74665', 'L75077', 'R80581', 'R85489',
- 'R88357', 'R90751', 'L97439'], pseudo)