From 9dbf50e001422b1800fcd9a42eebb52b15a0c13b Mon Sep 17 00:00:00 2001 From: Maccabee Levine Date: Wed, 13 Nov 2024 22:27:42 +0000 Subject: [PATCH 1/6] RateLimiter: Use Cloudflare Turnstile to detect bots --- config/vufind/RateLimiter.yaml | 27 ++++ module/VuFind/config/module.config.php | 4 + module/VuFind/src/VuFind/Bootstrapper.php | 17 ++- .../VuFind/Controller/TurnstileController.php | 118 ++++++++++++++++++ .../VuFind/RateLimiter/RateLimiterManager.php | 99 ++++++++++++++- .../RateLimiter/RateLimiterManagerFactory.php | 32 ++++- .../templates/turnstile/challenge.phtml | 22 ++++ 7 files changed, 307 insertions(+), 12 deletions(-) create mode 100644 module/VuFind/src/VuFind/Controller/TurnstileController.php create mode 100644 themes/bootstrap5/templates/turnstile/challenge.phtml diff --git a/config/vufind/RateLimiter.yaml b/config/vufind/RateLimiter.yaml index 24cc44b1a18..9399c9341c0 100644 --- a/config/vufind/RateLimiter.yaml +++ b/config/vufind/RateLimiter.yaml @@ -40,6 +40,12 @@ Storage: #redis_version : 6 #redis_standalone : true + # Namespace for Turnstile result cache (default is Turnstile) + # turnstileNamespace: Turnstile + + # Time-to-live (seconds) for Turnstile result cache (default is 86400, i.e. 1 day) + # turnstileTtl: 86400 + # Policies define the actual rate limiting settings. The request is checked against # the list of policies, and the first matching policy is used. # Keys under "Policies" are used as rate limiter IDs. @@ -82,6 +88,13 @@ Storage: # limit: 500 # rate: { interval: '10 minutes', amount: 100 } # +# turnstileRateLimiterSettings Defined like rateLimiterSettings, but when the limit is +# reached, a Turnstile challenge is used instead of simply +# returning a 429. The challenge result is cached. +# Failing the challenge results in standard 429 behavior. +# Passing the challenge bypasses this check, but note the +# rateLimiterSettings quota is still separately applied. 
+# # addHeaders Whether to add X-RateLimit-* headers (default is false) # # reportOnly If set to true, will not enforce the policy even if the main @@ -130,3 +143,17 @@ Policies: policy: token_bucket limit: 2000 rate: { interval: '10 minutes', amount: 400 } + +# Cloudflare Turnstile is a cloud-based "CAPTCHA alternative" to detect bots. +# https://developers.cloudflare.com/turnstile/ +# These are the global settings for Turnstile. See also the turnstile config above: +# - within each policy, to enable turnstile for that policy +# - storage settings for the result cache +# Turnstile: + # These two keys are required. See also values they can be set to for testing purposes: + # https://developers.cloudflare.com/turnstile/troubleshooting/testing/ + # siteKey: 0x1234567890 + # secretKey: 0x1234567890 + + # Verify API URL. Default is https://challenges.cloudflare.com/turnstile/v0/siteverify + # verifyUrl: https://challenges.cloudflare.com/turnstile/v0/siteverify diff --git a/module/VuFind/config/module.config.php b/module/VuFind/config/module.config.php index 283abde9f58..3a8b1b1d68f 100644 --- a/module/VuFind/config/module.config.php +++ b/module/VuFind/config/module.config.php @@ -222,6 +222,7 @@ 'VuFind\Controller\SummonController' => 'VuFind\Controller\AbstractBaseFactory', 'VuFind\Controller\SummonrecordController' => 'VuFind\Controller\AbstractBaseFactory', 'VuFind\Controller\TagController' => 'VuFind\Controller\AbstractBaseFactory', + 'VuFind\Controller\TurnstileController' => 'VuFind\Controller\AbstractBaseFactory', 'VuFind\Controller\UpgradeController' => 'VuFind\Controller\UpgradeControllerFactory', 'VuFind\Controller\WebController' => 'VuFind\Controller\AbstractBaseFactory', 'VuFind\Controller\WorldcatController' => 'VuFind\Controller\AbstractBaseFactory', @@ -346,6 +347,8 @@ 'summonrecord' => 'VuFind\Controller\SummonrecordController', 'Tag' => 'VuFind\Controller\TagController', 'tag' => 'VuFind\Controller\TagController', + 'Turnstile' => 
'VuFind\Controller\TurnstileController', + 'turnstile' => 'VuFind\Controller\TurnstileController', 'Upgrade' => 'VuFind\Controller\UpgradeController', 'upgrade' => 'VuFind\Controller\UpgradeController', 'Web' => 'VuFind\Controller\WebController', @@ -826,6 +829,7 @@ 'Search2/Versions', 'SimulatedSSO/Login', 'Summon/Advanced', 'Summon/FacetList', 'Summon/Home', 'Summon/Search', 'Tag/Home', + 'Turnstile/Challenge', 'Turnstile/Verify', 'Upgrade/ConfirmDeprecatedColumns', 'Upgrade/FixAnonymousTags', 'Upgrade/FixDuplicateTags', 'Upgrade/FixConfig', 'Upgrade/FixDatabase', 'Upgrade/FixMetadata', diff --git a/module/VuFind/src/VuFind/Bootstrapper.php b/module/VuFind/src/VuFind/Bootstrapper.php index 185adcf99fc..4c49e8476f5 100644 --- a/module/VuFind/src/VuFind/Bootstrapper.php +++ b/module/VuFind/src/VuFind/Bootstrapper.php @@ -394,8 +394,21 @@ protected function initRateLimiter(): void $result = $rateLimiterManager->check($event); if (!$result['allow']) { $response = $event->getResponse(); - $response->setStatusCode(429); - $response->setContent($result['message']); + if ($result['performTurnstileChallenge'] ?? false) { + $response->setStatusCode(307); + $policyId = $rateLimiterManager->checkPolicyUsesTurnstile($event); + $context = base64_encode(json_encode([ + 'policyId' => $policyId, + 'destination' => $event->getRequest()->getUri()->getPath(), + ])); + $response->getHeaders()->addHeaderLine( + 'Location', + '/vufind/Turnstile/Challenge?context=' . 
$context + ); + } else { + $response->setStatusCode(429); + $response->setContent($result['message']); + } $event->stopPropagation(true); return $response; } diff --git a/module/VuFind/src/VuFind/Controller/TurnstileController.php b/module/VuFind/src/VuFind/Controller/TurnstileController.php new file mode 100644 index 00000000000..67602fc5272 --- /dev/null +++ b/module/VuFind/src/VuFind/Controller/TurnstileController.php @@ -0,0 +1,118 @@ + + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org Main Page + */ + +namespace VuFind\Controller; + +use Laminas\Log\LoggerAwareInterface; +use VuFind\Log\LoggerAwareTrait; +use VuFindHttp\HttpServiceAwareInterface; +use VuFindHttp\HttpServiceAwareTrait; + +/** + * Controller for Cloudflare Turnstile access checks. + * + * @category VuFind + * @package Controller + * @author Maccabee Levine + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org Main Page + */ +class TurnstileController extends AbstractBase implements + HttpServiceAwareInterface, + LoggerAwareInterface +{ + use HttpServiceAwareTrait; + use LoggerAwareTrait; + + /** + * Present the Turnstile challenge to the user + * + * @return mixed + */ + public function challengeAction() + { + $context = json_decode(base64_decode($this->params()->fromQuery('context')), true); + + $yamlReader = $this->getService(\VuFind\Config\YamlReader::class); + $config = $yamlReader->get('RateLimiter.yaml'); + $context['siteKey'] = $config['Turnstile']['siteKey']; + + $this->layout()->searchbox = false; + return $this->createViewModel($context); + } + + /** + * Verify the Turnstile widget result against the Turnstile backend + * + * @return mixed + */ + public function verifyAction() + { + $token = $this->params()->fromPost('token'); + $policyId = $this->params()->fromPost('policyId'); + $destination = $this->params()->fromPost('destination'); + + // Call the Turnstile verify 
API to validate the token + $yamlReader = $this->getService(\VuFind\Config\YamlReader::class); + $config = $yamlReader->get('RateLimiter.yaml'); + $secretKey = $config['Turnstile']['secretKey']; + $url = $config['Turnstile']['verifyUrl'] ?? + 'https://challenges.cloudflare.com/turnstile/v0/siteverify'; + $body = [ + 'secret' => $secretKey, + 'response' => $token, + ]; + $response = $this->httpService->post( + $url, + json_encode($body), + 'application/json' + ); + + if ($response->isOk()) { + $responseData = json_decode($response->getBody(), true); + $success = $responseData['success']; + } else { + // Unexpected error. Treat as a positive result, since it's not the user's fault. + $this->logWarning('Verification process failed, allowing traffic: ' + . $response->getStatusCode() . $response->getBody()); + $success = true; + } + + // Save the Turnstile result for future requests + $rateLimiterManager = $this->getService(\VuFind\RateLimiter\RateLimiterManager::class); + $rateLimiterManager->setTurnstileResult( + $policyId, + $this->event->getRequest()->getServer('REMOTE_ADDR'), + $success + ); + + // Either way, return an HTTP redirect to the posted destination. 
+ return $this->redirect()->toUrl($destination); + } +} diff --git a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php index 1f45d914846..e94ae2d8dbe 100644 --- a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php +++ b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php @@ -30,6 +30,7 @@ namespace VuFind\RateLimiter; use Closure; +use Laminas\Cache\Storage\StorageInterface; use Laminas\EventManager\EventInterface; use Laminas\Log\LoggerAwareInterface; use Laminas\Mvc\MvcEvent; @@ -72,17 +73,19 @@ class RateLimiterManager implements LoggerAwareInterface, TranslatorAwareInterfa /** * Constructor * - * @param array $config Rate limiter configuration - * @param string $clientIp Client's IP address - * @param ?int $userId User ID or null if not logged in - * @param Closure $rateLimiterFactoryCallback Rate limiter factory callback - * @param IpAddressUtils $ipUtils IP address utilities + * @param array $config Rate limiter configuration + * @param string $clientIp Client's IP address + * @param ?int $userId User ID or null if not logged in + * @param Closure $rateLimiterFactoryCallback Rate limiter factory callback + * @param StorageInterface $turnstileCache A cache for Turnstile results + * @param IpAddressUtils $ipUtils IP address utilities */ public function __construct( protected array $config, protected string $clientIp, protected ?int $userId, protected Closure $rateLimiterFactoryCallback, + protected StorageInterface $turnstileCache, protected IpAddressUtils $ipUtils ) { $this->clientLogDetails = "ip:$clientIp"; @@ -146,6 +149,16 @@ public function check(EventInterface $event): array // We have a policy matching the route, so check rate limiter: $limiter = ($this->rateLimiterFactoryCallback)($this->config, $policyId, $this->clientIp, $this->userId); $limit = $limiter->consume(1); + if ($this->config['Policies'][$policyId]['turnstileRateLimiterSettings'] ?? 
false) { + $turnstileLimiter = ($this->rateLimiterFactoryCallback)( + $this->config, + $policyId, + $this->clientIp, + $this->userId, + 'turnstileRateLimiterSettings' + ); + $turnstileLimit = $turnstileLimiter->consume(1); + } $result = [ 'allow' => true, 'requestsRemaining' => $limit->getRemainingTokens(), @@ -171,6 +184,15 @@ public function check(EventInterface $event): array ] ); } + if (isset($turnstileLimit)) { + $priorTurnstileResult = $this->checkPriorTurnstileResult($policyId, $this->clientIp); + if (!$priorTurnstileResult && !$turnstileLimit->isAccepted()) { + $result['allow'] = false; + $result['message'] = $this->getTooManyRequestsResponseMessage($event, $result); + $result['performTurnstileChallenge'] = ($priorTurnstileResult === null); + return $result; + } + } if ($limit->isAccepted()) { return $result; } @@ -199,6 +221,9 @@ public function check(EventInterface $event): array */ protected function getPolicyIdForEvent(MvcEvent $event): ?string { + if ($event->getRouteMatch()->getParams()['controller'] == 'Turnstile') { + return null; + } $isCrawler = null; foreach ($this->config['Policies'] ?? [] as $name => $settings) { if (null !== ($loggedIn = $settings['loggedIn'] ?? null)) { @@ -318,4 +343,68 @@ protected function isCrawlerRequest(MvcEvent $event): bool $crawlerDetect = new \Jaybizzle\CrawlerDetect\CrawlerDetect(); return $crawlerDetect->isCrawler($agent); } + + /** + * Check whether the RateLimiter policy for this event uses Turnstile. + * + * @param MvcEvent $event Request event + * + * @return string|false Policy ID if the matching policy uses Turnstile; false otherwise + */ + public function checkPolicyUsesTurnstile($event) + { + $policyId = $this->getPolicyIdForEvent($event); + if ($this->config['Policies'][$policyId]['turnstileRateLimiterSettings'] ?? false) { + return $policyId; + } + return false; + } + + /** + * Check for a prior, cached result from Turnstile under this client IP and policy. 
+ * + * @param string $policyId The policy ID + * @param string $clientIp The client IP + * + * @return ?bool Null if there is no prior result, or if Turnstile is disabled; + * otherwise a boolean representing the Turnstile result. + */ + protected function checkPriorTurnstileResult($policyId, $clientIp) + { + if (!($this->config['Policies'][$policyId]['turnstileRateLimiterSettings'] ?? false)) { + return null; + } + $cacheKey = $this->getTurnstileCacheKey($policyId, $clientIp); + return $this->turnstileCache->getItem($cacheKey); + } + + /** + * Store a Turnstile result for this client IP and policy. + * + * @param string $policyId The policy ID + * @param string $clientIp The client IP + * @param bool $success The result to store + * + * @return void + */ + public function setTurnstileResult($policyId, $clientIp, $success) + { + $cacheKey = $this->getTurnstileCacheKey($policyId, $clientIp); + $this->turnstileCache->setItem($cacheKey, $success); + } + + /** + * Generate a key for the Turnstile cache. + * + * @param string $policyId The policy ID + * @param string $clientIp The client IP + * + * @return string The cache key + */ + protected function getTurnstileCacheKey($policyId, $clientIp) + { + $key = $policyId . '--' . 
$clientIp; + $key = str_replace('.', '-', $key); + return $key; + } } diff --git a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php index 35cb97561ea..47ac35d442c 100644 --- a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php +++ b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php @@ -94,6 +94,7 @@ public function __invoke( $request->getServer('REMOTE_ADDR'), $authManager->getUserObject()?->getId(), Closure::fromCallable([$this, 'getRateLimiter']), + $this->createTurnstileCache($config), $container->get(\VuFind\Net\IpAddressUtils::class) ); } @@ -112,32 +113,35 @@ protected function getRateLimiter( array $config, string $policyId, string $clientIp, - ?string $userId + ?string $userId, + string $configSection = 'rateLimiterSettings' ): LimiterInterface { $policy = $config['Policies'][$policyId] ?? []; - $rateLimiterConfig = $policy['rateLimiterSettings'] ?? []; + $rateLimiterConfig = $policy[$configSection] ?? []; $rateLimiterConfig['id'] = $policyId; if (null !== $userId && !($policy['preferIPAddress'] ?? false)) { $clientId = "u:$userId"; } else { $clientId = "ip:$clientIp"; } - $factory = new RateLimiterFactory($rateLimiterConfig, $this->createCache($config)); + $factory = new RateLimiterFactory($rateLimiterConfig, $this->createCache($config, $configSection)); return $factory->create($clientId); } /** * Create cache for the rate limiter * - * @param array $config Rate limiter configuration + * @param array $config Rate limiter configuration + * @param string $namespaceSuffix Qualifier for the namespace * * @return ?StorageInterface */ - protected function createCache(array $config): StorageInterface + protected function createCache(array $config, string $namespaceSuffix): StorageInterface { $storageConfig = $config['Storage'] ?? []; $adapter = $storageConfig['adapter'] ?? 
'memcached'; $storageConfig['options']['namespace'] ??= 'RateLimiter'; + $storageConfig['options']['namespace'] .= '-' . $namespaceSuffix; // Handle Redis cache separately: $adapterLc = strtolower($adapter); @@ -207,4 +211,22 @@ protected function createRedisCache(array $storageConfig): StorageInterface return new CredisStorage($redis, $options); } + + /** + * Create a cache for Turnstile results. + * + * @param array $config Rate limiter configuration + * + * @return ?StorageInterface + */ + protected function createTurnstileCache(array $config): \Laminas\Cache\Storage\StorageInterface + { + $turnstileConfig = unserialize(serialize($config)); + $storageOptions = $turnstileConfig['Storage']['options'] ?? []; + $storageOptions['namespace'] = $storageOptions['turnstileNamespace'] ?? 'Turnstile'; + $cacheManager = $this->getService(\VuFind\Cache\Manager::class); + $cache = $cacheManager->getCache('object', $storageOptions['namespace']); + $cache->getOptions()->setTtl($storageOptions['turnstileTtl'] ?? 60 * 60 * 24); + return $cache; + } } diff --git a/themes/bootstrap5/templates/turnstile/challenge.phtml b/themes/bootstrap5/templates/turnstile/challenge.phtml new file mode 100644 index 00000000000..ff76b3180d2 --- /dev/null +++ b/themes/bootstrap5/templates/turnstile/challenge.phtml @@ -0,0 +1,22 @@ +headScript()->appendFile('https://challenges.cloudflare.com/turnstile/v0/api.js', 'text/javascript', ['defer' => '', 'async' => '']); + $js = << +inlineScript()->appendScript($js)?> + +
+
+ + + +
From 529f12dce610f48ea3d950e36029e7327fbd33cd Mon Sep 17 00:00:00 2001 From: Maccabee Levine Date: Wed, 13 Nov 2024 23:10:45 +0000 Subject: [PATCH 2/6] Add missing doc param --- .../src/VuFind/RateLimiter/RateLimiterManagerFactory.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php index 47ac35d442c..f4b1cb441b9 100644 --- a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php +++ b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManagerFactory.php @@ -102,10 +102,11 @@ public function __invoke( /** * Get rate limiter * - * @param array $config Rate limiter configuration - * @param string $policyId Policy ID - * @param string $clientIp Client's IP address - * @param ?string $userId User ID or null if not logged in + * @param array $config Rate limiter configuration + * @param string $policyId Policy ID + * @param string $clientIp Client's IP address + * @param ?string $userId User ID or null if not logged in + * @param string $configSection Section of $config to get the rate limiter settings * * @return LimiterInterface */ From 3390d6b7b96da68ee9fb78069c16e6a48310afb3 Mon Sep 17 00:00:00 2001 From: Maccabee Levine Date: Wed, 13 Nov 2024 23:17:26 +0000 Subject: [PATCH 3/6] Clarify that turnstile is disabled by default --- config/vufind/RateLimiter.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/vufind/RateLimiter.yaml b/config/vufind/RateLimiter.yaml index 9399c9341c0..d7b3c2b0b1e 100644 --- a/config/vufind/RateLimiter.yaml +++ b/config/vufind/RateLimiter.yaml @@ -94,6 +94,7 @@ Storage: # Failing the challenge results in standard 429 behavior. # Passing the challenge bypasses this check, but note the # rateLimiterSettings quota is still separately applied. +# Default (not defined) means don't use Turnstile. 
# # addHeaders Whether to add X-RateLimit-* headers (default is false) # From eafe450a32094af3079193b55349029307ff0109 Mon Sep 17 00:00:00 2001 From: Maccabee Levine Date: Wed, 13 Nov 2024 23:30:49 +0000 Subject: [PATCH 4/6] Reference config.ini CAPTCHAs. --- config/vufind/RateLimiter.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/vufind/RateLimiter.yaml b/config/vufind/RateLimiter.yaml index d7b3c2b0b1e..3ca828c42e8 100644 --- a/config/vufind/RateLimiter.yaml +++ b/config/vufind/RateLimiter.yaml @@ -147,6 +147,7 @@ Policies: # Cloudflare Turnstile is a cloud-based "CAPTCHA alternative" to detect bots. # https://developers.cloudflare.com/turnstile/ +# See also [Captcha] settings in config.ini for actual CAPTCHAs. # These are the global settings for Turnstile. See also the turnstile config above: # - within each policy, to enable turnstile for that policy # - storage settings for the result cache From 14f595a32ba29c6551f4144744c2caccd00b490b Mon Sep 17 00:00:00 2001 From: Maccabee Levine Date: Thu, 14 Nov 2024 14:12:58 +0000 Subject: [PATCH 5/6] Fix url generation --- module/VuFind/src/VuFind/Bootstrapper.php | 2 +- themes/bootstrap5/templates/turnstile/challenge.phtml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/module/VuFind/src/VuFind/Bootstrapper.php b/module/VuFind/src/VuFind/Bootstrapper.php index 4c49e8476f5..8197114ef48 100644 --- a/module/VuFind/src/VuFind/Bootstrapper.php +++ b/module/VuFind/src/VuFind/Bootstrapper.php @@ -403,7 +403,7 @@ protected function initRateLimiter(): void ])); $response->getHeaders()->addHeaderLine( 'Location', - '/vufind/Turnstile/Challenge?context=' . $context + $event->getRequest()->getBaseUrl() . '/Turnstile/Challenge?context=' . 
$context ); } else { $response->setStatusCode(429); diff --git a/themes/bootstrap5/templates/turnstile/challenge.phtml b/themes/bootstrap5/templates/turnstile/challenge.phtml index ff76b3180d2..19f60995719 100644 --- a/themes/bootstrap5/templates/turnstile/challenge.phtml +++ b/themes/bootstrap5/templates/turnstile/challenge.phtml @@ -15,7 +15,7 @@ data-callback="turnstileChallengeEnded" data-error-callback="turnstileChallengeEnded" > -
+ From ecd4a2f894c1021b0a3b032ebfeaca2bb5b70ec0 Mon Sep 17 00:00:00 2001 From: Maccabee Levine Date: Thu, 14 Nov 2024 14:26:18 +0000 Subject: [PATCH 6/6] Avoid unnecessary lookup if possible --- module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php index e94ae2d8dbe..5c86ee9b116 100644 --- a/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php +++ b/module/VuFind/src/VuFind/RateLimiter/RateLimiterManager.php @@ -184,9 +184,9 @@ public function check(EventInterface $event): array ] ); } - if (isset($turnstileLimit)) { + if (isset($turnstileLimit) && !$turnstileLimit->isAccepted()) { $priorTurnstileResult = $this->checkPriorTurnstileResult($policyId, $this->clientIp); - if (!$priorTurnstileResult && !$turnstileLimit->isAccepted()) { + if (!$priorTurnstileResult) { $result['allow'] = false; $result['message'] = $this->getTooManyRequestsResponseMessage($event, $result); $result['performTurnstileChallenge'] = ($priorTurnstileResult === null);