From 095a577e58230db100dea5059bf44309664af48b Mon Sep 17 00:00:00 2001
From: Esa Jokinen
Date: Tue, 2 Jul 2024 22:01:31 +0300
Subject: [PATCH] Configurable update interval (1-120 days) for CPE & CVE

---
Review notes (this section is ignored by `git am`):
- The configured interval is accepted for 1..120 days inclusive; the
  previous `< 120` comparison silently dropped the 120-day case that the
  subject line and the warning text both allow.
- The CVE "no records" warning names the cves collection again.
- CPE `update()` still accepts `**kwargs`, so existing callers keep working.
- NOTE(review): the CVE path now adds one second to `lastModified`, which
  the pre-patch code did not do -- confirm this is intended.
- NOTE(review): leading whitespace of context lines was reconstructed from
  a whitespace-collapsed copy; apply with `git apply --ignore-whitespace`
  (or `patch -l`) if exact matching fails. Post-image blob ids on the
  `index` lines are those of the original patch.

 .../core/database_maintenance/main_updater.py |  15 ++-
 .../database_maintenance/sources_process.py   | 110 +++++++++++-------
 2 files changed, 80 insertions(+), 45 deletions(-)

diff --git a/CveXplore/core/database_maintenance/main_updater.py b/CveXplore/core/database_maintenance/main_updater.py
index 653b139e9..a1eeba723 100644
--- a/CveXplore/core/database_maintenance/main_updater.py
+++ b/CveXplore/core/database_maintenance/main_updater.py
@@ -66,11 +66,11 @@ def reset_download_sources_to_default(self):
 
         return True
 
-    def update(self, update_source: str | list = None):
+    def update(self, update_source: str | list = None, update_days: int = 0):
         """
         Method used for updating the database
         """
-        self.logger.info(f"Starting Database update....")
+        self.logger.info(f"Starting Database update...")
         start_time = time.time()
 
         if not self.do_initialize:
@@ -88,7 +88,16 @@ def update(self, update_source: str | list = None):
         if update_source is None:
             for source in self.sources:
                 up = source["updater"]()
-                up.update()
+                if update_days > 0:
+                    if source["name"] in ("cpe", "cve"):
+                        up.update(update_days=update_days)
+                    else:
+                        self.logger.warning(
+                            f"Update interval in days not supported by source {source['name']}; ignoring"
+                        )
+                        up.update()
+                else:
+                    up.update()
 
         elif isinstance(update_source, list):
             for source in update_source:
diff --git a/CveXplore/core/database_maintenance/sources_process.py b/CveXplore/core/database_maintenance/sources_process.py
index 3030ac16c..4ad7cfb6c 100644
--- a/CveXplore/core/database_maintenance/sources_process.py
+++ b/CveXplore/core/database_maintenance/sources_process.py
@@ -99,7 +99,7 @@ def process_the_item(self, item: dict = None):
 
         return cpe
 
-    def process_downloads(self, sites: list | None = None):
+    def process_downloads(self, sites: list | None = None, update_days: int = 0):
         """
         Method to download and process files
         """
@@ -156,29 +156,40 @@ def process_downloads(self, sites: list | None = None):
                                     f"Retrieval of api data on url: {data_list.args[0]} failed...."
                                 )
             else:
-                last_mod_start_date = self.database[self.feed_type.lower()].find_one(
-                    {}, {"lastModified": 1}, sort=[("lastModified", -1)]
-                )
+                # Get datetime from runtime
+                last_mod_end_date = datetime.datetime.now()
 
-                if last_mod_start_date is not None:
-                    if "lastModified" in last_mod_start_date:
-                        last_mod_start_date = last_mod_start_date[
-                            "lastModified"
-                        ] + datetime.timedelta(
-                            0, 1
-                        )  # add one second to prevent false results...
-                    else:
-                        raise KeyError(
-                            "Missing field 'lastModified' from database query..."
-                        )
-                else:
+                # Use configured day interval or detect from the latest entry in the database
+                if update_days > 120:
                     self.logger.warning(
-                        "No records found in the mongodb cpe collection.."
+                        "Update interval over 120 days not supported by the NVD API; ignoring"
                     )
-                    return
-
-                # Get datetime from runtime
-                last_mod_end_date = datetime.datetime.now()
+                if 0 < update_days <= 120:
+                    last_mod_start_date = last_mod_end_date - datetime.timedelta(
+                        days=update_days
+                    )
+                else:
+                    last_mod_start_date = self.database[
+                        self.feed_type.lower()
+                    ].find_one({}, {"lastModified": 1}, sort=[("lastModified", -1)])
+
+                    if last_mod_start_date is not None:
+                        if "lastModified" in last_mod_start_date:
+                            last_mod_start_date = last_mod_start_date[
+                                "lastModified"
+                            ] + datetime.timedelta(
+                                0, 1
+                            )  # add one second to prevent false results...
+                        else:
+                            raise KeyError(
+                                "Missing field 'lastModified' from database query..."
+                            )
+                    else:
+                        self.logger.warning(
+                            "No records found in the mongodb cpe collection.."
+                        )
+                        return
+                self.logger.info(f"Retrieving CPEs starting from {last_mod_start_date}")
 
                 try:
                     total_results = self.api_handler.get_count(
@@ -231,10 +242,10 @@ def process_downloads(self, sites: list | None = None):
             f"Duration: {datetime.timedelta(seconds=time.time() - start_time)}"
         )
 
-    def update(self, **kwargs):
+    def update(self, update_days: int = 0, **kwargs):
         self.logger.info("CPE database update started")
 
-        self.process_downloads()
+        self.process_downloads(update_days=update_days)
 
         # if collection is non-existent; assume it's not an update
         if self.feed_type.lower() not in self.getTableNames():
@@ -644,7 +655,7 @@ def process_the_item(self, item: dict = None):
 
         return cve
 
-    def process_downloads(self, sites: list = None):
+    def process_downloads(self, sites: list = None, update_days: int = 0):
         """
         Method to download and process files
         """
@@ -701,25 +712,40 @@ def process_downloads(self, sites: list = None):
                                     f"Retrieval of api data on url: {data_list.args[0]} failed...."
                                 )
             else:
-                last_mod_start_date = self.database[self.feed_type.lower()].find_one(
-                    {}, {"lastModified": 1}, sort=[("lastModified", -1)]
-                )
+                # Get datetime from runtime
+                last_mod_end_date = datetime.datetime.now()
 
-                if last_mod_start_date is not None:
-                    if "lastModified" in last_mod_start_date:
-                        last_mod_start_date = last_mod_start_date["lastModified"]
-                    else:
-                        raise KeyError(
-                            "Missing field 'lastModified' from database query..."
-                        )
-                else:
+                # Use configured day interval or detect from the latest entry in the database
+                if update_days > 120:
                     self.logger.warning(
-                        "No records found in the mongodb cves collection.."
+                        "Update interval over 120 days not supported by the NVD API; ignoring"
                     )
-                    return
-
-                # Get datetime from runtime
-                last_mod_end_date = datetime.datetime.now()
+                if 0 < update_days <= 120:
+                    last_mod_start_date = last_mod_end_date - datetime.timedelta(
+                        days=update_days
+                    )
+                else:
+                    last_mod_start_date = self.database[
+                        self.feed_type.lower()
+                    ].find_one({}, {"lastModified": 1}, sort=[("lastModified", -1)])
+
+                    if last_mod_start_date is not None:
+                        if "lastModified" in last_mod_start_date:
+                            last_mod_start_date = last_mod_start_date[
+                                "lastModified"
+                            ] + datetime.timedelta(
+                                0, 1
+                            )  # add one second to prevent false results...
+                        else:
+                            raise KeyError(
+                                "Missing field 'lastModified' from database query..."
+                            )
+                    else:
+                        self.logger.warning(
+                            "No records found in the mongodb cves collection.."
+                        )
+                        return
+                self.logger.info(f"Retrieving CVEs starting from {last_mod_start_date}")
 
                 try:
                     total_results = self.api_handler.get_count(
@@ -772,10 +798,10 @@ def process_downloads(self, sites: list = None):
             f"Duration: {datetime.timedelta(seconds=time.time() - start_time)}"
         )
 
-    def update(self):
+    def update(self, update_days: int = 0):
         self.logger.info("CVE database update started")
 
-        self.process_downloads()
+        self.process_downloads(update_days=update_days)
 
         # if collection is non-existent; assume it's not an update
         if self.feed_type.lower() not in self.getTableNames():