From f650498bcbfdf52ef6ae66f4ae22a2af8533fe1f Mon Sep 17 00:00:00 2001
From: Soren Spicknall
Date: Fri, 19 Jul 2024 14:21:40 -0500
Subject: [PATCH 1/4] Add configurable timeout to BigQuery transactions

---
 parsons/google/google_bigquery.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index ab47cadce9..a16f1b4c1a 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -361,6 +361,7 @@ def copy_from_gcs(
         compression_type: str = "gzip",
         new_file_extension: str = "csv",
         template_table: Optional[str] = None,
+        max_timeout: int = 30,
         **load_kwargs,
     ):
         """
@@ -421,6 +422,8 @@ def copy_from_gcs(
             template_table: str
                 Table name to be used as the load schema. Load operation wil use the same
                 columns and data types as the template table.
+            max_timeout: int
+                The maximum number of seconds to wait for a request before the job fails.
             **load_kwargs: kwargs
                 Other arguments to pass to the underlying load_table_from_uri call on the
                 BigQuery client.
@@ -463,12 +466,14 @@ def copy_from_gcs(
                     job_config=job_config,
                     compression_type=compression_type,
                     new_file_extension=new_file_extension,
+                    max_timeout=max_timeout,
                 )
             else:
                 return self._load_table_from_uri(
                     source_uris=gcs_blob_uri,
                     destination=table_ref,
                     job_config=job_config,
+                    max_timeout=max_timeout,
                     **load_kwargs,
                 )
         except exceptions.BadRequest as e:
@@ -493,6 +498,7 @@ def copy_from_gcs(
                     job_config=job_config,
                     compression_type=compression_type,
                     new_file_extension=new_file_extension,
+                    max_timeout=max_timeout,
                 )
             elif "Schema has no field" in str(e):
                 logger.debug(f"{gcs_blob_uri.split('/')[-1]} is empty, skipping file")
@@ -500,6 +506,10 @@ def copy_from_gcs(
             else:
                 raise e
+
+        except exceptions.DeadlineExceeded as e:
+            logger.error(f"Max timeout exceeded for {gcs_blob_uri.split('/')[-1]}")
+            raise e
 
     def copy_large_compressed_file_from_gcs(
         self,
@@ -519,6 +529,7 @@ def copy_large_compressed_file_from_gcs(
         compression_type: str = "gzip",
         new_file_extension: str = "csv",
         template_table: Optional[str] = None,
+        max_timeout: int = 30,
         **load_kwargs,
     ):
         """
@@ -577,6 +588,8 @@ def copy_large_compressed_file_from_gcs(
             template_table: str
                 Table name to be used as the load schema. Load operation wil use the same
                 columns and data types as the template table.
+            max_timeout: int
+                The maximum number of seconds to wait for a request before the job fails.
             **load_kwargs: kwargs
                 Other arguments to pass to the underlying load_table_from_uri call on the
                 BigQuery client.
@@ -621,6 +634,7 @@ def copy_large_compressed_file_from_gcs(
                 source_uris=uncompressed_gcs_uri,
                 destination=table_ref,
                 job_config=job_config,
+                max_timeout=max_timeout,
                 **load_kwargs,
             )
 
@@ -647,6 +661,7 @@ def copy_s3(
         tmp_gcs_bucket: Optional[str] = None,
         template_table: Optional[str] = None,
         job_config: Optional[LoadJobConfig] = None,
+        max_timeout: int = 30,
         **load_kwargs,
     ):
         """
@@ -692,6 +707,8 @@ def copy_s3(
                 on the BigQuery client. The function will create its own if not provided.
                 Note if there are any conflicts between the job_config and other parameters,
                 the job_config values are preferred.
+            max_timeout: int
+                The maximum number of seconds to wait for a request before the job fails.
 
         `Returns`
             Parsons Table or ``None``
@@ -724,6 +741,7 @@ def copy_s3(
                 nullas=nullas,
                 job_config=job_config,
                 template_table=template_table,
+                max_timeout=max_timeout,
                 **load_kwargs,
             )
         finally:
@@ -745,6 +763,7 @@ def copy(
         allow_jagged_rows: bool = True,
         quote: Optional[str] = None,
         schema: Optional[List[dict]] = None,
+        max_timeout: int = 30,
         **load_kwargs,
     ):
         """
@@ -774,6 +793,8 @@ def copy(
             template_table: str
                 Table name to be used as the load schema. Load operation wil use the same
                 columns and data types as the template table.
+            max_timeout: int
+                The maximum number of seconds to wait for a request before the job fails.
             **load_kwargs: kwargs
                 Arguments to pass to the underlying load_table_from_uri call on the
                 BigQuery client.
@@ -823,6 +844,7 @@ def copy(
                 source_uris=temp_blob_uri,
                 destination=self.get_table_ref(table_name=table_name),
                 job_config=job_config,
+                max_timeout=max_timeout
                 **load_kwargs,
             )
         finally:
@@ -1339,7 +1361,7 @@ def _validate_copy_inputs(self, if_exists: str, data_type: str):
         if data_type not in ["csv", "json"]:
             raise ValueError(f"Only supports csv or json files [data_type = {data_type}]")
 
-    def _load_table_from_uri(self, source_uris, destination, job_config, **load_kwargs):
+    def _load_table_from_uri(self, source_uris, destination, job_config, max_timeout, **load_kwargs):
         load_job = self.client.load_table_from_uri(
             source_uris=source_uris,
             destination=destination,
@@ -1348,7 +1370,7 @@ def _load_table_from_uri(self, source_uris, destination, job_config, **load_kwar
         )
 
         try:
-            load_job.result()
+            load_job.result(timeout=max_timeout)
             return load_job
         except exceptions.BadRequest as e:
             for idx, error_ in enumerate(load_job.errors):

From e1de9fab18e266df819d46aed9217f8e9fea9c47 Mon Sep 17 00:00:00 2001
From: Soren Spicknall
Date: Fri, 19 Jul 2024 14:40:37 -0500
Subject: [PATCH 2/4] Missing comma

---
 parsons/google/google_bigquery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index a16f1b4c1a..2b9f7af27e 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -844,7 +844,7 @@ def copy(
                 source_uris=temp_blob_uri,
                 destination=self.get_table_ref(table_name=table_name),
                 job_config=job_config,
-                max_timeout=max_timeout
+                max_timeout=max_timeout,
                 **load_kwargs,
             )
         finally:

From b7c174a26b03e5a673ea4be91c564cbd8217f924 Mon Sep 17 00:00:00 2001
From: Soren Spicknall
Date: Fri, 19 Jul 2024 14:43:21 -0500
Subject: [PATCH 3/4] Linting fixes

---
 parsons/google/google_bigquery.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index 2b9f7af27e..7febf6a9a3 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -506,7 +506,7 @@ def copy_from_gcs(
             else:
                 raise e
-
+
         except exceptions.DeadlineExceeded as e:
             logger.error(f"Max timeout exceeded for {gcs_blob_uri.split('/')[-1]}")
             raise e
@@ -1361,7 +1361,8 @@ def _validate_copy_inputs(self, if_exists: str, data_type: str):
         if data_type not in ["csv", "json"]:
             raise ValueError(f"Only supports csv or json files [data_type = {data_type}]")
 
-    def _load_table_from_uri(self, source_uris, destination, job_config, max_timeout, **load_kwargs):
+    def _load_table_from_uri(self, source_uris, destination, job_config, max_timeout,
+        **load_kwargs):
         load_job = self.client.load_table_from_uri(
             source_uris=source_uris,
             destination=destination,

From 8a6cc1009c314809384b48ac75c317d872cb2d25 Mon Sep 17 00:00:00 2001
From: Soren Spicknall
Date: Fri, 19 Jul 2024 14:45:21 -0500
Subject: [PATCH 4/4] Correct indentation depth

---
 parsons/google/google_bigquery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parsons/google/google_bigquery.py b/parsons/google/google_bigquery.py
index 7febf6a9a3..64f2396548 100644
--- a/parsons/google/google_bigquery.py
+++ b/parsons/google/google_bigquery.py
@@ -1362,7 +1362,7 @@ def _validate_copy_inputs(self, if_exists: str, data_type: str):
             raise ValueError(f"Only supports csv or json files [data_type = {data_type}]")
 
     def _load_table_from_uri(self, source_uris, destination, job_config, max_timeout,
-        **load_kwargs):
+                             **load_kwargs):
         load_job = self.client.load_table_from_uri(
             source_uris=source_uris,
             destination=destination,
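
Usage note (a reviewer sketch, not part of the patch series): the snippet below shows how a caller might exercise the new max_timeout parameter once these commits are applied. It assumes the usual `from parsons import GoogleBigQuery` import path and credentials available in the environment; the bucket, dataset, and table names are placeholders, and the 600-second value is arbitrary.

    from google.api_core import exceptions
    from parsons import GoogleBigQuery

    # Credentials are picked up from the environment
    # (e.g. GOOGLE_APPLICATION_CREDENTIALS pointing at a service account file).
    bq = GoogleBigQuery()

    try:
        # Allow up to 10 minutes for the load job instead of the 30-second
        # default introduced by this patch series.
        bq.copy_from_gcs(
            gcs_blob_uri="gs://example-bucket/exports/contacts.csv",  # placeholder URI
            table_name="example_dataset.contacts",  # placeholder dataset.table
            if_exists="append",
            max_timeout=600,
        )
    except exceptions.DeadlineExceeded:
        # With these patches, copy_from_gcs logs the timeout and re-raises,
        # so the caller can decide whether to retry or surface the failure.
        print("BigQuery load exceeded max_timeout; retry or raise the limit.")

Because max_timeout is threaded through copy_s3 and copy as well, the same keyword applies to loads that originate from S3 or from an in-memory Parsons Table.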