Calendar spine optimizations #114

Draft · wants to merge 5 commits into `main`
15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,18 @@
# dbt_jira v0.16.0
[PR #114](https://github.com/fivetran/dbt_jira/pull/114) contains the following updates:

## 🚨 Breaking Changes 🚨
- Refactored `int_jira__issue_calendar_spine` to reduce computational load and ensure proper incremental runs.
- Created `int_jira__calendar_spine` to move the calendar spine into its own model, which is then joined into the issue calendar spine.
- Moved the `issue_dates` CTE join into `int_jira__field_history_scd` to avoid loading all issue rows within the issue calendar spine on incremental runs.
- Updated and added incremental logic in `int_jira__issue_calendar_spine` and `jira__daily_issue_field_history` to filter and insert records based on the issues present in each model (sketched below).
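
In condensed form, the new per-issue incremental filter in `int_jira__issue_calendar_spine` looks like the following (excerpted and reformatted from the diff below, not a drop-in snippet):

```sql
{% if is_incremental() %}
-- compare each issue to its own high-water mark rather than a table-wide one
where spine.date_day >= (
    select max(this.earliest_open_until_date)
    from {{ this }} as this
    where issue_dates.issue_id = this.issue_id
)
{% endif %}
```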

## 🚘 Under the Hood 🚘
- Updated `jira__daily_issue_field_history` to exclude the `created_on` and `open_until` fields from the pivoted field operations in this model. These fields come from the `issue_dates` CTE and are only needed by `int_jira__issue_calendar_spine`.

## 📜 Documentation Updates 📜
- Updated the README (see the `Extend the history of an issue past its closing date` section [in Additional Configurations](https://github.com/fivetran/dbt_jira?tab=readme-ov-file#optional-step-5-additional-configurations)) with an explanation of what the `jira_issue_history_buffer` variable does in `int_jira__issue_calendar_spine`.

# dbt_jira v0.15.0
[PR #108](https://github.com/fivetran/dbt_jira/pull/108) contains the following updates:
## 🚨 Breaking Changes 🚨
12 changes: 12 additions & 0 deletions README.md
@@ -92,6 +92,7 @@ vars:
issue_field_history_columns: ['the', 'list', 'of', 'field', 'names']
```


### Adjust the field-grain for issue field history transformations if duplicate field names exist
This package provides the option to use `field_name` instead of `field_id` as the field-grain for issue field history transformations. By default, the package strictly partitions and joins issue field data using `field_id`. However, this assumes there are no duplicate field names in Jira. In practice it is easy to create, for instance, a second `Sprint` field, and different Jira users across your organization may choose the wrong or an inconsistent version of the field. As such, the `jira_field_grain` variable may be adjusted to change the field-grain behavior of the issue field history models. You may adjust the variable using the following configuration in your root `dbt_project.yml`.

@@ -100,6 +101,17 @@ vars:
jira_field_grain: 'field_name' # field_id by default
```

### Extend the history of an issue past its closing date
This package gives you the option to use a buffer variable to bring in issues past their closing date, since an issue can be marked un-resolved again after that date. The buffer ensures that an issue's daily history is not cut off prematurely.

You may adjust the variable using the following configuration in your root `dbt_project.yml`.

```yml
vars:
jira_issue_history_buffer: insert_number_of_months # 1 by default
```
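
For example, to extend each issue's daily history three months past its closing date (an illustrative value, not a recommendation):

```yml
vars:
  jira_issue_history_buffer: 3 # months of history to keep past an issue's close
```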


### Change the build schema
By default, this package builds the Jira staging models within a schema titled (`<target_schema>` + `_jira_source`) and your Jira modeling models within a schema titled (`<target_schema>` + `_jira`) in your destination. If this is not where you would like your Jira data to be written to, add the following configuration to your root `dbt_project.yml` file:
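
A sketch of that configuration, assuming the package's standard `jira_source` / `jira` model tree (the schema names are placeholders):

```yml
models:
  jira_source:
    +schema: my_new_schema_name # builds to <target_schema>_my_new_schema_name
  jira:
    +schema: my_new_schema_name
```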

2 changes: 1 addition & 1 deletion dbt_project.yml
@@ -1,5 +1,5 @@
name: 'jira'
version: '0.15.0'
version: '0.16.0'
config-version: 2
require-dbt-version: [">=1.3.0", "<2.0.0"]
vars:
2 changes: 1 addition & 1 deletion docs/catalog.json

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions docs/index.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/manifest.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/run_results.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion integration_tests/.gitignore
@@ -3,4 +3,5 @@ target/
dbt_modules/
logs/
.DS_Store
**/env/
env/
package-lock.yml
10 changes: 5 additions & 5 deletions integration_tests/ci/sample.profiles.yml
@@ -16,13 +16,13 @@ integration_tests:
pass: "{{ env_var('CI_REDSHIFT_DBT_PASS') }}"
dbname: "{{ env_var('CI_REDSHIFT_DBT_DBNAME') }}"
port: 5439
schema: jira_integrations_tests_1
schema: jira_integrations_tests_2
threads: 8
bigquery:
type: bigquery
method: service-account-json
project: 'dbt-package-testing'
schema: jira_integrations_tests_1
schema: jira_integrations_tests_2
threads: 8
keyfile_json: "{{ env_var('GCLOUD_SERVICE_KEY') | as_native }}"
snowflake:
@@ -33,7 +33,7 @@
role: "{{ env_var('CI_SNOWFLAKE_DBT_ROLE') }}"
database: "{{ env_var('CI_SNOWFLAKE_DBT_DATABASE') }}"
warehouse: "{{ env_var('CI_SNOWFLAKE_DBT_WAREHOUSE') }}"
schema: jira_integrations_tests_1
schema: jira_integrations_tests_2
threads: 8
postgres:
type: postgres
@@ -42,13 +42,13 @@
pass: "{{ env_var('CI_POSTGRES_DBT_PASS') }}"
dbname: "{{ env_var('CI_POSTGRES_DBT_DBNAME') }}"
port: 5432
schema: jira_integrations_tests_1
schema: jira_integrations_tests_2
threads: 8
databricks:
catalog: "{{ env_var('CI_DATABRICKS_DBT_CATALOG') }}"
host: "{{ env_var('CI_DATABRICKS_DBT_HOST') }}"
http_path: "{{ env_var('CI_DATABRICKS_DBT_HTTP_PATH') }}"
schema: jira_integrations_tests_1
schema: jira_integrations_tests_2
threads: 2
token: "{{ env_var('CI_DATABRICKS_DBT_TOKEN') }}"
type: databricks
4 changes: 2 additions & 2 deletions integration_tests/dbt_project.yml
@@ -1,12 +1,12 @@
name: 'jira_integration_tests'
version: '0.15.0'
version: '0.16.0'
config-version: 2
profile: 'integration_tests'


vars:
jira_source:
jira_schema: jira_integrations_tests_1
jira_schema: jira_integrations_tests_2
jira_comment_identifier: "comment"
jira_component_identifier: "component"
jira_epic_identifier: "epic"
34 changes: 34 additions & 0 deletions models/intermediate/field_history/int_jira__calendar_spine.sql
@@ -0,0 +1,34 @@
with spine as (

{% if execute %}
{% set first_date_query %}
-- start at the first created issue
select min( created ) as min_date from {{ source('jira','issue') }}
{% endset %}
{% set first_date = run_query(first_date_query).columns[0][0]|string %}

{% else %} {% set first_date = "2016-01-01" %}
{% endif %}


select *
from (
{{
dbt_utils.date_spine(
datepart = "day",
start_date = "cast('" ~ first_date[0:10] ~ "' as date)",
end_date = dbt.dateadd("week", 1, dbt.current_timestamp_in_utc_backcompat())
)
}}
) as date_spine

),

recast as (

select cast(date_day as date) as date_day
from spine
)

select *
from recast
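
For context, `dbt_utils.date_spine` emits one row per `datepart` from `start_date` up to, but not including, `end_date`. A minimal standalone invocation with hard-coded illustrative dates:

```sql
select *
from (
    {{ dbt_utils.date_spine(
        datepart = "day",
        start_date = "cast('2016-01-01' as date)",
        end_date = "cast('2016-02-01' as date)"
    ) }}
) as date_spine -- one row per day in January 2016
```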
30 changes: 26 additions & 4 deletions models/intermediate/field_history/int_jira__field_history_scd.sql
@@ -2,12 +2,19 @@

{%- set issue_columns = adapter.get_columns_in_relation(ref('int_jira__pivot_daily_field_history')) -%}

with change_data as (
with issue as (

select *
from {{ var('issue') }}
),

change_data as (

select *
from {{ ref('int_jira__pivot_daily_field_history') }}
),

), set_values as (
set_values as (

select
valid_starting_on,
@@ -27,7 +34,9 @@

from change_data

), fill_values as (
),

fill_values as (

-- each row of the pivoted table includes field values if that field was updated on that day
-- we need to backfill to persist values that have been previously updated and are still valid
@@ -50,7 +59,20 @@

from set_values

),

issue_dates as (

select
fill_values.*,
cast( {{ dbt.date_trunc('day', 'issue.created_at') }} as date) as created_on,
-- resolved_at will become null if an issue is marked as un-resolved. if this sort of thing happens often, you may want to run full refreshes of the field_history models regularly
-- if it's not resolved include everything up to today. if it is, look at the last time it was updated
cast({{ dbt.date_trunc('day', 'case when issue.resolved_at is null then ' ~ dbt.current_timestamp_in_utc_backcompat() ~ ' else cast(fill_values.valid_starting_on as ' ~ dbt.type_timestamp() ~ ') end') }} as date) as open_until
from fill_values
left join issue
on fill_values.issue_id = issue.issue_id
)

select *
from fill_values
from issue_dates
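
For readability, the `open_until` expression above compiles to roughly the following (a sketch; the exact date functions vary by warehouse):

```sql
cast(
    date_trunc('day',
        case
            -- unresolved: carry the issue's history forward to today
            when issue.resolved_at is null then current_timestamp
            -- resolved: stop at the row's last valid date
            else cast(fill_values.valid_starting_on as timestamp)
        end
    ) as date
) as open_until
```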
models/intermediate/field_history/int_jira__issue_calendar_spine.sql
@@ -11,53 +11,19 @@

with spine as (

{% if execute %}
{% set first_date_query %}
-- start at the first created issue
select min( created ) as min_date from {{ source('jira','issue') }}
{% endset %}
{% set first_date = run_query(first_date_query).columns[0][0]|string %}

{% else %} {% set first_date = "2016-01-01" %}
{% endif %}


select *
from (
{{
dbt_utils.date_spine(
datepart = "day",
start_date = "cast('" ~ first_date[0:10] ~ "' as date)",
end_date = dbt.dateadd("week", 1, dbt.current_timestamp_in_utc_backcompat())
)
}}
) as date_spine
select *
from {{ ref('int_jira__calendar_spine') }}

{% if is_incremental() %}
-- compare to the earliest possible open_until date so that if a resolved issue is updated after a long period of inactivity, we don't need a full refresh
-- essentially we need to be able to backfill
where cast( date_day as date) >= (select min(earliest_open_until_date) from {{ this }} )
{% endif %}
{# {% if is_incremental() %}
where date_day >= (select min(earliest_open_until_date) from {{ this }})
{% endif %} #}
),

issue_history_scd as (
issue_dates as (

select *
from {{ ref('int_jira__field_history_scd') }}
),

issue_dates as (

select
issue_history_scd.issue_id,
cast( {{ dbt.date_trunc('day', 'issue.created_at') }} as date) as created_on,
-- resolved_at will become null if an issue is marked as un-resolved. if this sorta thing happens often, you may want to run full-refreshes of the field_history models often
-- if it's not resolved include everything up to today. if it is, look at the last time it was updated
cast({{ dbt.date_trunc('day', 'case when issue.resolved_at is null then ' ~ dbt.current_timestamp_in_utc_backcompat() ~ ' else cast(issue_history_scd.valid_starting_on as ' ~ dbt.type_timestamp() ~ ') end') }} as date) as open_until
from issue_history_scd
left join {{ var('issue') }} as issue
on issue_history_scd.issue_id = issue.issue_id
),
from {{ ref('int_jira__field_history_scd') }}
),

issue_spine as (

@@ -66,13 +32,14 @@ issue_spine as (
issue_dates.issue_id,
-- will take the table-wide min of this in the incremental block at the top of this model
min(issue_dates.open_until) as earliest_open_until_date

from spine
join issue_dates on
issue_dates.created_on <= spine.date_day
and {{ dbt.dateadd('month', var('jira_issue_history_buffer', 1), 'issue_dates.open_until') }} >= spine.date_day
-- if we cut off issues, we're going to have to do a full refresh to catch issues that have been un-resolved

{% if is_incremental() %}
where spine.date_day >= (select max(this.earliest_open_until_date) from {{ this }} as this where issue_dates.issue_id = this.issue_id )
{% endif %}
group by 1,2
),

@@ -83,9 +50,8 @@ surrogate_key as (
issue_id,
{{ dbt_utils.generate_surrogate_key(['date_day','issue_id']) }} as issue_day_id,
earliest_open_until_date

from issue_spine

where date_day <= cast( {{ dbt.date_trunc('day',dbt.current_timestamp_in_utc_backcompat()) }} as date)
)
select * from surrogate_key

select * from surrogate_key
26 changes: 13 additions & 13 deletions models/jira__daily_issue_field_history.sql
@@ -15,12 +15,12 @@
-- in intermediate/field_history/
with pivoted_daily_history as (

select *
from {{ ref('int_jira__field_history_scd') }}
select {{ dbt_utils.star(from=ref('int_jira__field_history_scd'), except=['created_on','open_until']) }}
from {{ ref('int_jira__field_history_scd') }} as scd

{% if is_incremental() %}

where valid_starting_on >= (select max(date_day) from {{ this }} )
where scd.valid_starting_on >= (select max(this.date_day) from {{ this }} as this where this.issue_id = scd.issue_id )

-- If no issue fields have been updated since the last incremental run, the pivoted_daily_history CTE will return no rows.
-- When this is the case, we need to grab the most recent day's records from the previously built table so that we can persist
@@ -29,8 +29,8 @@ ), most_recent_data as (
), most_recent_data as (
select
*
from {{ this }}
where date_day = (select max(date_day) from {{ this }} )
from {{ this }} as recent
where recent.date_day = (select max(this.date_day) from {{ this }} as this where this.issue_id = recent.issue_id )

{% endif %}
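
Note the shape of the change in both incremental predicates above: the old table-wide cutoff, e.g.

```sql
where valid_starting_on >= (select max(date_day) from {{ this }} )
```

becomes a cutoff correlated on `issue_id`, so each issue is compared against its own most recent day rather than the most recent day across the entire table.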

@@ -64,10 +64,10 @@ components as (
calendar as (

select *
from {{ ref('int_jira__issue_calendar_spine') }}
from {{ ref('int_jira__issue_calendar_spine') }} as spine

{% if is_incremental() %}
where date_day >= (select max(date_day) from {{ this }} )
where spine.date_day >= (select max(this.date_day) from {{ this }} as this where this.issue_id = spine.issue_id )
{% endif %}
),

@@ -82,7 +82,7 @@ joined as (
{% if col.name|lower == 'components' and var('jira_using_components', True) %}
, coalesce(pivoted_daily_history.components, most_recent_data.components) as components

{% elif col.name|lower not in ['issue_day_id', 'issue_id', 'valid_starting_on', 'components'] %}
{% elif col.name|lower not in ['issue_day_id', 'issue_id', 'valid_starting_on', 'components', 'created_on', 'open_until'] %}
, coalesce(pivoted_daily_history.{{ col.name }}, most_recent_data.{{ col.name }}) as {{ col.name }}

{% endif %}
@@ -93,7 +93,7 @@ joined as (
{% if col.name|lower == 'components' and var('jira_using_components', True) %}
, pivoted_daily_history.components

{% elif col.name|lower not in ['issue_day_id', 'issue_id', 'valid_starting_on', 'components'] %}
{% elif col.name|lower not in ['issue_day_id', 'issue_id', 'valid_starting_on', 'components', 'created_on', 'open_until'] %}
, pivoted_daily_history.{{ col.name }}

{% endif %}
@@ -121,7 +121,7 @@ set_values as (
order by date_day rows unbounded preceding) as status_id_field_partition

-- list of exception columns
{% set exception_cols = ['issue_id', 'issue_day_id', 'valid_starting_on', 'status', 'status_id', 'components', 'issue_type'] %}
{% set exception_cols = ['issue_id', 'issue_day_id', 'valid_starting_on', 'status', 'status_id', 'components', 'issue_type', 'created_on', 'open_until'] %}

{% for col in pivot_data_columns %}
{% if col.name|lower == 'components' and var('jira_using_components', True) %}
@@ -175,7 +175,7 @@ fill_values as (
partition by issue_id, component_field_partition
order by date_day asc rows between unbounded preceding and current row) as components

{% elif col.name|lower not in ['issue_id', 'issue_day_id', 'valid_starting_on', 'status', 'status_id', 'components'] %}
{% elif col.name|lower not in ['issue_id', 'issue_day_id', 'valid_starting_on', 'status', 'status_id', 'components', 'created_on', 'open_until'] %}
-- grab the value that started this batch/partition
, first_value( {{ col.name }} ) over (
partition by issue_id, {{ col.name }}_field_partition
@@ -197,7 +197,7 @@ fix_null_values as (
{% if col.name|lower == 'components' and var('jira_using_components', True) %}
, case when components = 'is_null' then null else components end as components

{% elif col.name|lower not in ['issue_id','issue_day_id','valid_starting_on', 'status', 'components'] %}
{% elif col.name|lower not in ['issue_id','issue_day_id','valid_starting_on', 'status', 'components', 'created_on', 'open_until'] %}
-- we de-nulled the true null values earlier in order to differentiate them from nulls that just needed to be backfilled
, case when {{ col.name }} = 'is_null' then null else {{ col.name }} end as {{ col.name }}

@@ -219,7 +219,7 @@ surrogate_key as (
{% if col.name|lower == 'components' and var('jira_using_components', True) %}
, fix_null_values.components as components

{% elif col.name|lower not in ['issue_id','issue_day_id','valid_starting_on', 'status', 'components'] %}
{% elif col.name|lower not in ['issue_id','issue_day_id','valid_starting_on', 'status', 'components', 'created_on', 'open_until'] %}
, fix_null_values.{{ col.name }} as {{ col.name }}

{% endif %}