generated from jupyter-naas/data-product-framework
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #183 from jupyter-naas/178-schedule-abi-using-github-action
WIP: 178 schedule abi using GitHub action
- Loading branch information
Showing
9 changed files
with
426 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import yaml, os | ||
import pydash as _ | ||
|
||
# Base GitHub Actions workflow (YAML text) that generate_schedulers loads once
# per scheduler. The empty `on:` mapping is only a placeholder: the generator
# removes it and installs the scheduler's own cron/workflow_dispatch triggers,
# then appends the scheduler step and an artifact-upload step to `steps`.
template_str = """ | ||
name: CI/CD Workflow | ||
on: {} | ||
jobs: | ||
scheduler: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
- name: Log in to GitHub Container Registry | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ghcr.io | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
- name: Pull Docker image | ||
run: docker pull ghcr.io/${{ github.repository }}/abi:latest | ||
# - name: Run Papermill | ||
# run: | | ||
# docker run --name abi-execution -i --platform linux/amd64 ghcr.io/${{ github.repository }}/abi:latest ls | ||
# mkdir output | ||
# docker cp abi-execution:/app/__pipeline__.ipynb ./output/__pipeline__.ipynb | ||
# - name: Upload output artifacts | ||
# uses: actions/upload-artifact@v4 | ||
# with: | ||
# name: output-files | ||
# path: ./output | ||
""" | ||
|
||
def _build_scheduler_run_script(scheduler_name: str) -> str:
    """Return the shell script that runs one scheduler inside the ABI container."""
    # GitHub Actions expressions are delimited by double braces. Inside an
    # f-string every literal brace has to be doubled, hence the quadruple
    # braces around `github.repository`. With only two braces the rendered
    # workflow contains `${ github.repository }`, which bash rejects as a bad
    # substitution.
    return f"""
# Generate unique id
export SCHEDULER_ID=$(python -c "import uuid; print(uuid.uuid4())")
# Execute the Scheduler script
docker run --name $SCHEDULER_ID -i --platform linux/amd64 ghcr.io/${{{{ github.repository }}}}/abi:latest python .github/scripts/run_scheduler.py "{scheduler_name}"
# Create the output directory that will be used to store the output files and save them as artifacts.
mkdir -p outputs/
# Copy the output files from the container to the host.
docker cp $SCHEDULER_ID:/app/outputs ./outputs/
"""


def generate_schedulers(config: dict, template: str):
    """Write one GitHub Actions workflow file per enabled scheduler.

    Args:
        config: Parsed configuration. Its ``schedulers`` entry is iterated;
            each item is read for ``name``, ``cron`` and the optional
            ``enabled`` flag. A missing or empty entry generates nothing.
        template: YAML text of the base workflow. It must define
            ``jobs.scheduler.steps`` as a list, to which the scheduler step
            and the artifact-upload step are appended.

    Each workflow is written to
    ``.github/workflows/scheduler__<name>.yaml`` where ``<name>`` is the
    scheduler name lower-cased with spaces replaced by underscores.
    """
    workflows_dir = '.github/workflows'

    for scheduler in config.get("schedulers") or []:
        # Skip disabled schedulers (a missing flag counts as disabled).
        if scheduler.get("enabled", False) is False:
            continue

        # Parse a fresh copy of the template for every scheduler so that
        # steps appended for one workflow never leak into the next.
        cicd = yaml.safe_load(template)

        # PyYAML implements YAML 1.1, where a bare `on` key is parsed as the
        # boolean True. Drop whichever spelling is present before installing
        # the real triggers under the string key.
        cicd.pop(True, None)
        cicd.pop("on", None)

        cicd["name"] = f"Scheduler - {scheduler['name']}"
        cicd["on"] = {"schedule": [{"cron": scheduler["cron"]}], "workflow_dispatch": {}}

        steps = cicd["jobs"]["scheduler"]["steps"]

        # Step that runs the scheduler inside the container.
        steps.append({
            'name': scheduler['name'],
            'run': _build_scheduler_run_script(scheduler['name']),
        })

        # Step that publishes whatever the scheduler wrote to ./outputs.
        steps.append({
            'name': 'Upload output artifacts',
            'uses': 'actions/upload-artifact@v4',
            'with': {
                'name': 'output-files',
                'path': './outputs'
            }
        })

        # Write to file.
        # Make sure scheduler name is a valid filename.
        scheduler_name = scheduler["name"].replace(" ", "_").lower()
        scheduler_path = os.path.join(workflows_dir, f'scheduler__{scheduler_name}.yaml')
        os.makedirs(workflows_dir, exist_ok=True)
        # The context manager guarantees the file is flushed and closed even
        # if serialisation fails.
        with open(scheduler_path, "w") as workflow_file:
            yaml.dump(cicd, workflow_file)
        print(f'✅ Scheduler file generated: {scheduler_path}')
|
||
if __name__ == "__main__":
    # Read the project configuration, then emit the scheduler workflows
    # derived from it and from the base template.
    with open("config.yml", "r") as config_file:
        config = yaml.safe_load(config_file)

    generate_schedulers(config, template_str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env python | ||
|
||
# This script is used to run a scheduler defined in config.yml | ||
|
||
import os | ||
import sys | ||
|
||
import papermill | ||
import yaml | ||
import re | ||
|
||
class SchedulerNotFoundError(Exception):
    """Raised when the requested scheduler name is not present in config.yml."""
|
||
|
||
class UnknownStepTypeError(Exception):
    """Raised when a scheduler step declares a type this script cannot run."""
|
||
# Snapshot of the process environment taken when the module is loaded. It is
# the baseline that os.environ is restored to before each notebook step.
environment_vars_backup: dict[str, str] = dict(os.environ)
|
||
|
||
def sanitize_string_to_filename(filename):
    """Turn an arbitrary label into a lower-case, filesystem-friendly name.

    The characters ``\\ / * ? : " < > |`` are removed, surrounding whitespace
    is trimmed, remaining spaces become underscores and the result is
    lower-cased and capped at 255 characters.

    Args:
        filename: The text to sanitise.

    Returns:
        The sanitised name; it may be empty if nothing usable remains.
    """
    max_filename_length = 255

    # Remove invalid characters
    cleaned = re.sub(r'[\\/*?:"<>|]', '', filename)

    # Trim BEFORE replacing spaces: once spaces have become underscores,
    # strip() can no longer remove the leading/trailing ones.
    cleaned = cleaned.strip().replace(' ', '_')

    # Lower-case before truncating, because lower() can lengthen some Unicode
    # strings and would otherwise push the result past the limit.
    return cleaned.lower()[:max_filename_length]
|
||
def get_scheduler(scheduler_name: str):
    """Look up a scheduler definition by name in ``config.yml``.

    Args:
        scheduler_name: Value to match against each scheduler's ``name``.

    Returns:
        The first entry of the ``schedulers`` list whose ``name`` equals
        *scheduler_name*.

    Raises:
        SchedulerNotFoundError: If no entry carries that name.
    """
    with open("config.yml", "r") as config_file:
        config = yaml.safe_load(config_file)

    # A private sentinel distinguishes "no match" from any value an entry
    # could legitimately have.
    missing = object()
    match = next(
        (entry for entry in config["schedulers"] if entry["name"] == scheduler_name),
        missing,
    )
    if match is missing:
        raise SchedulerNotFoundError(
            f"Scheduler '{scheduler_name}' not found in config.yml"
        )
    return match
|
||
|
||
def reset_environment_vars():
    """Restore ``os.environ`` to the snapshot held in ``environment_vars_backup``."""
    env = os.environ
    # Empty the mapping first so variables added since the snapshot disappear,
    # then reinstate every variable the snapshot recorded.
    env.clear()
    env.update(environment_vars_backup)
|
||
|
||
def run_notebook_step(scheduler_name: str, step: dict):
    """Execute one notebook step with papermill and store the executed copy.

    The process environment is first reset to its start-up snapshot, then the
    step's ``environment_variables`` (if any) are applied. The executed
    notebook is written under
    ``outputs/scheduler_executions/<scheduler>/<step>/<entrypoint dir>/``.

    Args:
        scheduler_name: Name of the scheduler the step belongs to; used to
            build the output directory.
        step: Step definition. ``entrypoint`` (notebook path) and ``name`` are
            required; ``environment_variables`` and ``inputs`` are optional.
    """
    reset_environment_vars()

    # `or {}` also covers a key that is present but left empty in the YAML.
    for key, value in (step.get("environment_variables") or {}).items():
        # os.environ only accepts strings, whereas YAML yields ints, floats and
        # booleans for unquoted values. An empty YAML value (None) is exported
        # as an empty string rather than the literal text "None".
        os.environ[str(key)] = "" if value is None else str(value)

    entrypoint = step["entrypoint"]
    entrypoint_path, _, notebook_name = entrypoint.rpartition('/')

    # os.path.join discards everything before an absolute component, so an
    # absolute entrypoint would place the output next to (and on top of) the
    # source notebook. Keep the directory relative to the outputs tree.
    entrypoint_path = entrypoint_path.lstrip('/')

    output_path = os.path.join(
        f"outputs/scheduler_executions/{sanitize_string_to_filename(scheduler_name)}/{sanitize_string_to_filename(step['name'])}",
        entrypoint_path,
    )
    os.makedirs(output_path, exist_ok=True)

    papermill.execute_notebook(
        input_path=entrypoint,
        output_path=os.path.join(output_path, notebook_name),
        parameters=step.get("inputs") or {},
    )
|
||
def run_scheduler(scheduler_name: str):
    """Run, in order, every enabled step of the named scheduler.

    Steps whose ``enabled`` flag is missing or exactly ``False`` are skipped.

    Args:
        scheduler_name: Name of the scheduler to look up and execute.

    Raises:
        UnknownStepTypeError: If an enabled step has a ``type`` other than
            ``"notebook"``.
    """
    for step in get_scheduler(scheduler_name)["steps"]:
        if step.get("enabled", False) is not False:
            step_type = step.get("type")
            if step_type != "notebook":
                raise UnknownStepTypeError(f"Unknown step type: {step_type}")
            run_notebook_step(scheduler_name, step)
|
||
|
||
if __name__ == "__main__":
    # The scheduler name is the first positional argument. Fail with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        print("Usage: python run_scheduler.py <scheduler_name>", file=sys.stderr)
        sys.exit(1)

    scheduler_name = sys.argv[1]
    run_scheduler(scheduler_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import json | ||
import yaml | ||
from jsonschema import validate, ValidationError | ||
|
||
# Exactly two positional arguments are required: the JSON schema path and the
# YAML data path.
if len(sys.argv) != 3:
    print("Usage: python validate_jsonschema_yaml.py <schemafile> <datafile>")
    sys.exit(1)

schema_file, data_file = sys.argv[1:3]

# The schema is JSON; the document being checked is YAML.
with open(schema_file) as schema_handle:
    schema = json.load(schema_handle)

with open(data_file) as data_handle:
    data = yaml.safe_load(data_handle)

# Exit status 1 signals a validation failure, 0 a valid document.
try:
    validate(instance=data, schema=schema)
except ValidationError as error:
    print(error)
    sys.exit(1)
else:
    print("YAML data is valid")
    sys.exit(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Builds the ABI container image, publishes it to GitHub Container Registry,
# runs the CI notebook inside it and uploads the notebook outputs.
name: Build ABI Container | ||
|
||
# Only pushes to main trigger the workflow; the pull_request trigger below is
# commented out.
on: | ||
push: | ||
branches: | ||
- main | ||
# pull_request: | ||
# branches: | ||
# - main | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
|
||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v3 | ||
|
||
# Caches /tmp/.buildx-cache under a key made of the runner OS and commit SHA,
# falling back to any earlier key with the same OS prefix.
- name: Cache Docker layers | ||
uses: actions/cache@v4 | ||
with: | ||
path: /tmp/.buildx-cache | ||
key: ${{ runner.os }}-buildx-${{ github.sha }} | ||
restore-keys: | | ||
${{ runner.os }}-buildx- | ||
- name: Log in to GitHub Container Registry | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ghcr.io | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
# `make build` is expected to produce a local image tagged abi:latest, which is
# then re-tagged for ghcr.io and pushed by the following step.
- name: Build Docker image | ||
run: | | ||
make build | ||
docker tag abi:latest ghcr.io/${{ github.repository }}/abi:latest | ||
- name: Push Docker image to GitHub Container Registry | ||
run: docker push ghcr.io/${{ github.repository }}/abi:latest | ||
|
||
# Executes tests/ci.ipynb with papermill inside the pushed image, then copies
# /app/outputs out of the stopped container so it can be uploaded.
- name: Run Papermill | ||
run: | | ||
docker run --name abi-execution --platform linux/amd64 ghcr.io/${{ github.repository }}/abi:latest papermill tests/ci.ipynb outputs/ci.output.ipynb | ||
mkdir execution_outputs | ||
docker cp abi-execution:/app/outputs ./execution_outputs/ | ||
- name: Upload output artifacts | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
name: output-files | ||
path: ./execution_outputs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Scheduled workflow for the "main" scheduler: pulls the ABI image, runs the
# scheduler inside it and uploads the files it produced as an artifact.
# NOTE(review): this file looks machine-generated from config.yml; fix the
# generator as well, otherwise regeneration will reintroduce the defect below.
jobs:
  scheduler:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        password: ${{ secrets.GITHUB_TOKEN }}
        registry: ghcr.io
        username: ${{ github.actor }}
    - name: Pull Docker image
      run: docker pull ghcr.io/${{ github.repository }}/abi:latest
    # The image reference must use the double-brace Actions expression for the
    # repository. With single braces the text reaches bash unexpanded and fails
    # as a bad substitution.
    - name: main
      run: |
        # Generate unique id
        export SCHEDULER_ID=$(python -c "import uuid; print(uuid.uuid4())")
        # Execute the Scheduler script
        docker run --name $SCHEDULER_ID -i --platform linux/amd64 ghcr.io/${{ github.repository }}/abi:latest python .github/scripts/run_scheduler.py "main"
        # Create the output directory that will be used to store the output files and save them as artifacts.
        mkdir -p outputs/
        # Copy the output files from the container to the host.
        docker cp $SCHEDULER_ID:/app/outputs ./outputs/
    - name: Upload output artifacts
      uses: actions/upload-artifact@v4
      with:
        name: output-files
        path: ./outputs
name: Scheduler - main
# Quoted so YAML 1.1 parsers do not read the key as the boolean true.
'on':
  schedule:
  - cron: '*/5 * * * *'
  workflow_dispatch: {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
{ | ||
"python.defaultInterpreterPath": ".abi-config/bin/python" | ||
"python.defaultInterpreterPath": ".abi-config/bin/python", | ||
"conventionalCommits.scopes": [ | ||
"CI/CD" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.