Skip to content

Commit

Permalink
Merge pull request #5 from wednesday-solutions/fix/mypy-type
Browse files Browse the repository at this point in the history
Feat: Test Cases Added
  • Loading branch information
idipanshu authored Dec 13, 2023
2 parents bf44e50 + bc8287f commit 16fbbae
Show file tree
Hide file tree
Showing 16 changed files with 175 additions and 29 deletions.
22 changes: 14 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,37 @@ on:
jobs:
run-ci:
runs-on: ubuntu-latest
container:
image: dipanshuwed/glue4.0
container: dipanshuwed/glue4.0:latest

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install dependencies
run: |
pip3 install mypy pylint coverage
yum install jq -y # jq is required for sonar
python3 -m pip install --upgrade pip
pip3 install -r requirements.txt
yum install -y jq
- name: Type check
run: mypy ./ --ignore-missing-imports

- name: Lint
run: pylint app/ main.py setup.py --output pylint-report.txt
run: |
pylint app tests main.py setup.py --output pylint-report.txt
pylint app tests main.py setup.py
- name: Testing
run: coverage run --source=app -m unittest discover -s app/tests/
run: |
export KAGGLE_KEY=MOCKKEY
export KAGGLE_USERNAME=MOCKUSERNAME
coverage run --source=app -m unittest discover -s tests
- name: Test coverage report
run: |
coverage report
coverage xml
- name: SonarQube Scan
uses: sonarsource/sonarqube-scan-action@master
with:
Expand All @@ -43,7 +49,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}

- uses: sonarsource/sonarqube-quality-gate-action@master
timeout-minutes: 5
env:
Expand Down
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.env
.coverage
app/__pycache__
app/tests/__pycache__
*__pycache__
temp
htmlcov
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ repos:

- id: lint
name: Linting
entry: pylint app/ main.py setup.py
entry: pylint app tests main.py setup.py
language: system
always_run: true
types: [python3]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Refer: [Glue Programming libraries](https://docs.aws.amazon.com/glue/latest/dg/a

To run tests in the root of the directory use:

coverage run --source=app -m unittest discover -s app/tests/
coverage run --source=app -m unittest discover -s tests
coverage report

Note that awsglue libraries are not available to download, so use the AWS Glue 4 Docker container.
1 change: 0 additions & 1 deletion app/Extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

def extract_from_kaggle(flag: bool):
if flag:
print("WRONG BIT RAN!!!!")
read_path = "/dbfs/mnt/rawdata/"
write_path = "/mnt/transformed/"
else:
Expand Down
4 changes: 3 additions & 1 deletion app/SparkWrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from pyspark.sql import Window, WindowSpec


def create_frame(sc: SparkSession, path: str):
def create_frame(sc: SparkSession | None, path: str):
if sc is None:
raise TypeError(f"{sc} is None. Pass Spark Session")
df = sc.read.csv(path, inferSchema=True, header=True)
return df

Expand Down
17 changes: 13 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,30 @@

flag = bool(flag == "True")

# if 'spark' in locals():
# flag = True
# else:
# spark = None
# dbutils = None
# flag = False


# COMMAND ----------

if flag:
os.environ["KAGGLE_USERNAME"] = keys["kaggle_username"]
os.environ["KAGGLE_USERNAME"] = dbutils.widgets.get("kaggle_username")

os.environ["KAGGLE_KEY"] = keys["kaggle_token"]
os.environ["KAGGLE_KEY"] = dbutils.widgets.get("kaggle_token")

os.environ["storage_account_name"] = keys["storage_account_name"]
os.environ["storage_account_name"] = dbutils.widgets.get("storage_account_name")

os.environ["datalake_access_key"] = keys["datalake_access_key"]
os.environ["datalake_access_key"] = dbutils.widgets.get("datalake_access_key")


# COMMAND ----------
if flag:
import app.connect_databricks as cd

# creating mounts
cd.create_mount(dbutils, "zipdata", "/mnt/zipdata/")
cd.create_mount(dbutils, "rawdata", "/mnt/rawdata/")
Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
mypy~=1.7.1
pylint~=3.0.2
coverage~=7.3.2
python-dotenv~=1.0.0
kaggle~=1.5.16
pre-commit~=3.6.0
6 changes: 3 additions & 3 deletions sonar-project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ sonar.python.coverage.reportPaths=*coverage.xml
sonar.python.pylint_config=.pylintrc
sonar.python.pylint=/usr/local/bin/pylint
sonar.inclusions=**/app/**,**/main.py
sonar.exclusions=**/app/**/tests/test_*.py,**/__init__.py,**/src/**/*.csv,**/app/tests/mock/*.*
sonar.test.exclusions=**/app/**/tests/test_*.py,**/__init__.py,**/app/**/*.csv,**/app/tests/mock/*.*
sonar.coverage.exclusions=**/app/**/tests/test_*.py,**/__init__.py,**/app/**/*.csv,**/app/tests/mock/*.*
sonar.exclusions=**/tests/test_*.py,**/__init__.py,**/tests/mock/*.*
sonar.test.exclusions=**/tests/test_*.py,**/__init__.py,**/tests/mock/*.*
sonar.coverage.exclusions=**/tests/test_*.py,**/__init__.py,**/tests/mock/*.*
sonar.text.excluded.file.suffixes=csv
sonar.python.version=3.7
File renamed without changes.
File renamed without changes.
File renamed without changes.
74 changes: 74 additions & 0 deletions tests/test_Extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import unittest
from unittest.mock import patch
from app.Extraction import extract_from_kaggle


class TestExtraction(unittest.TestCase):
    """Unit tests for extract_from_kaggle with the kaggle client mocked out."""

    @patch("app.Extraction.kaggle")
    def test_extract_from_kaggle_success(self, mock_kaggle):
        # The KaggleApi factory hands back a mock whose methods succeed silently.
        api = mock_kaggle.KaggleApi.return_value
        api.authenticate.return_value = None
        api.dataset_download_cli.return_value = None

        # flag=True selects the Databricks (dbfs) read/write paths.
        result = extract_from_kaggle(True)

        mock_kaggle.KaggleApi.assert_called_once()
        api.authenticate.assert_called_once()
        api.dataset_download_cli.assert_called_once_with(
            "mastmustu/insurance-claims-fraud-data",
            unzip=True,
            path="/dbfs/mnt/rawdata/",
        )

        self.assertEqual(result, ("/mnt/rawdata/", "/mnt/transformed/"))

    @patch("app.Extraction.kaggle")
    def test_extract_from_kaggle_success_false(self, mock_kaggle):
        api = mock_kaggle.KaggleApi.return_value
        api.authenticate.return_value = None
        api.dataset_download_cli.return_value = None

        # flag=False selects the S3 paths and a local temp download dir.
        result = extract_from_kaggle(False)

        mock_kaggle.KaggleApi.assert_called_once()
        api.authenticate.assert_called_once()
        api.dataset_download_cli.assert_called_once_with(
            "mastmustu/insurance-claims-fraud-data", unzip=True, path="temp/"
        )

        self.assertEqual(
            result,
            (
                "s3://glue-bucket-vighnesh/rawdata/",
                "s3://glue-bucket-vighnesh/transformed/",
            ),
        )

    @patch("app.Extraction.kaggle")
    def test_extract_from_kaggle_failure(self, mock_kaggle):
        api = mock_kaggle.KaggleApi.return_value
        # Authentication blows up, so the download step must never run.
        api.authenticate.side_effect = Exception(
            "Simulated authentication failure"
        )

        with self.assertRaises(Exception) as context:
            extract_from_kaggle(False)

        mock_kaggle.KaggleApi.assert_called_once()
        api.authenticate.assert_called_once()
        api.dataset_download_cli.assert_not_called()

        # The original exception must propagate unchanged.
        self.assertEqual(str(context.exception), "Simulated authentication failure")
8 changes: 3 additions & 5 deletions app/tests/test_SparkWrapper.py → tests/test_SparkWrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ def setUp(self) -> None:
self.spark = (
SparkSession.builder.master("local").appName("Testing").getOrCreate()
)
self.df = self.spark.read.csv(
"app/tests/mock/sample.csv", inferSchema=True, header=True
)
self.path = "tests/mock/sample.csv"
self.df = self.spark.read.csv(self.path, inferSchema=True, header=True)
super().setUp()

def tearDown(self) -> None:
Expand Down Expand Up @@ -43,8 +42,7 @@ def test_rename_columns(self):
self.assertListEqual(actual_columns, expected_columns)

def test_create_frame(self):
path = "app/tests/mock/sample.csv"
df = create_frame(self.spark, path).drop("date")
df = create_frame(self.spark, self.path).drop("date")
actual_data = df.collect()

expected_data = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ def setUp(self) -> None:
self.spark = (
SparkSession.builder.appName("Testing").master("local[*]").getOrCreate()
)
self.df = self.spark.read.csv(
"app/tests/mock/sample.csv", inferSchema=True, header=True
)
self.path = "tests/mock/sample.csv"
self.df = self.spark.read.csv(self.path, inferSchema=True, header=True)
super().setUp()

def tearDown(self) -> None:
Expand Down
52 changes: 52 additions & 0 deletions tests/test_connect_glue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import unittest
from unittest.mock import patch, MagicMock
from app.connect_glue import init_glue


class TestInitGlue(unittest.TestCase):
    """Unit tests for init_glue with SparkContext, GlueContext and Job mocked."""

    @patch("app.connect_glue.SparkContext")
    @patch("app.connect_glue.GlueContext")
    @patch("app.connect_glue.Job")
    def test_init_glue(self, mock_job, mock_glue_context, mock_spark_context):
        # Stand-in objects returned by each patched constructor.
        spark_ctx = MagicMock()
        glue_ctx = MagicMock()
        glue_job = MagicMock()
        mock_spark_context.return_value = spark_ctx
        mock_glue_context.return_value = glue_ctx
        mock_job.return_value = glue_job

        got_context, got_spark, got_job = init_glue()

        # Each constructor runs exactly once, chained in dependency order.
        mock_spark_context.assert_called_once()
        mock_glue_context.assert_called_once_with(spark_ctx)
        mock_job.assert_called_once_with(glue_ctx)

        # The returned triple is (glue context, its spark session, job).
        self.assertEqual(got_context, glue_ctx)
        self.assertEqual(got_spark, glue_ctx.spark_session)
        self.assertEqual(got_job, glue_job)

    @patch("app.connect_glue.SparkContext")
    @patch("app.connect_glue.GlueContext")
    @patch("app.connect_glue.Job")
    def test_init_glue_failure(self, mock_job, mock_glue_context, mock_spark_context):
        # SparkContext construction fails, so nothing downstream may be built.
        error_statement = "Simulated SparkContext initialization failure"
        mock_spark_context.side_effect = ValueError(error_statement)

        with self.assertRaises(ValueError) as context:
            init_glue()

        mock_spark_context.assert_called_once()
        mock_glue_context.assert_not_called()  # skipped once SparkContext fails
        mock_job.assert_not_called()  # likewise never reached

        # The original error message must propagate unchanged.
        self.assertEqual(str(context.exception), error_statement)

0 comments on commit 16fbbae

Please sign in to comment.