More fixes for NPM data normalizer

And run tests in CI
fabric8-analytics · Mar 18, 2019 · adc6253 · adc6253
1 parent d8503fe
commit adc6253
Show file tree

Hide file tree

Showing 6 changed files with 56 additions and 38 deletions.
diff --git a/f8a_worker/data_normalizer/abstract.py b/f8a_worker/data_normalizer/abstract.py
@@ -67,7 +67,10 @@ def _join_name_email(name_email_dict, name_key='name', email_key='email'):
         # {'name':'A', 'email':'[email protected]'} -> 'A <[email protected]>'
         """
         if not isinstance(name_email_dict, dict):
-            return name_email_dict
+            return None
+
+        if not name_email_dict:
+            return None
 
         name_email_str = name_email_dict.get(name_key) or ''
         if isinstance(name_email_dict.get(email_key), str):

diff --git a/f8a_worker/data_normalizer/javascript.py b/f8a_worker/data_normalizer/javascript.py
@@ -75,17 +75,20 @@ def _are_tests_implemented(self):
     def _transform_bug_reporting(self):
         if isinstance(self._data.get('bug_reporting'), dict):
             self._data['bug_reporting'] = self._join_name_email(self._data['bug_reporting'], 'url')
+        else:
+            self._data['bug_reporting'] = None
 
     def _transform_author(self):
-        if self._data.get('author'):
-            if isinstance(self._data.get('author'), dict):
-                self._data['author'] = self._join_name_email(self._data['author'])
-            elif isinstance(self._data.get('author'), list):
-                # Process it even it violates https://docs.npmjs.com/files/package.json
-                if isinstance(self._data['author'][0], dict):
-                    self._data['author'] = self._join_name_email(self._data['author'][0])
-                elif isinstance(self._data['author'][0], str):
-                    self._data['author'] = self._data['author'][0]
+        if isinstance(self._data.get('author'), dict):
+            self._data['author'] = self._join_name_email(self._data['author'])
+        elif isinstance(self._data.get('author'), list):
+            # Process it even if it violates https://docs.npmjs.com/files/package.json
+            if isinstance(self._data['author'][0], dict):
+                self._data['author'] = self._join_name_email(self._data['author'][0])
+            elif isinstance(self._data['author'][0], str):
+                self._data['author'] = self._data['author'][0]
+        else:
+            self._data['author'] = None
 
     def _transform_contributors(self):
         if self._data['contributors'] is not None:
@@ -184,19 +187,26 @@ def _transform_description(self):
             return
         elif isinstance(value, (list, tuple)):
             self._data[key] = ' '.join(value)
-        else:
+        elif value is not None:
             self._data[key] = str(value)
+        else:
+            self._data[key] = None
 
     def _transform_dependencies(self):
         # transform dict dependencies into flat list of strings
         # name and version spec are separated by ' ' space
         for dep_section in ('dependencies', 'devel_dependencies'):
+            if isinstance(self._data.get(dep_section), list):
+                return
             # we also want to translate empty dict to empty list
-            if isinstance(self._data.get(dep_section), dict):
+            elif isinstance(self._data.get(dep_section), dict):
                 flat_deps = []
                 for name, spec in self._data[dep_section].items():
                     flat_deps.append('{} {}'.format(name, spec))
                 self._data[dep_section] = flat_deps
+            else:
+                # some trash, like for example a boolean value; ignore...
+                self._data[dep_section] = []
 
     def _transform_engines(self):
         engines = self._data['engines']

diff --git a/tests/data_normalizer/__init__.py b/tests/data_normalizer/__init__.py
@@ -0,0 +1 @@
+"""Tests for data normalizers."""
diff --git a/tests/data_normalizer/abstract.py → tests/data_normalizer/test_abstract.py b/tests/data_normalizer/abstract.py → tests/data_normalizer/test_abstract.py
@@ -1,7 +1,7 @@
 """Tests for abstract data normalizer."""
 
 import pytest
-from f8a_worker.data_normalizer import AbstractDataNormalizer
+from f8a_worker.data_normalizer import PythonDataNormalizer, AbstractDataNormalizer
 
 
 @pytest.mark.parametrize('args, expected', [
@@ -29,37 +29,39 @@ def test__split_keywords(args, expected):
     assert AbstractDataNormalizer._split_keywords(**args) == expected
 
 
-@pytest.mark.parametrize('args, expected', [
+@pytest.mark.parametrize('data,keymap,expected', [
     # pick one key which IS there
-    ({'data': {'author': 'me', 'version': '0.1.2'}, 'keymap': (('author',),)},
-     {'author': 'me'}),
+    ({'author': 'me', 'version': '0.1.2'}, (('author',),), {'author': 'me'}),
     # pick one key which IS NOT there
-    ({'data': {'author-name': 'me', 'version': '0.1.2'}, 'keymap': (('author',),)},
+    ({'author-name': 'me', 'version': '0.1.2'}, (('author',),),
      {'author': None}),
     # pick and rename one key which IS there
-    ({'data': {'author-name': 'me'}, 'keymap': (('author-name', 'author',),)},
+    ({'author-name': 'me'}, (('author-name', 'author',),),
      {'author': 'me'}),
     # pick and rename one key which IS NOT there
-    ({'data': {'authors': 'they'}, 'keymap': (('author-name', 'author',),)},
+    ({'authors': 'they'}, (('author-name', 'author',),),
      {'author': None}),
     # pick one of keys
-    ({'data': {'license': 'MIT'}, 'keymap': ((('license', 'licenses',), ),)},
+    ({'license': 'MIT'}, ((('license', 'licenses',), ),),
      {'license': 'MIT'}),
     # pick one of keys
-    ({'data': {'licenses': ['MIT', 'BSD']}, 'keymap': ((('license', 'licenses',),),)},
+    ({'licenses': ['MIT', 'BSD']}, ((('license', 'licenses',),),),
      {'licenses': ['MIT', 'BSD']}),
     # pick one of keys and rename it
-    ({'data': {'license': 'MIT'}, 'keymap': ((('license', 'licenses',), 'declared_licenses'),)},
+    ({'license': 'MIT'}, ((('license', 'licenses',), 'declared_licenses'),),
      {'declared_licenses': 'MIT'}),
 ])
-def test__transform_keys(args, expected):
+def test__transform_keys(data, keymap, expected):
     """Test AbstractDataNormalizer._transform_keys()."""
-    assert AbstractDataNormalizer._transform_keys(**args) == expected
+    # Testing with PythonDataNormalizer as its constructor just calls AbstractDataNormalizer
+    dn = PythonDataNormalizer(data)
+    assert dn._transform_keys(keymap) == expected
 
 
 @pytest.mark.parametrize('args, expected', [
     ({'name_email_dict': {'name': 'A', 'email': '[email protected]'}},
      "A <[email protected]>"),
+    ({'name_email_dict': {}}, None),
     ({'name_email_dict': {'name': 'A'}},
      "A"),
     ({'name_email_dict': {'email': '[email protected]'}},

diff --git a/tests/data_normalizer/java.py → tests/data_normalizer/test_java.py b/tests/data_normalizer/java.py → tests/data_normalizer/test_java.py
diff --git a/tests/data_normalizer/javascript.py → tests/data_normalizer/test_javascript.py b/tests/data_normalizer/javascript.py → tests/data_normalizer/test_javascript.py
@@ -7,6 +7,10 @@
 @pytest.mark.parametrize('data, expected', [
     ({'author': {'name': 'Santa Claus', 'email': '[email protected]', 'url': 'north'}},
      {'author': 'Santa Claus <[email protected]>'}),
+    ({'author': {}},
+     {'author': None}),
+    ({'author': ()},
+     {'author': None}),
     ({'contributors': [{'email': '[email protected]', 'name': 'mscdex', 'url': 'there'},
                        {'email': '[email protected]', 'name': 'fishrock123'}]},
      {'contributors': ['mscdex <[email protected]>',
@@ -17,6 +21,8 @@
                       'fishrock123 <[email protected]>']}),
     ({'bugs': {'url': 'https://github.com/owner/project/issues', 'email': '[email protected]'}},
      {'bug_reporting': 'https://github.com/owner/project/issues <[email protected]>'}),
+    ({'bugs': [{'url': 'https://github.com/owner/project/issues', 'email': '[email protected]'}]},
+     {'bug_reporting': None}),
     ({'license': 'BSD-3-Clause'},
      {'declared_licenses': ['BSD-3-Clause']}),
     ({'license': ''},
@@ -52,7 +58,7 @@
     ({'description': ('More', 'NPM')},
      {'description': 'More NPM'}),
     ({'description': None},
-     {'description': ''}),
+     {'description': None}),
     ({'description': {}},
      {'description': '{}'}),
     ({'devDependencies': {'mocha': '~2.0.0'}},
@@ -68,22 +74,18 @@ def test_transforming_javascript_data(data, expected):
         assert transformed_data[key] == value
 
 
-@pytest.mark.parametrize('args, expected', [
-    ({'data': {}},
-     False),
+@pytest.mark.parametrize('data,expected', [
+    ({}, False),
     # package.json (nodejs), no 'scripts'
-    ({'data': {"scripts": None}},
-     False),
+    ({"scripts": None}, False),
     # package.json (nodejs), missing "test"
-    ({'data': {"scripts": {"docs": "jsdoc2md -t ..."}}},
-     False),
+    ({"scripts": {"docs": "jsdoc2md -t ..."}}, False),
     # package.json, default 'npm init' test script
-    ({'data': {"scripts": {"test": "echo \"Error: no test specified\" && exit 1"}}},
-     False),
+    ({"scripts": {"test": "echo \"Error: no test specified\" && exit 1"}}, False),
     # package.json, ok
-    ({'data': {"scripts": {"test": "tape test/*.js", "docs": "jsdoc2md -t"}}},
-     True)
+    ({"scripts": {"test": "tape test/*.js", "docs": "jsdoc2md -t"}}, True)
 ])
-def test__are_tests_implemented(args, expected):
+def test__are_tests_implemented(data, expected):
     """Test NpmDataNormalizer._are_tests_implemented()."""
-    assert NpmDataNormalizer._are_tests_implemented(**args) == expected
+    dn = NpmDataNormalizer(data)
+    assert dn._are_tests_implemented() == expected