From 9e0765421c799039498bd8beefb622db0338014b Mon Sep 17 00:00:00 2001
From: djcomlab
Date: Wed, 8 Aug 2018 11:55:01 +0100
Subject: [PATCH 1/6] Fixes for #308 and updates to tests

---
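Note: the isatab.py hunks below re-enable the previously commented-out
Comment[] column handling for data file outputs in assay tables. As a
minimal sketch of the column-naming scheme used in the second hunk
(hypothetical label and comment values, plain Python rather than
isatools code):

    # Each comment on a data file output becomes one "<label>.Comment[<name>]"
    # key in the dict that backs the assay table, one cell per row.
    olabel = "Raw Data File"  # hypothetical output label
    co_name = "checksum"      # hypothetical comment name
    colabel = "{0}.Comment[{1}]".format(olabel, co_name)
    print(colabel)  # -> Raw Data File.Comment[checksum]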
 isatools/isatab.py                       | 21 +++---
 tests/test_create_models_json.py         |  1 -
 tests/test_create_models_study_design.py |  4 +-
 tests/test_isatab2json.py                |  1 -
 tests/test_json2isatab2json_convert.py   | 83 ------------------------
 tests/test_mw2isa.py                     |  2 -
 6 files changed, 10 insertions(+), 102 deletions(-)
 delete mode 100644 tests/test_json2isatab2json_convert.py

diff --git a/isatools/isatab.py b/isatools/isatab.py
index f4e51021..1af784b2 100644
--- a/isatools/isatab.py
+++ b/isatools/isatab.py
@@ -911,9 +911,9 @@ def write_assay_table_files(inv_obj, output_dir, write_factor_values=False):
 
                 for output in [x for x in node.outputs if isinstance(x, DataFile)]:
                     columns.append(output.label)
-                    # columns += flatten(
-                    #     map(lambda x: get_comment_column(output.label, x),
-                    #         output.comments))
+                    columns += flatten(
+                        map(lambda x: get_comment_column(output.label, x),
+                            output.comments))
 
             elif isinstance(node, Material):
                 olabel = node.type
@@ -982,9 +982,9 @@
                 for output in [x for x in node.outputs if isinstance(x, DataFile)]:
                     olabel = output.label
                     df_dict[olabel][-1] = output.filename
-                    # for co in output.comments:
-                    #     colabel = "{0}.Comment[{1}]".format(olabel, co.name)
-                    #     df_dict[colabel][-1] = co.value
+                    for co in output.comments:
+                        colabel = "{0}.Comment[{1}]".format(olabel, co.name)
+                        df_dict[colabel][-1] = co.value
 
             elif isinstance(node, Sample):
                 olabel = "Sample Name"
@@ -3515,12 +3515,9 @@ def read_tfile(tfile_path, index_col=None, factor_filter=None):
         tfile_fp.seek(0)
         log.debug("Reading file into DataFrame")
         tfile_fp = strip_comments(tfile_fp)
-        tfile_df = pd.read_csv(tfile_fp, dtype=str, sep='\t', index_col=index_col,
-                               memory_map=True, encoding='utf-8').fillna('')
-        tfile_df.isatab_header = header
-        # tfile_df = IsaTabDataFrame(
-        #     pd.read_csv(tfile_fp, dtype=str, sep='\t', index_col=index_col,
-        #                 memory_map=True, encoding='utf-8').fillna(''))
+        tfile_df = IsaTabDataFrame(
+            pd.read_csv(tfile_fp, dtype=str, sep='\t', index_col=index_col,
+                        memory_map=True, encoding='utf-8').fillna(''))
     if factor_filter:
         log.debug("Filtering DataFrame contents on Factor Value %s", factor_filter)
         return tfile_df[tfile_df['Factor Value[{}]'.format(factor_filter[0])] == factor_filter[1]]
diff --git a/tests/test_create_models_json.py b/tests/test_create_models_json.py
index f14e59bc..f7796600 100644
--- a/tests/test_create_models_json.py
+++ b/tests/test_create_models_json.py
@@ -150,7 +150,6 @@ def test_serialize_ms_assay_topology_modifiers(self):
                 json.dumps(top_mods, cls=SampleAssayPlanEncoder)
             )
         )
-        print(json.dumps(top_mods, cls=SampleAssayPlanEncoder, indent=4))
         self.assertTrue(expected == actual)
 
     @unittest.skip(
diff --git a/tests/test_create_models_study_design.py b/tests/test_create_models_study_design.py
index 3c5fbe9d..7cbdd55d 100644
--- a/tests/test_create_models_study_design.py
+++ b/tests/test_create_models_study_design.py
@@ -867,9 +867,7 @@ def test_study_from_2_level_factorial_plan(self):
         study = IsaModelObjectFactory(study_design).create_assays_from_plan()
         self.assertEqual(len(study.assays), 6)
         self.assertEqual(len(study.protocols), 4)
-        study.filename = 's_study.txt'
-        from isatools import isatab
-        print(isatab.dumps(Investigation(studies=[study])))
+
 
     def test_study_from_2_by_3_by_2_factorial_plan(self):
         factor1 = StudyFactor(name='1')
diff --git a/tests/test_isatab2json.py b/tests/test_isatab2json.py
index 020db674..76ec0b83 100644
--- a/tests/test_isatab2json.py
+++ b/tests/test_isatab2json.py
@@ -152,7 +152,6 @@ def test_isatab2json_convert_repeated_measure(self):
         actual_json = isatab2json.convert(
             os.path.join(self._tab_data_dir, test_case), validate_first=False,
             use_new_parser=True)
-        print(json.dumps(actual_json, indent=4))
         with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
             json.dump(actual_json, out_fp)
         with open(os.path.join(self._tmp_dir, 'isa.json')) as actual_json:
diff --git a/tests/test_json2isatab2json_convert.py b/tests/test_json2isatab2json_convert.py
deleted file mode 100644
index c8292920..00000000
--- a/tests/test_json2isatab2json_convert.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import os
-import unittest
-from isatools.convert import isatab2json, json2isatab
-import shutil
-import json
-from isatools.tests import utils
-import tempfile
-
-
-def setUpModule():
-    if not os.path.exists(utils.DATA_DIR):
-        raise FileNotFoundError("Could not fine test data directory in {0}. Ensure you have cloned the ISAdatasets "
-                                "repository using "
-                                "git clone -b tests --single-branch git@github.com:ISA-tools/ISAdatasets {0}"
-                                .format(utils.DATA_DIR))
-
-
-class TestJsonIsaTabTwoWayConvert(unittest.TestCase):
-
-    def setUp(self):
-        self._json_data_dir = utils.JSON_DATA_DIR
-        self._tmp_dir = tempfile.mkdtemp()
-
-    def tearDown(self):
-        shutil.rmtree(self._tmp_dir)
-
-    def test_json2isatab_isatab2json_2way_convert_sample_pool(self):
-        test_case = 'TEST-ISA-sample-pool'
-        with open(os.path.join(self._json_data_dir, test_case + '.json')) as test_json:
-            json2isatab.convert(test_json, self._tmp_dir, validate_first=False)
-            test_json.seek(0)  # reset pointer
-            expected_json = json.load(test_json)
-            actual_json = isatab2json.convert(self._tmp_dir, validate_first=False)
-            self.assertTrue(utils.assert_json_equal(expected_json, actual_json))
-
-    def test_json2isatab_isatab2json_2way_convert_source_split(self):
-        test_case = 'TEST-ISA-source-split'
-        with open(os.path.join(self._json_data_dir, test_case + '.json')) as test_json:
-            json2isatab.convert(test_json, self._tmp_dir, validate_first=False)
-            test_json.seek(0)  # reset pointer
-            expected_json = json.load(test_json)
-            actual_json = isatab2json.convert(self._tmp_dir, validate_first=False)
-            self.assertTrue(utils.assert_json_equal(expected_json, actual_json))
-
-    # def test_json2isatab_isatab2json_2way_convert_bii_i_1(self):
-    #     # FIXME: Get error in isatab2json.createUnitsCategories
-    #     # json_item.update(self.createOntologyAnnotation(value_attributes.Unit, value_attributes.Term_Source_REF, value_attributes.Term_Accession_Number))
-    #     # AttributeError: 'Attrs' object has no attribute 'Term_Source_REF'
-    #     # Are Units always OntologyAnnotations? (i.e. Unit column alway accompanied by Term Accession and
-    #     # Term Source REF?
-    #     test_case = 'BII-I-1'
-    #     test_json = open(os.path.join(self._json_data_dir, test_case, test_case + '.json'))
-    #     json2isatab.convert(test_json, self._tmp_dir)
-    #     test_json.seek(0)  # reset pointer
-    #     expected_json = json.load(test_json)
-    #     actual_json = isatab2json.convert(self._tmp_dir)
-    #     self.assertTrue(utils.assert_json_equal(expected_json, actual_json))
-    #
-    # def test_json2isatab_isatab2json_2way_convert_bii_s_3(self):
-    #     # FIXME: Get error in isatab2json.createUnitsCategories
-    #     # json_item.update(self.createOntologyAnnotation(value_attributes.Unit, value_attributes.Term_Source_REF, value_attributes.Term_Accession_Number))
-    #     # AttributeError: 'Attrs' object has no attribute 'Term_Source_REF'
-    #     # Are Units always OntologyAnnotations? (i.e. Unit column alway accompanied by Term Accession and
-    #     # Term Source REF? If so, related to below bii_s_7 error
-    #     test_case = 'BII-S-3'
-    #     test_json = open(os.path.join(self._json_data_dir, test_case, test_case + '.json'))
-    #     json2isatab.convert(test_json, self._tmp_dir)
-    #     test_json.seek(0)  # reset pointer
-    #     expected_json = json.load(test_json)
-    #     actual_json = isatab2json.convert(self._tmp_dir)
-    #     self.assertTrue(utils.assert_json_equal(expected_json, actual_json))
-    #
-    # def test_json2isatab_isatab2json_2way_convert_bii_s_7(self):
-    #     # FIXME: It reports a big diff because when doing json2isatab, if Term Accession and Term Source REF columns
-    #     #        are empty it strips them out. When going back from isatab2json, it converts as string and not
-    #     #        OntologyAnnotation since there is no extra info to be able to cast back to original
-    #     test_case = 'BII-S-7'
-    #     test_json = open(os.path.join(self._json_data_dir, test_case, test_case + '.json'))
-    #     json2isatab.convert(test_json, self._tmp_dir)
-    #     test_json.seek(0)  # reset pointer
-    #     expected_json = json.load(test_json)
-    #     actual_json = isatab2json.convert(self._tmp_dir)
-    #     self.assertTrue(utils.assert_json_equal(expected_json, actual_json))
\ No newline at end of file
diff --git a/tests/test_mw2isa.py b/tests/test_mw2isa.py
index 767d514a..5496b311 100644
--- a/tests/test_mw2isa.py
+++ b/tests/test_mw2isa.py
@@ -30,8 +30,6 @@ def test_conversion(self):
         if success and validate:
             log.info("conversion successful, invoking the validator for " + study_id)
             with open(os.path.join(self._tmp_dir, study_id, 'i_investigation.txt')) as fp:
-                # print(isatab.dumps(isatab.load(fp)))
-                # fp.seek(0)
                 report = isatab.validate(fp)
                 print(report)
                 if len(report['errors']) > 0:

From 064ffed1f3ef8cd2e36a559848cdbd8d7ec32232 Mon Sep 17 00:00:00 2001
From: djcomlab
Date: Wed, 8 Aug 2018 12:18:07 +0100
Subject: [PATCH 2/6] Add missing data frame wrapper and unpin requirements to
 complete fix for #308

---
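Note: the tests below previously attached isatab_header to a plain pandas
DataFrame, an ad-hoc attribute that pandas does not carry through derived
frames. A minimal sketch of why a wrapper class fixes this (illustrative
only, assuming the documented pandas subclassing hooks; the real
IsaTabDataFrame lives in isatools.isatab and derives the header itself):

    import pandas as pd

    class HeaderDataFrame(pd.DataFrame):
        # Attributes listed in _metadata are propagated by __finalize__.
        _metadata = ['isatab_header']

        @property
        def _constructor(self):
            # Ensure slices/copies come back as this subclass, not DataFrame.
            return HeaderDataFrame

    df = HeaderDataFrame({'Sample Name': ['sample1']})
    df.isatab_header = ['Sample Name']
    print(df.head().isatab_header)  # survives the derived frame: ['Sample Name']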
 requirements-tests.txt |  2 +-
 requirements.txt       |  2 +-
 setup.py               |  2 +-
 tests/test_isatab.py   | 16 ++++++----------
 4 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/requirements-tests.txt b/requirements-tests.txt
index b47f534b..e2c8bfec 100644
--- a/requirements-tests.txt
+++ b/requirements-tests.txt
@@ -1,6 +1,6 @@
 numpy
 jsonschema
-pandas==0.20.*
+pandas
 networkx
 behave
 httpretty
diff --git a/requirements.txt b/requirements.txt
index 3725ea4b..4847f877 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 numpy
 jsonschema
-pandas==0.20.*
+pandas
 networkx
 lxml
 requests
diff --git a/setup.py b/setup.py
index 84cc10c9..49e9d30f 100644
--- a/setup.py
+++ b/setup.py
@@ -48,7 +48,7 @@
     install_requires=[
         'numpy',
         'jsonschema',
-        'pandas==0.20.*',
+        'pandas',
         'networkx',
         'lxml',
         'requests',
diff --git a/tests/test_isatab.py b/tests/test_isatab.py
index cad66b77..a6081b68 100644
--- a/tests/test_isatab.py
+++ b/tests/test_isatab.py
@@ -13,6 +13,7 @@
 from isatools.model import *
 from isatools.tests.utils import assert_tab_content_equal
 from isatools.tests import utils
+from isatools.isatab import IsaTabDataFrame
 
 
 def setUpModule():
@@ -911,8 +912,7 @@ def test_source_protocol_ref_sample(self):
         factory = ProcessSequenceFactory(study_protocols=[Protocol(name="sample collection")])
         table_to_load = """Source Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1"""
-        DF = pd.read_csv(StringIO(table_to_load), sep='\t')
-        DF.isatab_header = ["Source Name", "Protocol REF", "Sample Name"]
+        DF = IsaTabDataFrame(pd.read_csv(StringIO(table_to_load), sep='\t'))
         so, sa, om, d, pr, _, __ = factory.create_from_df(DF)
         self.assertEqual(len(so), 1)
         self.assertEqual(len(sa), 1)
@@ -925,8 +925,7 @@ def test_source_protocol_ref_sample_x2(self):
         table_to_load = """Source Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1
source2\tsample collection\tsample2"""
-        DF = pd.read_csv(StringIO(table_to_load), sep='\t')
-        DF.isatab_header = ["Source Name", "Protocol REF", "Sample Name"]
+        DF = IsaTabDataFrame(pd.read_csv(StringIO(table_to_load), sep='\t'))
         so, sa, om, d, pr, _, __ = factory.create_from_df(DF)
         self.assertEqual(len(so), 2)
         self.assertEqual(len(sa), 2)
@@ -939,8 +938,7 @@ def test_source_protocol_ref_split_sample(self):
         table_to_load = """Source Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1
source1\tsample collection\tsample2"""
-        DF = pd.read_csv(StringIO(table_to_load), sep='\t')
-        DF.isatab_header = ["Source Name", "Protocol REF", "Sample Name"]
+        DF = IsaTabDataFrame(pd.read_csv(StringIO(table_to_load), sep='\t'))
         so, sa, om, d, pr, _, __ = factory.create_from_df(DF)
         self.assertEqual(len(so), 1)
         self.assertEqual(len(sa), 2)
@@ -953,8 +951,7 @@ def test_source_protocol_ref_pool_sample(self):
         table_to_load = """Source Name\tProtocol REF\tSample Name
source1\tsample collection\tsample1
source2\tsample collection\tsample1"""
-        DF = pd.read_csv(StringIO(table_to_load), sep='\t')
-        DF.isatab_header = ["Source Name", "Protocol REF", "Sample Name"]
+        DF = IsaTabDataFrame(pd.read_csv(StringIO(table_to_load), sep='\t'))
         so, sa, om, d, pr, _, __ = factory.create_from_df(DF)
         self.assertEqual(len(so), 2)
         self.assertEqual(len(sa), 1)
@@ -969,8 +966,7 @@ def test_sample_protocol_ref_split_extract_protocol_ref_data(self):
         table_to_load = """Sample Name\tProtocol REF\tExtract Name\tProtocol REF\tRaw Data File
sample1\textraction\te1\tscanning\td1
sample1\textraction\te2\tscanning\td2"""
-        DF = pd.read_csv(StringIO(table_to_load), sep='\t')
-        DF.isatab_header = ["Source Name", "Protocol REF", "Extract Name", "Protocol REF", "Raw Data File"]
+        DF = IsaTabDataFrame(pd.read_csv(StringIO(table_to_load), sep='\t'))
         so, sa, om, d, pr, _, __ = factory.create_from_df(DF)
         self.assertEqual(len(so), 0)
         self.assertEqual(len(sa), 1)

From 739841b173543f1d36a92a054b42bd993f4b3fbd Mon Sep 17 00:00:00 2001
From: djcomlab
Date: Wed, 8 Aug 2018 13:36:17 +0100
Subject: [PATCH 3/6] Add utility function for utf8 text file opening

---
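Note: utf8_text_file_open (added to isatools/utils.py below) returns a
binary handle on Python 2 and a utf-8 text handle opened with newline=''
on Python 3, which is what the csv module expects for correct newline
handling. A usage sketch mirroring the parse() hunk below (the file path
is hypothetical):

    import csv

    from isatools.utils import utf8_text_file_open

    with utf8_text_file_open('i_investigation.txt') as fp:
        for row in csv.reader(fp, dialect='excel-tab'):
            print(row[0])  # first cell of each investigation-file row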
 isatools/convert/isatab2w4m.py | 16 +++---
 isatools/isatab.py             | 67 +++++++++++++++++-----------------
 isatools/utils.py              |  9 +++++
 tox.ini                        |  2 +-
 4 files changed, 54 insertions(+), 40 deletions(-)

diff --git a/isatools/convert/isatab2w4m.py b/isatools/convert/isatab2w4m.py
index b67c10d1..66ed3b2f 100644
--- a/isatools/convert/isatab2w4m.py
+++ b/isatools/convert/isatab2w4m.py
@@ -14,6 +14,7 @@
 
 from isatools import isatab as ISATAB
+from isatools.utils import utf8_text_file_open
 
 # original from https://github.com/workflow4metabolomics/mtbls-dwnld/blob/develop/isatab2w4m.py
 __author__ = 'pkrog (Pierrick Roger)'
@@ -334,7 +335,7 @@ def get_investigation_file(input_dir):
 ################################################################
 
 def load_investigation(investigation_file):
-    f = open(investigation_file, 'r')
+    f = utf8_text_file_open(investigation_file)
     investigation = ISATAB.load(f)
     return investigation
@@ -364,8 +365,10 @@ def get_sample_names(assay_df, measures_df):
 def make_sample_metadata(study_df, assay_df, sample_names, normalize=True):
 
     # Normalize column names
-    study_df.set_axis(axis=1, labels=make_names(study_df.axes[1].tolist()))
-    assay_df.set_axis(axis=1, labels=make_names(assay_df.axes[1].tolist()))
+    study_df.set_axis(
+        inplace=True, axis=1, labels=make_names(study_df.axes[1].tolist()))
+    assay_df.set_axis(
+        inplace=True, axis=1, labels=make_names(assay_df.axes[1].tolist()))
 
     # Merge data frames
     sample_metadata = assay_df.merge(study_df, on='Sample.Name', sort=False)
@@ -374,7 +377,7 @@ def make_sample_metadata(study_df, assay_df, sample_names, normalize=True):
     if normalize:
         norm_sample_names = make_names(sample_names, uniq=True)
         sample_metadata.insert(0, 'sample.name', norm_sample_names)
-        sample_metadata.set_axis(axis=1, labels=make_names(
+        sample_metadata.set_axis(inplace=True, axis=1, labels=make_names(
             sample_metadata.axes[1].tolist(), uniq=True))
 
     return sample_metadata
@@ -395,7 +398,7 @@ def make_variable_metadata(measures_df, sample_names, variable_names,
 
     # Normalize
     if normalize:
-        variable_metadata.set_axis(axis=1, labels=make_names(
+        variable_metadata.set_axis(inplace=True, axis=1, labels=make_names(
             variable_metadata.axes[1].tolist(), uniq=True))
 
     return variable_metadata
@@ -422,7 +425,8 @@ def make_matrix(measures_df, sample_names, variable_names, normalize=True):
     if normalize:
         norm_sample_names = make_names(sample_names, uniq=True)
         norm_sample_names.insert(0, 'variable.name')
-        sample_variable_matrix.set_axis(axis=1, labels=norm_sample_names)
+        sample_variable_matrix.set_axis(
+            inplace=True, axis=1, labels=norm_sample_names)
 
     return sample_variable_matrix
diff --git a/isatools/isatab.py b/isatools/isatab.py
index 1af784b2..eca6f555 100644
--- a/isatools/isatab.py
+++ b/isatools/isatab.py
@@ -30,6 +30,7 @@
 from isatools import logging as isa_logging
 from isatools.io import isatab_configurator
 from isatools.model import *
+from isatools.utils import utf8_text_file_open
 
 log = logging.getLogger('isatools')
@@ -141,7 +142,7 @@ def __init__(self, tab_options=None, show_progressbar=None, log_level=None):
 
     def parse(self, filename):
         try:
-            with open(filename, encoding='utf-8') as unicode_file:
+            with utf8_text_file_open(filename) as unicode_file:
                 ttable_reader = csv.reader(
                     filter(lambda r: r[0] != '#', unicode_file),
                     dialect='excel-tab')
@@ -1246,7 +1247,7 @@ def _build_section_df(f):
 def check_utf8(fp):
     """Used for rule 0010"""
     import chardet
-    with open(fp.name, 'rb') as fp:
+    with utf8_text_file_open(fp.name) as fp:
         charset = chardet.detect(fp.read())
         if charset['encoding'] is not 'UTF-8' and charset['encoding'] is not 'ascii':
             validator_warnings.append({
@@ -1526,7 +1527,7 @@ def check_table_files_read(i_df, dir_context):
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8'):
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)):
                     pass
             except FileNotFoundError:
                 validator_errors.append({
@@ -1538,7 +1539,7 @@
         for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8'):
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)):
                         pass
                 except FileNotFoundError:
                     validator_errors.append({
@@ -1555,14 +1556,14 @@ def check_table_files_load(i_df, dir_context):
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as fp:
                     load_table_checks(fp)
             except FileNotFoundError:
                 pass
         for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as fp:
                         load_table_checks(fp)
                 except FileNotFoundError:
                     pass
@@ -1573,7 +1574,7 @@ def check_samples_not_declared_in_study_used_in_assay(i_df, dir_context):
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     study_samples = set(study_df['Sample Name'])
             except FileNotFoundError:
@@ -1581,7 +1582,7 @@
         for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         assay_samples = set(assay_df['Sample Name'])
                         if not assay_samples.issubset(study_samples):
@@ -1599,7 +1600,7 @@ def check_protocol_usage(i_df, dir_context):
         if study_filename is not '':
             try:
                 protocol_refs_used = set()
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     for protocol_ref_col in [i for i in study_df.columns if i.startswith('Protocol REF')]:
                         protocol_refs_used = protocol_refs_used.union(study_df[protocol_ref_col])
@@ -1621,7 +1622,7 @@
             if assay_filename is not '':
                 try:
                     protocol_refs_used = set()
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         for protocol_ref_col in [i for i in assay_df.columns if i.startswith('Protocol REF')]:
                             protocol_refs_used = protocol_refs_used.union(assay_df[protocol_ref_col])
@@ -1642,7 +1643,7 @@
         protocol_refs_used = set()
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     for protocol_ref_col in [i for i in study_df.columns if i.startswith('Protocol REF')]:
                         protocol_refs_used = protocol_refs_used.union(study_df[protocol_ref_col])
@@ -1651,7 +1652,7 @@
         for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         for protocol_ref_col in [i for i in assay_df.columns if i.startswith('Protocol REF')]:
                             protocol_refs_used = protocol_refs_used.union(assay_df[protocol_ref_col])
@@ -1821,7 +1822,7 @@ def check_study_factor_usage(i_df, dir_context):
         if study_filename is not '':
             try:
                 study_factors_used = set()
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     study_factor_ref_cols = [i for i in study_df.columns if _RX_FACTOR_VALUE.match(i)]
                    for col in study_factor_ref_cols:
@@ -1837,7 +1838,7 @@
             if assay_filename is not '':
                 try:
                     study_factors_used = set()
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         study_factor_ref_cols = set([i for i in assay_df.columns if _RX_FACTOR_VALUE.match(i)])
                         for col in study_factor_ref_cols:
@@ -1852,7 +1853,7 @@
         study_factors_used = set()
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     study_factor_ref_cols = [i for i in study_df.columns if _RX_FACTOR_VALUE.match(i)]
                     for col in study_factor_ref_cols:
@@ -1863,7 +1864,7 @@
         for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         study_factor_ref_cols = set([i for i in assay_df.columns if _RX_FACTOR_VALUE.match(i)])
                         for col in study_factor_ref_cols:
@@ -1890,7 +1891,7 @@ def check_protocol_parameter_usage(i_df, dir_context):
         if study_filename is not '':
             try:
                 protocol_parameters_used = set()
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     parameter_value_cols = [i for i in study_df.columns if _RX_PARAMETER_VALUE.match(i)]
                     for col in parameter_value_cols:
@@ -1906,7 +1907,7 @@
             if assay_filename is not '':
                 try:
                     protocol_parameters_used = set()
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         parameter_value_cols = [i for i in assay_df.columns if _RX_PARAMETER_VALUE.match(i)]
                         for col in parameter_value_cols:
@@ -1922,7 +1923,7 @@
         protocol_parameters_used = set()
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     study_df = load_table(s_fp)
                     parameter_value_cols = [i for i in study_df.columns if _RX_PARAMETER_VALUE.match(i)]
                     for col in parameter_value_cols:
@@ -1933,7 +1934,7 @@
         for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         assay_df = load_table(a_fp)
                         parameter_value_cols = [i for i in assay_df.columns if _RX_PARAMETER_VALUE.match(i)]
                         for col in parameter_value_cols:
@@ -2016,7 +2017,7 @@ def check_term_source_refs_in_assay_tables(i_df, dir_context):
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     df = load_table(s_fp)
                     columns = df.columns
                     object_index = [i for i, x in enumerate(columns) if x.startswith('Term Source REF')]
@@ -2063,7 +2064,7 @@
        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
            if assay_filename is not '':
                try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                        df = load_table(a_fp)
                        columns = df.columns
                        object_index = [i for i, x in enumerate(columns) if x.startswith('Term Source REF')]
@@ -2369,7 +2370,7 @@ def check_study_assay_tables_against_config(i_df, dir_context, configs):
         protocol_names_and_types = dict(zip(protocol_names, protocol_types))
         if study_filename is not '':
             try:
-                with open(os.path.join(dir_context, study_filename), encoding='utf-8') as s_fp:
+                with utf8_text_file_open(os.path.join(dir_context, study_filename)) as s_fp:
                     df = load_table(s_fp)
                     config = configs[('[sample]', '')]
                     log.info("Checking study file {} against default study table configuration...".format(study_filename))
@@ -2382,7 +2383,7 @@
             technology_type = assay_df['Study Assay Technology Type'].tolist()[0]
             if assay_filename is not '':
                 try:
-                    with open(os.path.join(dir_context, assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(dir_context, assay_filename)) as a_fp:
                         df = load_table(a_fp)
                         lowered_mt = measurement_type.lower()
                         lowered_tt = technology_type.lower()
@@ -2819,7 +2820,7 @@ def validate(fp, config_dir=default_config_dir, log_level=None):
         protocol_names_and_types = dict(zip(protocol_names, protocol_types))
         try:
             log.info("Loading... {}".format(study_filename))
-            with open(os.path.join(os.path.dirname(fp.name), study_filename), encoding='utf-8') as s_fp:
+            with utf8_text_file_open(os.path.join(os.path.dirname(fp.name), study_filename)) as s_fp:
                 study_sample_table = load_table(s_fp)
                 study_sample_table.filename = study_filename
                 config = configs[('[sample]', '')]
@@ -2875,7 +2876,7 @@
             else:
                 try:
                     log.info("Loading... {}".format(assay_filename))
-                    with open(os.path.join(os.path.dirname(fp.name), assay_filename), encoding='utf-8') as a_fp:
+                    with utf8_text_file_open(os.path.join(os.path.dirname(fp.name), assay_filename)) as a_fp:
                         assay_table = load_table(a_fp)
                         assay_table.filename = assay_filename
                         assay_tables.append(assay_table)
@@ -2988,7 +2989,7 @@ def batch_validate(tab_dir_list):
         if len(i_files) != 1:
             log.warning("Could not find an investigation file, skipping {}".format(tab_dir))
         else:
-            with open(i_files[0], encoding='utf-8') as fp:
+            with utf8_text_file_open(i_files[0]) as fp:
                 batch_report['batch_report'].append(
                     {
                         "filename": fp.name,
@@ -3007,16 +3008,16 @@ def dumps(isa_obj, skip_dump_tables=False,
    dump(isa_obj=isa_obj, output_path=tmp, skip_dump_tables=skip_dump_tables,
         write_factor_values_in_assay_table=
         write_factor_values_in_assay_table)
-    with open(os.path.join(tmp, 'i_investigation.txt'), encoding='utf-8') as i_fp:
+    with utf8_text_file_open(os.path.join(tmp, 'i_investigation.txt')) as i_fp:
         output += os.path.join(tmp, 'i_investigation.txt') + '\n'
         output += i_fp.read()
     for s_file in glob.iglob(os.path.join(tmp, 's_*')):
-        with open(s_file, encoding='utf-8') as s_fp:
+        with utf8_text_file_open(s_file) as s_fp:
             output += "--------\n"
             output += s_file + '\n'
             output += s_fp.read()
     for a_file in glob.iglob(os.path.join(tmp, 'a_*')):
-        with open(a_file, encoding='utf-8') as a_fp:
+        with utf8_text_file_open(a_file) as a_fp:
             output += "--------\n"
             output += a_file + '\n'
             output += a_fp.read()
@@ -3153,7 +3154,7 @@ def get_comments_row(cols, row):
     if os.path.isdir(isatab_path_or_ifile):
         fnames = glob.glob(os.path.join(isatab_path_or_ifile, "i_*.txt"))
         assert len(fnames) == 1
-        FP = open(fnames[0], encoding='utf-8')
+        FP = utf8_text_file_open(fnames[0])
     elif hasattr(isatab_path_or_ifile, 'read'):
         FP = isatab_path_or_ifile
     else:
@@ -3508,7 +3509,7 @@ def isatab_header(self):
 
 def read_tfile(tfile_path, index_col=None, factor_filter=None):
     log.debug("Opening %s", tfile_path)
-    with open(tfile_path, encoding='utf-8') as tfile_fp:
+    with utf8_text_file_open(tfile_path) as tfile_fp:
         log.debug("Reading file header")
         reader = csv.reader(tfile_fp, dialect='excel-tab')
         header = list(next(reader))
@@ -4229,7 +4230,7 @@ def parse_investigation(self, in_filename):
             'studycontacts')
         isecdict = {}
         ssecdicts = []
-        with open(in_filename, encoding='utf-8') as in_file:
+        with utf8_text_file_open(in_filename) as in_file:
            tabreader = csv.reader(
                filter(lambda r: r[0] != '#', in_file), dialect='excel-tab')
            current_section = ''
diff --git a/isatools/utils.py b/isatools/utils.py
index 076e26ec..05a50dec 100644
--- a/isatools/utils.py
+++ b/isatools/utils.py
@@ -8,6 +8,7 @@
 import uuid
 from functools import reduce
 from zipfile import ZipFile
+import sys
 
 from isatools import isatab
@@ -866,3 +867,11 @@ def remove_unused_protocols(self):
         investigation, output_path=os.path.dirname(self.path),
         i_file_name='{filename}.fix'.format(
             filename=os.path.basename(self.path)), skip_dump_tables=True)
+
+
+def utf8_text_file_open(path):
+    if sys.version_info[0] < 3:
+        fp = open(path, 'rb')
+    else:
+        fp = open(path, 'r', newline='', encoding='utf8')
+    return fp
diff --git a/tox.ini b/tox.ini
index 52c00576..6cdf7615 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py34,py35,py36
+envlist = py36
 
 [testenv]
 deps=-r{toxinidir}/requirements-tests.txt

From 819eb8b61106a92716d9dc8a091c8ea9619f35fb Mon Sep 17 00:00:00 2001
From: djcomlab
Date: Wed, 8 Aug 2018 13:58:06 +0100
Subject: [PATCH 4/6] Re-introduce other py3* envs for full testing

---
 .travis.yml | 7 ++++++-
 tox.ini     | 6 +-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a8fdf5cf..47d0e6ba 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,11 @@
+sudo: false
 language: python
+cache: pip
 python:
-- '3.6'
+  - 3.4
+  - 3.5
+  - 3.6
+  - 3.7
 before_install:
 - bash -x get_test_data.sh
 install:
diff --git a/tox.ini b/tox.ini
index 6cdf7615..686573cc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,10 +1,6 @@
 [tox]
-envlist = py36
+envlist = py34,py35,py36,py37
 
 [testenv]
 deps=-r{toxinidir}/requirements-tests.txt
 commands=nosetests
-
-[travis]
-python =
-    3.6: py36

From c2be1084f63250ea3b969e624c8ef191d6114259 Mon Sep 17 00:00:00 2001
From: djcomlab
Date: Wed, 8 Aug 2018 14:00:43 +0100
Subject: [PATCH 5/6] Fix build CI

---
 .travis.yml | 3 ---
 tox.ini     | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 47d0e6ba..9f5ad1f9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,10 +2,7 @@ sudo: false
 language: python
 cache: pip
 python:
-  - 3.4
-  - 3.5
   - 3.6
-  - 3.7
 before_install:
 - bash -x get_test_data.sh
 install:
diff --git a/tox.ini b/tox.ini
index 686573cc..17b749f3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py34,py35,py36,py37
+envlist = py34,py35,py36
 
 [testenv]
 deps=-r{toxinidir}/requirements-tests.txt

From 317dbf647e0054f62a81feaa059ce326ee14b0f7 Mon Sep 17 00:00:00 2001
From: djcomlab
Date: Wed, 8 Aug 2018 14:26:49 +0100
Subject: [PATCH 6/6] Update version number for release

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 49e9d30f..fe69401a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 
 setup(
     name='isatools',
-    version='0.10.2',
+    version='0.10.3',
     packages=['isatools',
               'isatools.convert',
               'isatools.create',