From 04fcab203acb2aaeeed4c8726d30b02eccff11d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Wed, 13 Dec 2023 15:22:20 +0100
Subject: [PATCH 01/15] refactor: compare alignments between mapped reads only

---
 htsinfer/get_library_type.py | 61 +++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 4d4257c..545c4d2 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -144,6 +144,8 @@ def _align_mates(self):
         samfile1 = pysam.AlignmentFile(str(alignment_1), 'r')
         samfile2 = pysam.AlignmentFile(str(alignment_2), 'r')
 
+        seq_id1 = None
+        seq_id2 = None
         previous_seq_id1 = None
         previous_seq_id2 = None
 
@@ -154,43 +156,52 @@ def _align_mates(self):
         concordant = 0
 
         for read1 in samfile1:
-            seq_id1 = read1.query_name
-            if seq_id1 != previous_seq_id1 \
-                    and previous_seq_id1 is not None:
-                mate1.append(reads1.copy())
-                reads1.clear()
-            if read1.reference_end:
-                reads1.append(read1)
+            if not read1.flag & (1 << 2) and isinstance(read1.query_name, str):
+                seq_id1 = read1.query_name
+                if (
+                    seq_id1 != previous_seq_id1 and
+                    previous_seq_id1 is not None
+                ):
+                    mate1.append(reads1.copy())
+                    reads1.clear()
+                if read1.reference_end:
+                    reads1.append(read1)
             previous_seq_id1 = seq_id1
         mate1.append(reads1.copy())
 
         read_counter = 0
         for read2 in samfile2:
-            seq_id2 = read2.query_name
-            if seq_id2 != previous_seq_id2 \
-                    and previous_seq_id2 is not None:
-                if self._compare_alignments(mate1[read_counter], reads2):
-                    concordant += 1
-                reads2.clear()
-                read_counter += 1
-            if read2.reference_end:
-                reads2.append(read2)
+            if not read2.flag & (1 << 2) and isinstance(read2.query_name, str):
+                seq_id2 = read2.query_name
+                if seq_id2 != previous_seq_id2 \
+                        and previous_seq_id2 is not None:
+                    if self._compare_alignments(mate1[read_counter], reads2):
+                        concordant += 1
+                    reads2.clear()
+                    read_counter += 1
+                if read2.reference_end:
+                    reads2.append(read2)
             previous_seq_id2 = seq_id2
 
         if self._compare_alignments(mate1[read_counter], reads2):
             concordant += 1
 
-        if (concordant / read_counter) >= self.cutoff:
-            self.results.relationship = (
-                StatesTypeRelationship.split_mates
-            )
-            self.mapping.library_type.relationship \
-                = StatesTypeRelationship.split_mates
-            self.mapping.mapped = False
-            self.mapping.star_dirs = []
+        if read_counter > 0:
+            if (concordant / read_counter) >= self.cutoff:
+                self.results.relationship = (
+                    StatesTypeRelationship.split_mates
+                )
+                self.mapping.library_type.relationship \
+                    = StatesTypeRelationship.split_mates
+                self.mapping.mapped = False
+                self.mapping.star_dirs = []
+            else:
+                self.results.relationship = (
+                    StatesTypeRelationship.not_mates
+                )
         else:
             self.results.relationship = (
-                StatesTypeRelationship.not_mates
+                StatesTypeRelationship.not_available
             )
 
         samfile1.close()

From 455e98368aac67a76f6103a4a9cb8f749545de8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Wed, 3 Jan 2024 13:33:28 +0100
Subject: [PATCH 02/15] fix: update get lib type test

---
 htsinfer/get_library_type.py   |  5 +++--
 tests/test_get_library_type.py | 35 +++++++++++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 545c4d2..cc4bb1e 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -135,6 +135,7 @@ def _evaluate_mate_relationship(
             self.mapping.evaluate()
             self._align_mates()
 
+    # pylint: disable=R0912
     def _align_mates(self):
         """Decide mate relationship by alignment."""
 
@@ -186,7 +187,7 @@ def _align_mates(self):
         if self._compare_alignments(mate1[read_counter], reads2):
             concordant += 1
 
-        if read_counter > 0:
+        try:
             if (concordant / read_counter) >= self.cutoff:
                 self.results.relationship = (
                     StatesTypeRelationship.split_mates
@@ -199,7 +200,7 @@ def _align_mates(self):
                 self.results.relationship = (
                     StatesTypeRelationship.not_mates
                 )
-        else:
+        except ZeroDivisionError:
             self.results.relationship = (
                 StatesTypeRelationship.not_available
             )
diff --git a/tests/test_get_library_type.py b/tests/test_get_library_type.py
index dd52dfa..8555694 100644
--- a/tests/test_get_library_type.py
+++ b/tests/test_get_library_type.py
@@ -112,6 +112,35 @@ def test_evaluate_mate_relationship_split_mates(self):
         )
 
     def test_evaluate_mate_relationship_not_mates(self, tmpdir):
+        """Test mate relationship evaluation logic with input files that are
+        mates, but the relationship is not enough to trigger split_mates.
+        """
+        CONFIG.args.path_1_processed = FILE_MATE_1
+        CONFIG.args.path_2_processed = FILE_MATE_2
+        CONFIG.args.t_file_processed = FILE_TRANSCRIPTS
+        CONFIG.args.tmp_dir = tmpdir
+        MAPPING.paths = (FILE_MATE_1, FILE_MATE_2)
+        MAPPING.transcripts_file = FILE_TRANSCRIPTS
+        MAPPING.tmp_dir = tmpdir
+
+        test_instance = GetLibType(config=CONFIG, mapping=MAPPING)
+        test_instance.results.file_1 = StatesType.not_available
+        test_instance.results.file_2 = StatesType.not_available
+
+        # Set the cutoff such that it's not enough to trigger split_mates
+        test_instance.cutoff = 300
+
+        # Call the _evaluate_mate_relationship method
+        test_instance._evaluate_mate_relationship(
+            ids_1=["A", "B", "C"], ids_2=["A", "B", "C"]
+        )
+
+        assert (
+            test_instance.results.relationship ==
+            StatesTypeRelationship.not_mates
+        )
+
+    def test_evaluate_mate_relationship_not_available(self, tmpdir):
         """Test mate relationship evaluation logic with input files that are
         not mates from a paired-end library.
         """
@@ -124,12 +153,12 @@ def test_evaluate_mate_relationship_not_mates(self, tmpdir):
         MAPPING.tmp_dir = tmpdir
         test_instance = GetLibType(config=CONFIG,
                                    mapping=MAPPING)
-        test_instance.results.file_1 = StatesType.first_mate
-        test_instance.results.file_2 = StatesType.second_mate
+        test_instance.results.file_1 = StatesType.not_available
+        test_instance.results.file_2 = StatesType.not_available
         test_instance.evaluate()
         assert (
             test_instance.results.relationship ==
-            StatesTypeRelationship.not_mates
+            StatesTypeRelationship.not_available
         )
 
     def test_evaluate_split_mates_not_matching_ids(self, tmpdir):

From 806420d64f022ca912111a9c28baf155eb5d00b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Wed, 3 Jan 2024 15:24:25 +0100
Subject: [PATCH 03/15] refactor: helper function for getlibtype

---
 htsinfer/get_library_type.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index cc4bb1e..825a66a 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -135,7 +135,6 @@ def _evaluate_mate_relationship(
             self.mapping.evaluate()
             self._align_mates()
 
-    # pylint: disable=R0912
     def _align_mates(self):
         """Decide mate relationship by alignment."""
 
@@ -187,6 +186,13 @@ def _align_mates(self):
         if self._compare_alignments(mate1[read_counter], reads2):
             concordant += 1
 
+        self._update_relationship(concordant, read_counter)
+
+        samfile1.close()
+        samfile2.close()
+
+    def _update_relationship(self, concordant, read_counter):
+        """Helper function to update relationship based on alignment."""
         try:
             if (concordant / read_counter) >= self.cutoff:
                 self.results.relationship = (
@@ -205,9 +211,6 @@ def _align_mates(self):
                 StatesTypeRelationship.not_available
             )
 
-        samfile1.close()
-        samfile2.close()
-
     class AlignedSegment:
         """Placeholder class for mypy "Missing attribute"
         error in _compare_alignments(), the actual object used

From 782971a5ea4e8020e521d315388ac7fc8814759e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Thu, 4 Jan 2024 18:27:48 +0100
Subject: [PATCH 04/15] feat: only map when lib source is inferred

---
 htsinfer/get_library_type.py     | 8 +++++++-
 htsinfer/get_read_orientation.py | 9 ++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 825a66a..0daa737 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -127,13 +127,19 @@ def _evaluate_mate_relationship(
                 self.mapping.library_type.relationship = (
                     StatesTypeRelationship.split_mates
                 )
-        else:
+        elif self.library_source.file_1.short_name is not None \
+                and self.library_source.file_2.short_name is not None:
             self.mapping.library_type.relationship \
                 = StatesTypeRelationship.not_available
             self.mapping.library_source = self.library_source
             self.mapping.paths = self.path_1, self.path_2
             self.mapping.evaluate()
             self._align_mates()
+        else:
+            LOGGER.warning(
+                "Library source is not determined, "
+                "mate relationship cannot be inferred by alignment."
+            )
 
     def _align_mates(self):
         """Decide mate relationship by alignment."""
diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index d76b092..c7dd1ff 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -75,8 +75,15 @@ def evaluate(self) -> ResultsOrientation:
         self.mapping.transcripts_file = self.transcripts_file
         self.mapping.tmp_dir = self.tmp_dir
 
-        if not self.mapping.mapped:
+        if not self.mapping.mapped \
+                and self.library_source.file_1.short_name is not None \
+                and self.library_source.file_2.short_name is not None:
             self.mapping.evaluate()
+        else:
+            LOGGER.warning(
+                "Library source is not determined, "
+                "read orientation cannot be inferred by alignment."
+            )
 
         return self.process_alignments(star_dirs=self.mapping.star_dirs)
 

From 28284debc61a48ef8c36c511a4c43525f41d8dcb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Fri, 5 Jan 2024 11:12:25 +0100
Subject: [PATCH 05/15] update tests

---
 htsinfer/get_read_orientation.py |  2 +-
 tests/test_get_library_type.py   | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index c7dd1ff..a9ccd36 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -77,7 +77,7 @@ def evaluate(self) -> ResultsOrientation:
 
         if not self.mapping.mapped \
                 and self.library_source.file_1.short_name is not None \
-                and self.library_source.file_2.short_name is not None:
+                or self.library_source.file_2.short_name is not None:
             self.mapping.evaluate()
         else:
             LOGGER.warning(
diff --git a/tests/test_get_library_type.py b/tests/test_get_library_type.py
index 8555694..738840a 100644
--- a/tests/test_get_library_type.py
+++ b/tests/test_get_library_type.py
@@ -13,7 +13,9 @@
     GetFastqType,
 )
 from htsinfer.models import (
+    ResultsSource,
     ResultsType,
+    Source,
     SeqIdFormats,
     StatesType,
     StatesTypeRelationship,
@@ -147,6 +149,10 @@ def test_evaluate_mate_relationship_not_available(self, tmpdir):
         CONFIG.args.path_1_processed = FILE_IDS_NOT_MATCH_1
         CONFIG.args.path_2_processed = FILE_MATE_2
         CONFIG.args.t_file_processed = FILE_TRANSCRIPTS
+        CONFIG.results.library_source = ResultsSource(
+            file_1=Source(short_name="hsapiens", taxon_id=9606),
+            file_2=Source(short_name="hsapiens", taxon_id=9606),
+        )
         CONFIG.args.tmp_dir = tmpdir
         MAPPING.paths = (FILE_IDS_NOT_MATCH_1, FILE_MATE_2)
         MAPPING.transcripts_file = FILE_TRANSCRIPTS
@@ -167,6 +173,10 @@ def test_evaluate_split_mates_not_matching_ids(self, tmpdir):
         """
         CONFIG.args.path_1_processed = FILE_IDS_NOT_MATCH_1
         CONFIG.args.path_2_processed = FILE_IDS_NOT_MATCH_2
+        CONFIG.results.library_source = ResultsSource(
+            file_1=Source(short_name="hsapiens", taxon_id=9606),
+            file_2=Source(short_name="hsapiens", taxon_id=9606),
+        )
         CONFIG.args.tmp_dir = tmpdir
         MAPPING.paths = (FILE_IDS_NOT_MATCH_1, FILE_IDS_NOT_MATCH_2)
         MAPPING.tmp_dir = tmpdir

From dae137d6c23f34bd6d037f7f4c3d9b8f34398ae1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Fri, 5 Jan 2024 11:45:40 +0100
Subject: [PATCH 06/15] fix orientation tests

---
 tests/test_get_read_orientation.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/test_get_read_orientation.py b/tests/test_get_read_orientation.py
index 1089433..ddd9fb6 100644
--- a/tests/test_get_read_orientation.py
+++ b/tests/test_get_read_orientation.py
@@ -154,7 +154,10 @@ def test_evaluate_paired_unmapped(self, tmpdir):
         CONFIG.args.path_1_processed = FILE_UNMAPPED_PAIRED_1
         CONFIG.args.path_2_processed = FILE_UNMAPPED_PAIRED_2
         CONFIG.args.tmp_dir = tmpdir
-        CONFIG.results.library_source = ResultsSource()
+        CONFIG.results.library_source = ResultsSource(
+            file_1=Source(short_name="hsapiens", taxon_id=9606),
+            file_2=Source(short_name="hsapiens", taxon_id=9606)
+        )
         CONFIG.results.library_type = ResultsType(
             relationship=StatesTypeRelationship.split_mates,
         )
@@ -259,7 +262,7 @@ def test_evaluate_paired_not_mates_unmapped(self, tmpdir):
             )
         CONFIG.results.library_source = ResultsSource(
             file_1=Source(),
-            file_2=Source(),
+            file_2=Source(short_name="hsapiens", taxon_id=9606),
         )
         CONFIG.args.tmp_dir = tmpdir
         MAPPING.mapped = False

From bf7bea12885064946d70d79d9c454ce92c6f09e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Fri, 5 Jan 2024 13:41:41 +0100
Subject: [PATCH 07/15] fix orientation tests

---
 tests/test_get_read_orientation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_get_read_orientation.py b/tests/test_get_read_orientation.py
index ddd9fb6..b73b8e2 100644
--- a/tests/test_get_read_orientation.py
+++ b/tests/test_get_read_orientation.py
@@ -73,6 +73,10 @@ def test_init_all(self, tmpdir):
         CONFIG.args.path_2_processed = FILE_MATE_2
         CONFIG.args.t_file_processed = FILE_TRANSCRIPTS
         CONFIG.args.tmp_dir = tmpdir
+        CONFIG.results.library_source = ResultsSource(
+            file_1=Source(),
+            file_2=Source()
+        )
         test_instance = GetOrientation(config=CONFIG,
                                        mapping=MAPPING)
         assert test_instance.paths[0] == FILE_MATE_1

From 5f8f62d113a970579b544a487f2ce08d265fd539 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Fri, 5 Jan 2024 15:48:55 +0100
Subject: [PATCH 08/15] refactor scripts

---
 htsinfer/get_library_type.py     | 4 ++--
 htsinfer/get_read_orientation.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 0daa737..0edfc36 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -127,8 +127,8 @@ def _evaluate_mate_relationship(
                 self.mapping.library_type.relationship = (
                     StatesTypeRelationship.split_mates
                 )
-        elif self.library_source.file_1.short_name is not None \
-                and self.library_source.file_2.short_name is not None:
+        elif (self.library_source.file_1.short_name is not None
+              and self.library_source.file_2.short_name is not None):
             self.mapping.library_type.relationship \
                 = StatesTypeRelationship.not_available
             self.mapping.library_source = self.library_source
diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index a9ccd36..6e3c7a7 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -76,8 +76,8 @@ def evaluate(self) -> ResultsOrientation:
         self.mapping.tmp_dir = self.tmp_dir
 
         if not self.mapping.mapped \
-                and self.library_source.file_1.short_name is not None \
-                or self.library_source.file_2.short_name is not None:
+                and (self.library_source.file_1.short_name is not None
+                     or self.library_source.file_2.short_name is not None):
             self.mapping.evaluate()
         else:
             LOGGER.warning(

From 1e992e1cda3ec16c73d1704417e06310a7167fed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Mon, 8 Jan 2024 11:05:54 +0100
Subject: [PATCH 09/15] update get lib type and get read orient

---
 htsinfer/get_library_type.py     | 65 +++++++++++++++++++-------------
 htsinfer/get_read_orientation.py |  2 +-
 tests/test_get_library_type.py   |  6 ---
 3 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 0edfc36..ca68c4e 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -113,7 +113,7 @@ def _evaluate_mate_relationship(
             ids_2: As `ids_1` but for the putative second mate file.
         """
         self.results.relationship = StatesTypeRelationship.not_mates
-        if ids_1 == ids_2:
+        if ids_1 == ids_2 and len(ids_1) > 0 and len(ids_2) > 0:
             if (
                 self.results.file_1 == StatesType.first_mate and
                 self.results.file_2 == StatesType.second_mate
@@ -136,7 +136,7 @@ def _evaluate_mate_relationship(
             self.mapping.evaluate()
             self._align_mates()
         else:
-            LOGGER.warning(
+            LOGGER.debug(
                 "Library source is not determined, "
                 "mate relationship cannot be inferred by alignment."
             )
@@ -181,7 +181,9 @@ def _align_mates(self):
                 seq_id2 = read2.query_name
                 if seq_id2 != previous_seq_id2 \
                         and previous_seq_id2 is not None:
-                    if self._compare_alignments(mate1[read_counter], reads2):
+                    if read_counter < len(mate1) and self._compare_alignments(
+                        mate1[read_counter], reads2
+                    ):
                         concordant += 1
                     reads2.clear()
                     read_counter += 1
@@ -189,9 +191,13 @@ def _align_mates(self):
                     reads2.append(read2)
             previous_seq_id2 = seq_id2
 
-        if self._compare_alignments(mate1[read_counter], reads2):
+        if read_counter < len(mate1) and self._compare_alignments(
+            mate1[read_counter], reads2
+        ):
             concordant += 1
-
+        LOGGER.debug(f"Number of mapped reads file 1: {len(mate1)}")
+        LOGGER.debug(f"Number of mapped reads file 2: {read_counter}")
+        LOGGER.debug(f"Number of concordant reads: {concordant}")
         self._update_relationship(concordant, read_counter)
 
         samfile1.close()
@@ -325,12 +331,14 @@ def evaluate(self) -> None:
 
                 if self.seq_id_format is None:
                     self.result = StatesType.not_available
-                    raise MetadataWarning(
+                    LOGGER.warning(
                         "Could not determine sequence identifier format."
                     )
-                LOGGER.debug(
-                    f"Sequence identifier format: {self.seq_id_format.name}"
-                )
+                else:
+                    LOGGER.debug(
+                        "Sequence identifier format: "
+                        f"{self.seq_id_format.name}"
+                    )
 
                 # Ensure that remaining records are compatible with sequence
                 # identifier format and library type determined from first
@@ -339,28 +347,33 @@ def evaluate(self) -> None:
                     "Checking consistency of remaining reads with initially "
                     "determined identifier format and library type..."
                 )
-                for record in seq_iter:
-                    records += 1
-                    try:
-                        self._get_read_type(
-                            seq_id=record[0],
-                            regex=self.seq_id_format.value,
-                        )
-                    except (
-                        InconsistentFastqIdentifiers,
-                        UnknownFastqIdentifier,
-                    ) as exc:
-                        self.result = StatesType.not_available
-                        raise MetadataWarning(
-                            f"{type(exc).__name__}: {str(exc)}"
-                        ) from exc
+                if self.seq_id_format is not None:
+                    for record in seq_iter:
+                        records += 1
+                        try:
+                            self._get_read_type(
+                                seq_id=record[0],
+                                regex=self.seq_id_format.value,
+                            )
+                        except (
+                            InconsistentFastqIdentifiers,
+                            UnknownFastqIdentifier,
+                        ) as exc:
+                            self.result = StatesType.not_available
+                            raise MetadataWarning(
+                                f"{type(exc).__name__}: {str(exc)}"
+                            ) from exc
+                    LOGGER.debug(f"Total records processed: {records}")
+                else:
+                    LOGGER.debug(
+                        "Could not determine sequence identifier format. "
+                        "Skipping consistency check for the remaining reads."
+                    )
 
         except (OSError, ValueError) as exc:
             self.result = StatesType.not_available
             raise FileProblem(f"{type(exc).__name__}: {str(exc)}") from exc
 
-        LOGGER.debug(f"Total records processed: {records}")
-
     def _get_read_type(
         self,
         seq_id: str,
diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index 6e3c7a7..93b7c01 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -80,7 +80,7 @@ def evaluate(self) -> ResultsOrientation:
                      or self.library_source.file_2.short_name is not None):
             self.mapping.evaluate()
         else:
-            LOGGER.warning(
+            LOGGER.debug(
                 "Library source is not determined, "
                 "read orientation cannot be inferred by alignment."
             )
diff --git a/tests/test_get_library_type.py b/tests/test_get_library_type.py
index 738840a..9682bf7 100644
--- a/tests/test_get_library_type.py
+++ b/tests/test_get_library_type.py
@@ -214,12 +214,6 @@ def test_evaluate_single(self):
         test_instance.evaluate()
         assert test_instance.result == StatesType.single
 
-    def test_evaluate_unknown_seq_id(self):
-        """Evaluate file with identifiers of an unknown format."""
-        test_instance = GetFastqType(path=FILE_UNKNOWN_SEQ_ID)
-        with pytest.raises(MetadataWarning):
-            test_instance.evaluate()
-
     def test_evaluate_inconsistent_identifiers_single_mate(self):
         """Raise ``MetadataWarning`` by passing a file with inconsistent
         identifiers, suggesting a single-end library first, then a paired-end

From 3fbaceb511b6b3aedcebf0cd73217f763a428e95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Mon, 8 Jan 2024 14:02:31 +0100
Subject: [PATCH 10/15] refactor get lib type

---
 htsinfer/get_library_type.py   | 14 ++++++++------
 tests/test_get_library_type.py |  1 -
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index ca68c4e..19665c9 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -206,7 +206,13 @@ def _align_mates(self):
     def _update_relationship(self, concordant, read_counter):
         """Helper function to update relationship based on alignment."""
         try:
-            if (concordant / read_counter) >= self.cutoff:
+            ratio = concordant / read_counter
+        except ZeroDivisionError:
+            self.results.relationship = (
+                StatesTypeRelationship.not_available
+            )
+        else:
+            if ratio >= self.cutoff:
                 self.results.relationship = (
                     StatesTypeRelationship.split_mates
                 )
@@ -218,10 +224,6 @@ def _update_relationship(self, concordant, read_counter):
                 self.results.relationship = (
                     StatesTypeRelationship.not_mates
                 )
-        except ZeroDivisionError:
-            self.results.relationship = (
-                StatesTypeRelationship.not_available
-            )
 
     class AlignedSegment:
         """Placeholder class for mypy "Missing attribute"
@@ -331,7 +333,7 @@ def evaluate(self) -> None:
 
                 if self.seq_id_format is None:
                     self.result = StatesType.not_available
-                    LOGGER.warning(
+                    LOGGER.debug(
                         "Could not determine sequence identifier format."
                     )
                 else:
diff --git a/tests/test_get_library_type.py b/tests/test_get_library_type.py
index 9682bf7..2ae8c96 100644
--- a/tests/test_get_library_type.py
+++ b/tests/test_get_library_type.py
@@ -33,7 +33,6 @@
     FILE_IDS_NOT_MATCH_2,
     FILE_TRANSCRIPTS,
     FILE_SINGLE,
-    FILE_UNKNOWN_SEQ_ID,
     RaiseError,
     SEQ_ID_DUMMY,
     SEQ_ID_MATE_1,

From 70f425214c77be96c3cc01a38ced88cf2e0193fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Mon, 8 Jan 2024 16:16:25 +0100
Subject: [PATCH 11/15] refactor: update get lib type and tests

---
 htsinfer/get_library_type.py     | 35 +++++++++--------
 htsinfer/get_read_orientation.py |  7 ++--
 tests/test_get_library_type.py   | 65 +++++++++++++++++++++++++++++++-
 3 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 19665c9..19d870f 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -127,8 +127,10 @@ def _evaluate_mate_relationship(
                 self.mapping.library_type.relationship = (
                     StatesTypeRelationship.split_mates
                 )
-        elif (self.library_source.file_1.short_name is not None
-              and self.library_source.file_2.short_name is not None):
+        elif (
+            self.library_source.file_1.short_name is not None and
+            self.library_source.file_2.short_name is not None
+        ):
             self.mapping.library_type.relationship \
                 = StatesTypeRelationship.not_available
             self.mapping.library_source = self.library_source
@@ -136,6 +138,7 @@ def _evaluate_mate_relationship(
             self.mapping.evaluate()
             self._align_mates()
         else:
+            self.results.relationship = StatesTypeRelationship.not_available
             LOGGER.debug(
                 "Library source is not determined, "
                 "mate relationship cannot be inferred by alignment."
@@ -195,8 +198,8 @@ def _align_mates(self):
             mate1[read_counter], reads2
         ):
             concordant += 1
-        LOGGER.debug(f"Number of mapped reads file 1: {len(mate1)}")
-        LOGGER.debug(f"Number of mapped reads file 2: {read_counter}")
+        LOGGER.debug(f"Number of aligned reads file 1: {len(mate1)}")
+        LOGGER.debug(f"Number of aligned reads file 2: {read_counter}")
         LOGGER.debug(f"Number of concordant reads: {concordant}")
         self._update_relationship(concordant, read_counter)
 
@@ -331,25 +334,26 @@ def evaluate(self) -> None:
                     self.result = StatesType.not_available
                     raise FileProblem(f"File is empty: {self.path}") from exc
 
-                if self.seq_id_format is None:
-                    self.result = StatesType.not_available
+                if self.seq_id_format is not None:
                     LOGGER.debug(
-                        "Could not determine sequence identifier format."
+                        "Sequence identifier format: "
+                        f"{self.seq_id_format.name}"
                     )
                 else:
+                    self.result = StatesType.not_available
                     LOGGER.debug(
-                        "Sequence identifier format: "
-                        f"{self.seq_id_format.name}"
+                        "Could not determine sequence identifier format."
                     )
 
                 # Ensure that remaining records are compatible with sequence
                 # identifier format and library type determined from first
                 # record
-                LOGGER.debug(
-                    "Checking consistency of remaining reads with initially "
-                    "determined identifier format and library type..."
-                )
                 if self.seq_id_format is not None:
+                    LOGGER.debug(
+                        "Checking consistency of remaining reads with "
+                        "initially determined identifier format "
+                        "and library type..."
+                    )
                     for record in seq_iter:
                         records += 1
                         try:
@@ -366,11 +370,6 @@ def evaluate(self) -> None:
                                 f"{type(exc).__name__}: {str(exc)}"
                             ) from exc
                     LOGGER.debug(f"Total records processed: {records}")
-                else:
-                    LOGGER.debug(
-                        "Could not determine sequence identifier format. "
-                        "Skipping consistency check for the remaining reads."
-                    )
 
         except (OSError, ValueError) as exc:
             self.result = StatesType.not_available
diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index 93b7c01..f66794b 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -75,9 +75,10 @@ def evaluate(self) -> ResultsOrientation:
         self.mapping.transcripts_file = self.transcripts_file
         self.mapping.tmp_dir = self.tmp_dir
 
-        if not self.mapping.mapped \
-                and (self.library_source.file_1.short_name is not None
-                     or self.library_source.file_2.short_name is not None):
+        if not self.mapping.mapped and (
+            self.library_source.file_1.short_name is not None or
+            self.library_source.file_2.short_name is not None
+        ):
             self.mapping.evaluate()
         else:
             LOGGER.debug(
diff --git a/tests/test_get_library_type.py b/tests/test_get_library_type.py
index 2ae8c96..e1a0dce 100644
--- a/tests/test_get_library_type.py
+++ b/tests/test_get_library_type.py
@@ -29,6 +29,7 @@
     FILE_INCONSISTENT_IDS_SINGLE_OLD_NEW,
     FILE_MATE_1,
     FILE_MATE_2,
+    FILE_UNKNOWN_SEQ_ID,
     FILE_IDS_NOT_MATCH_1,
     FILE_IDS_NOT_MATCH_2,
     FILE_TRANSCRIPTS,
@@ -148,11 +149,11 @@ def test_evaluate_mate_relationship_not_available(self, tmpdir):
         CONFIG.args.path_1_processed = FILE_IDS_NOT_MATCH_1
         CONFIG.args.path_2_processed = FILE_MATE_2
         CONFIG.args.t_file_processed = FILE_TRANSCRIPTS
+        CONFIG.args.tmp_dir = tmpdir
         CONFIG.results.library_source = ResultsSource(
             file_1=Source(short_name="hsapiens", taxon_id=9606),
             file_2=Source(short_name="hsapiens", taxon_id=9606),
         )
-        CONFIG.args.tmp_dir = tmpdir
         MAPPING.paths = (FILE_IDS_NOT_MATCH_1, FILE_MATE_2)
         MAPPING.transcripts_file = FILE_TRANSCRIPTS
         MAPPING.tmp_dir = tmpdir
@@ -166,6 +167,62 @@ def test_evaluate_mate_relationship_not_available(self, tmpdir):
             StatesTypeRelationship.not_available
         )
 
+    def test_update_relationship_not_mates(self, tmpdir):
+        """Test update_relationship logic."""
+        CONFIG.args.path_1_processed = FILE_IDS_NOT_MATCH_1
+        CONFIG.args.path_2_processed = FILE_MATE_2
+        CONFIG.args.t_file_processed = FILE_TRANSCRIPTS
+        CONFIG.args.tmp_dir = tmpdir
+        MAPPING.paths = (FILE_IDS_NOT_MATCH_1, FILE_MATE_2)
+        MAPPING.transcripts_file = FILE_TRANSCRIPTS
+        MAPPING.tmp_dir = tmpdir
+
+        test_instance = GetLibType(config=CONFIG, mapping=MAPPING)
+        test_instance.results.file_1 = StatesType.not_available
+        test_instance.results.file_2 = StatesType.not_available
+
+        # Simulate a scenario where ratio is below the cutoff
+        concordant = 0
+        read_counter = 20
+
+        # Call the _update_relationship method
+        test_instance._update_relationship(concordant, read_counter)
+
+        assert (
+            test_instance.results.relationship ==
+            StatesTypeRelationship.not_mates
+        )
+        assert (
+            test_instance.mapping.library_type.relationship ==
+            StatesTypeRelationship.not_available
+        )
+
+    def test_evaluate_mate_relationship_not_determined(self, tmpdir):
+        """Test mate relationship evaluation logic when
+        library source is not determined.
+        """
+        CONFIG.args.path_1_processed = FILE_MATE_1
+        CONFIG.args.path_2_processed = FILE_MATE_2
+        CONFIG.args.t_file_processed = FILE_TRANSCRIPTS
+        CONFIG.args.tmp_dir = tmpdir
+        CONFIG.results.library_source = ResultsSource(
+            file_1=Source(),
+            file_2=Source(),
+        )
+        test_instance = GetLibType(config=CONFIG, mapping=MAPPING)
+        test_instance.results.file_1 = StatesType.not_available
+        test_instance.results.file_2 = StatesType.not_available
+
+        # Call the _evaluate_mate_relationship method
+        test_instance._evaluate_mate_relationship(
+            ids_1=["A", "B", "C"], ids_2=["D", "E", "F"]
+        )
+
+        assert (
+            test_instance.results.relationship ==
+            StatesTypeRelationship.not_available
+        )
+
     def test_evaluate_split_mates_not_matching_ids(self, tmpdir):
         """Test mate relationship evaluation logic with input files that are
         not mates from a paired-end library.
@@ -304,6 +361,12 @@ def test_get_read_type_no_match(self):
                 regex=SeqIdFormats['Casava >=1.8'].value,
             )
 
+    def test_evaluate_unknown_identifier_format(self):
+        """Test scenario where seq_id format cannot be determined."""
+        test_instance = GetFastqType(path=FILE_UNKNOWN_SEQ_ID)
+        test_instance.evaluate()
+        assert test_instance.result == StatesType.not_available
+
     def test_get_read_type_single_pass(self):
         """Read identifier is consistent with previous state."""
         test_instance = GetFastqType(path=FILE_DUMMY)

From b5b80ca72ef69276fdffb257caff4bb850ede1d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Mon, 15 Jan 2024 14:00:41 +0100
Subject: [PATCH 12/15] refactor: update lib type and orientation

---
 htsinfer/get_library_type.py     | 12 +++++++-----
 htsinfer/get_read_orientation.py |  3 +--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 19d870f..3781179 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -113,7 +113,7 @@ def _evaluate_mate_relationship(
             ids_2: As `ids_1` but for the putative second mate file.
         """
         self.results.relationship = StatesTypeRelationship.not_mates
-        if ids_1 == ids_2 and len(ids_1) > 0 and len(ids_2) > 0:
+        if ids_1 and ids_2 and ids_1 == ids_2:
             if (
                 self.results.file_1 == StatesType.first_mate and
                 self.results.file_2 == StatesType.second_mate
@@ -140,8 +140,7 @@ def _evaluate_mate_relationship(
         else:
             self.results.relationship = StatesTypeRelationship.not_available
             LOGGER.debug(
-                "Library source is not determined, "
-                "mate relationship cannot be inferred by alignment."
+                "Mate relationship cannot be determined."
             )
 
     def _align_mates(self):
@@ -165,6 +164,7 @@ def _align_mates(self):
         concordant = 0
 
         for read1 in samfile1:
+            # ensure that "unmapped" flag is not set and query name is set
             if not read1.flag & (1 << 2) and isinstance(read1.query_name, str):
                 seq_id1 = read1.query_name
                 if (
@@ -180,6 +180,7 @@ def _align_mates(self):
 
         read_counter = 0
         for read2 in samfile2:
+            # ensure that "unmapped" flag is not set and query name is set
             if not read2.flag & (1 << 2) and isinstance(read2.query_name, str):
                 seq_id2 = read2.query_name
                 if seq_id2 != previous_seq_id2 \
@@ -219,8 +220,9 @@ def _update_relationship(self, concordant, read_counter):
                 self.results.relationship = (
                     StatesTypeRelationship.split_mates
                 )
-                self.mapping.library_type.relationship \
-                    = StatesTypeRelationship.split_mates
+                self.mapping.library_type.relationship = (
+                    StatesTypeRelationship.split_mates
+                )
                 self.mapping.mapped = False
                 self.mapping.star_dirs = []
             else:
diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index f66794b..b5cce78 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -82,8 +82,7 @@ def evaluate(self) -> ResultsOrientation:
             self.mapping.evaluate()
         else:
             LOGGER.debug(
-                "Library source is not determined, "
-                "read orientation cannot be inferred by alignment."
+                "Read orientation cannot be determined."
             )
 
         return self.process_alignments(star_dirs=self.mapping.star_dirs)

From 31af8ec271d1ade32cd792ea1de51e53c87a16c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Wed, 17 Jan 2024 11:46:07 +0100
Subject: [PATCH 13/15] refactor: update concordant read counting, mapping

---
 htsinfer/get_library_type.py | 71 ++++++++++++++++++++----------------
 htsinfer/mapping.py          |  1 +
 tests/test_mapping.py        |  3 +-
 3 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 3781179..3dd562c 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -158,59 +158,66 @@ def _align_mates(self):
         previous_seq_id2 = None
 
         reads1 = []  # List to store alignments for one read from file1
-        mate1 = []  # List to store alignments for each read
+        mate1 = []  # List to store alignments for each read from file1
         reads2 = []  # List to store alignments for one read from file2
+        mate2 = []  # List to store alignments for each read from file2
 
         concordant = 0
 
         for read1 in samfile1:
-            # ensure that "unmapped" flag is not set and query name is set
-            if not read1.flag & (1 << 2) and isinstance(read1.query_name, str):
-                seq_id1 = read1.query_name
-                if (
-                    seq_id1 != previous_seq_id1 and
-                    previous_seq_id1 is not None
-                ):
-                    mate1.append(reads1.copy())
-                    reads1.clear()
-                if read1.reference_end:
-                    reads1.append(read1)
+            seq_id1 = read1.query_name
+            if (
+                seq_id1 != previous_seq_id1 and
+                previous_seq_id1 is not None
+            ):
+                mate1.append(reads1.copy())
+                reads1.clear()
+            if read1.reference_end:
+                reads1.append(read1)
             previous_seq_id1 = seq_id1
         mate1.append(reads1.copy())
 
         read_counter = 0
         for read2 in samfile2:
-            # ensure that "unmapped" flag is not set and query name is set
-            if not read2.flag & (1 << 2) and isinstance(read2.query_name, str):
-                seq_id2 = read2.query_name
-                if seq_id2 != previous_seq_id2 \
-                        and previous_seq_id2 is not None:
-                    if read_counter < len(mate1) and self._compare_alignments(
-                        mate1[read_counter], reads2
-                    ):
-                        concordant += 1
-                    reads2.clear()
-                    read_counter += 1
-                if read2.reference_end:
-                    reads2.append(read2)
+            seq_id2 = read2.query_name
+            if (
+                seq_id2 != previous_seq_id2 and
+                previous_seq_id2 is not None
+            ):
+                mate2.append(reads2.copy())
+                if self._compare_alignments(
+                    mate1[read_counter], reads2
+                ):
+                    concordant += 1
+                reads2.clear()
+                read_counter += 1
+            if read2.reference_end:
+                reads2.append(read2)
             previous_seq_id2 = seq_id2
-
-        if read_counter < len(mate1) and self._compare_alignments(
+        mate2.append(reads2.copy())
+        if self._compare_alignments(
             mate1[read_counter], reads2
         ):
             concordant += 1
-        LOGGER.debug(f"Number of aligned reads file 1: {len(mate1)}")
-        LOGGER.debug(f"Number of aligned reads file 2: {read_counter}")
+
+        aligned_mate1 = len(list(filter(None, mate1)))
+        aligned_mate2 = len(list(filter(None, mate2)))
+
+        LOGGER.debug(f"Number of aligned reads file 1: {aligned_mate1}")
+        LOGGER.debug(f"Number of aligned reads file 2: {aligned_mate2}")
         LOGGER.debug(f"Number of concordant reads: {concordant}")
-        self._update_relationship(concordant, read_counter)
+
+        self._update_relationship(
+            concordant, min(aligned_mate1, aligned_mate2)
+        )
 
         samfile1.close()
         samfile2.close()
 
-    def _update_relationship(self, concordant, read_counter):
+    def _update_relationship(self, concordant, aligned_reads):
         """Helper function to update relationship based on alignment."""
         try:
-            ratio = concordant / read_counter
+            ratio = concordant / aligned_reads
         except ZeroDivisionError:
             self.results.relationship = (
                 StatesTypeRelationship.not_available
diff --git a/htsinfer/mapping.py b/htsinfer/mapping.py
index cb53f7a..e1cef46 100644
--- a/htsinfer/mapping.py
+++ b/htsinfer/mapping.py
@@ -299,6 +299,7 @@ def build_star_command(
                 "--runThreadN", f"{str(self.threads_star)}",
                 "--genomeDir", f"{str(index_dir)}",
                 "--outFilterMultimapNmax", "50",
+                "--outSAMorder", "PairedKeepInputOrder",
                 "--outSAMunmapped", "Within", "KeepPairs",
             ]
             cmd: List[str] = cmd_base[:]
diff --git a/tests/test_mapping.py b/tests/test_mapping.py
index 33798be..1bdecbb 100644
--- a/tests/test_mapping.py
+++ b/tests/test_mapping.py
@@ -187,7 +187,8 @@ def test_prepare_star_alignment_commands(self, tmpdir):
         file1_alignment_path = tmpdir / 'alignments/file_1'
         cmd = "STAR --alignIntronMax 1 --alignEndsType Local --runThreadN 1" \
             + " --genomeDir " + str(index_dir) + " --outFilterMultimapNmax " \
-            + "50 --outSAMunmapped Within KeepPairs --readFilesIn " \
+            + "50 --outSAMorder PairedKeepInputOrder " \
+            + "--outSAMunmapped Within KeepPairs --readFilesIn " \
             + str(FILE_2000_RECORDS) + " --outFileNamePrefix " \
             + str(file1_alignment_path) + "/"
         results = test_instance.prepare_star_alignment_commands(

From b90893054d0a350ef7a2557765bb6db526cf2885 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Wed, 17 Jan 2024 13:59:24 +0100
Subject: [PATCH 14/15] update debug messages

---
 htsinfer/get_library_type.py     | 6 ++++--
 htsinfer/get_read_orientation.py | 5 +----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py
index 3dd562c..37dad4f 100644
--- a/htsinfer/get_library_type.py
+++ b/htsinfer/get_library_type.py
@@ -128,9 +128,10 @@ def _evaluate_mate_relationship(
                     StatesTypeRelationship.split_mates
                 )
         elif (
-            self.library_source.file_1.short_name is not None and
+            self.library_source.file_1.short_name is not None or
             self.library_source.file_2.short_name is not None
         ):
+            LOGGER.debug("Determining mate relationship by alignment...")
             self.mapping.library_type.relationship \
                 = StatesTypeRelationship.not_available
             self.mapping.library_source = self.library_source
@@ -140,7 +141,8 @@ def _evaluate_mate_relationship(
         else:
             self.results.relationship = StatesTypeRelationship.not_available
             LOGGER.debug(
-                "Mate relationship cannot be determined."
+                "Sequence IDs and library source are not determined, "
+                "mate relationship cannot be inferred."
             )
 
     def _align_mates(self):
diff --git a/htsinfer/get_read_orientation.py b/htsinfer/get_read_orientation.py
index b5cce78..4b36e8b 100644
--- a/htsinfer/get_read_orientation.py
+++ b/htsinfer/get_read_orientation.py
@@ -79,11 +79,8 @@ def evaluate(self) -> ResultsOrientation:
             self.library_source.file_1.short_name is not None or
             self.library_source.file_2.short_name is not None
         ):
+            LOGGER.debug("Determining read relationship by alignment...")
             self.mapping.evaluate()
-        else:
-            LOGGER.debug(
-                "Read orientation cannot be determined."
-            )
 
         return self.process_alignments(star_dirs=self.mapping.star_dirs)
 

From 8fe0cbe78965f54f7c293fccded1801f6367b322 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1t=C3=A9=20Balajti?= <mate.balajti@unibas.ch>
Date: Thu, 18 Jan 2024 11:10:07 +0100
Subject: [PATCH 15/15] update comments in mapping

---
 htsinfer/mapping.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/htsinfer/mapping.py b/htsinfer/mapping.py
index e1cef46..52780f3 100644
--- a/htsinfer/mapping.py
+++ b/htsinfer/mapping.py
@@ -51,11 +51,7 @@ def __init__(
         self.star_dirs: List[Path] = []
 
     def evaluate(self):
-        """Infer read orientation.
-
-        Returns:
-            Orientation results object.
-        """
+        """Align FASTQ files to reference transcripts with STAR."""
 
         # get transcripts for current organims
         transcripts = self.subset_transcripts_by_organism()
@@ -270,6 +266,10 @@ def prepare_star_alignment_commands(
     ) -> Dict[Path, List[str]]:
         """Prepare STAR alignment commands.
 
+        Input FASTQ files are assumed to be sorted according to reference names
+        or coordinates, the order of input reads is kept with the option
+        "PairedKeepInputOrder", no additional sorting of aligned reads is done.
+
         Args:
             index_dir: Path to directory containing STAR index.