From 5ce362c3b914d732d0f045870a14635c39923833 Mon Sep 17 00:00:00 2001
From: Ben Webb <ben@salilab.org>
Date: Tue, 10 Sep 2024 16:54:19 -0700
Subject: [PATCH] Squashed 'modules/pmi/' changes from 3648ed87cb..95043d139f

95043d139f Note that UniProt accession is used if present
8172b86c0d More docs on FASTA and chain IDs

git-subtree-dir: modules/pmi
git-subtree-split: 95043d139f0136a10f2806b3025e99e27b03bf7d
---
 modules/pmi/pyext/src/topology/__init__.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/modules/pmi/pyext/src/topology/__init__.py b/modules/pmi/pyext/src/topology/__init__.py
index ed287116c6..9b591bd151 100644
--- a/modules/pmi/pyext/src/topology/__init__.py
+++ b/modules/pmi/pyext/src/topology/__init__.py
@@ -1368,11 +1368,18 @@ class TopologyReader:
       from the file is assumed to be a protein sequence. If it should instead
       be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
       a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
-      FASTA file and treat it as RNA.
+      FASTA file and treat it as RNA. The FASTA header may contain multiple
+      fields separated by pipe (|) characters. If so, the first field is
+      the FASTA sequence name and the second field (if present) is the
+      UniProt accession. For example, a header of ">cop9|Q13098" yields a
+      FASTA sequence name of "cop9" and a UniProt accession of "Q13098".
+      If such an accession is present, it is added to the generated
+      structure (and ultimately recorded in any output RMF file).
     - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
        If left empty, will set up as BEADS (you can also specify "BEADS")
        Can also write "IDEAL_HELIX".
-    - `chain`: Chain ID of this domain in the PDB file.
+    - `chain`: Chain ID of this domain in the PDB or mmCIF file. This is
+      the "author-provided" chain ID for mmCIF files, not the asym_id.
     - `residue_range`: Comma delimited pair defining range.
        Can leave empty or use 'all' for entire sequence from PDB file.
        The second item in the pair can be END to select the last residue in the