forked from XuegongLab/neoguider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fasta_addkey.py
39 lines (33 loc) · 1.65 KB
/
fasta_addkey.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
import argparse, collections, json, os, sys
def get_val_by_key(fhdr, key):
    """Return the value of the ``key=value`` token in a FASTA header.

    The header is split on whitespace. The first token (the record identifier
    that follows ``>``) is never inspected; among the remaining tokens only
    those containing exactly one ``=`` are treated as ``key=value`` pairs.

    Args:
        fhdr: Full FASTA header line, e.g. ``'>seq1 HLA=A0201 SRC=x'``.
        key: Key to look up.

    Returns:
        The value paired with ``key`` (possibly an empty string), or ``None``
        if no token carries that key.

    Raises:
        AssertionError: If ``key`` occurs in more than one token.
    """
    val = None
    # Skip the first token: it is the record identifier, not a comment token.
    for tok in fhdr.split()[1:]:
        # Tokens with zero or several '=' are not key=value pairs.
        if tok.count('=') != 1:
            continue
        k, v = tok.split('=')
        if k != key:
            continue
        if val is not None:
            # Raised explicitly rather than through `assert` so the duplicate
            # check still runs under `python -O`; the exception type is kept.
            raise AssertionError('The header {} has duplicated key {}'.format(fhdr, key))
        val = v
    return val
def main():
    """Add a default ``key=value`` token to FASTA headers that lack the key.

    Reads FASTA records from stdin. Every header for which
    ``get_val_by_key(header, --key)`` returns ``None`` gets the token
    `` <--key>=<--val>`` appended. Each record is written to stdout as the
    (possibly extended) header line followed by its sequence lines joined
    onto a single line.

    Raises:
        AssertionError: If a header lacks the key and ``--val`` is empty.
        ValueError: If non-blank text appears before the first header.
    """
    parser = argparse.ArgumentParser(
        description='Read FASTA records from stdin, fill in records without any token '
                    'of the form <key>=<val> in its comments with <--key>=<--val>, '
                    'and write FASTA records to stdout. ',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--key', default='', type=str,
        help='Key to look for in the comment of each FASTA header '
             '(e.g., HLA for FASTA header with HLA=... in its comment). ')
    parser.add_argument('--val', default='', type=str,
        help='Default value of the key to be used if the key is not found in the FASTA header. ')
    args = parser.parse_args()

    fhdr = None
    fseq = []
    for line in sys.stdin:
        if line.startswith('>'):
            # A new header terminates the previous record: flush it first.
            if fhdr is not None:
                print(fhdr + '\n' + ''.join(fseq))
            fhdr = line.strip()
            if get_val_by_key(fhdr, args.key) is None:
                if args.val == '':
                    # Raised explicitly rather than through `assert` so the
                    # check survives `python -O`; the exception type is kept.
                    raise AssertionError(
                        F'The header {fhdr} does not have the key {args.key} and empty string --val is provided. ')
                fhdr += F' {args.key}={args.val}'
            fseq = []
        elif fhdr is None:
            # No record is open yet: tolerate blank lines, reject anything else
            # with a clear message instead of failing on an unbound variable.
            if line.strip():
                raise ValueError(
                    'Malformed FASTA input: found text before the first header line: {!r}'.format(line.rstrip('\n')))
        else:
            fseq.append(line.strip())
    # Flush the last record, which has no following header to trigger it.
    if fhdr is not None:
        print(fhdr + '\n' + ''.join(fseq))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()