1 files changed, 453 insertions, 0 deletions
diff --git a/python-bioinfo-tools.spec b/python-bioinfo-tools.spec
new file mode 100644
index 0000000..a816d30
--- /dev/null
+++ b/python-bioinfo-tools.spec
@@ -0,0 +1,453 @@
+%global _empty_manifest_terminate_build 0
+Name:		python-bioinfo_tools
+Version:	0.3.1
+Release:	1
+Summary:	Python library that parses GFF, Fasta files into python classes
+License:	BSD
+URL:		https://github.com/sebriois/bioinfo_tools
+Source0:	https://mirrors.aliyun.com/pypi/web/packages/0b/79/69b6fa350d0f8074c64a53642ddcf6e8453f1c1ed9d06f636bd70efb5686/bioinfo_tools-0.3.1.tar.gz
+BuildArch:	noarch
+
+
+%description
+# bioinfo_tools 0.3.1
+
+## Installation
+
+```bash
+pip install bioinfo_tools
+```
+
+## Parsers
+
+*HEADS UP!* These parsers are still under development and usage is not consistent from one parser to another.
+
+### Fasta parser
+
+```python
+from bioinfo_tools.parsers.fasta import FastaParser
+
+fasta_parser = FastaParser()
+
+# by default, sequence IDs are separated by the firstly found '|' or ':'
+for seqid, sequence in fasta_parser.read("/path/to/file.fasta"):
+    print(seqid, sequence)
+
+# you may specify a specific separator for your sequence ID (e.g white space):
+for seqid, sequence in fasta_parser.read("/path/to/file.fasta", id_separator=" "):
+    print(seqid, sequence)
+```
+
+### GFF parser
+
+```python
+from bioinfo_tools.parsers.gff import Gff3
+
+gff_parser = Gff3()
+with open("/path/to/file.gff", "r") as fh:
+    for gene in gff_parser.read(fh):
+        print(gene)
+
+import gzip
+with gzip.open("/path/to/file.gz", "rb") as fh:
+    for gene in gff_parser.read(fh):
+        print(gene)
+```
+
+### OBO parser
+
+
+```python
+from bioinfo_tools.parsers.obo import OboParser
+
+obo_parser = OboParser()
+with open("/path/to/file.obo") as fh:
+    go_terms = obo_parser.read(fh)
+
+for go_term in go_terms.values():
+    print(go_term)
+
+    # you may also get the GO term parents via the parser
+    parents = obo_parser.get_parents(go_term)
+```
+
+## Usage Examples
+
+### Extract all introns sequences by parsing GFF and fasta files
+
+In this example, we focus on a genome assembly. We will first load a GFF file containing gene annotations for this
+assembly, then load a fastA file containing the nucleic sequences of each chromosome in the genome.
+We will then collect all transcript introns and extract their nucleic sequences.
+
+**__DISCLAIMER__**: for this example to work, your GFF file must expose at least the following feature types in column #3:
+ - `gene`
+ - one of `transcript|mRNA|RNA` (or lowercased version)
+
+
+```python
+from bioinfo_tools.genomic_features.chromosome import Chromosome
+from bioinfo_tools.parsers.gff import Gff3
+from bioinfo_tools.parsers.fasta import FastaParser
+
+chromosomes = dict()  # {<chromosome_id>: <bioinfo_tools.genomic_features.Chromosome>}
+
+# start with parsing a GFF file
+gff_parser = Gff3()
+with open("/path/to/gene_models.gff", "r") as fh:
+    for gene in gff_parser.read(fh):
+        chromosome = gene['seqid']
+
+        if chromosome not in chromosomes:
+            chromosomes[chromosome] = Chromosome(chromosome)  # init a new Chromosome object
+
+        chromosomes[chromosome].add_gene(gene)  # add the current gene to our Chromosome object
+
+# load our chromosome sequences in memory
+fasta_parser = FastaParser()
+for chromosome, nucleic_sequence in fasta_parser.read("/path/to/genome_chromosomes.fasta"):
+    if chromosome not in chromosomes:
+        chromosomes[chromosome] = Chromosome(chromosome)
+    # attach parsed chromosome sequence to our Chromosome object
+    chromosomes[chromosome].attach_nucleic_sequence(nucleic_sequence)
+
+# now, collect introns and extact their nucleic sequence
+introns_sequences = dict()  #  {<intron_id>: <intron_sequence>}
+for chromosome in chromosomes.values():
+    for gene in chromosome.genes:
+        for transcript in gene.transcripts:
+            for idx, intron in enumerate(transcript.introns):
+                intron_id = "%s_intron_%s" % (transcript.transcript_id, idx)
+                intron_seq = intron.extract(chromosome.nucleic_sequence)  # that we attached above
+                introns_sequences[intron_id] = intron_seq
+
+# from here, you can do what you want with the intron sequences (eg. write them to a fasta file, etc)
+# ...
+```
+
+__Note:__ when at the transcript level, you can grab its feature types as described in your GFF file by doing so:
+```python
+for feature in transcript._get_features("exon"):
+    print(feature)  # I'm an exon
+```
+For convenience and clarity, following properties are available on transcript objects:
+```python
+print(transcript.introns)  # will call transcript._get_features('intron') behind the scenes
+print(transcript.exons)  # will call transcript._get_features('exon') behind the scenes
+print(transcript.cds)  # will call transcript._get_features('cds') behind the scenes
+print(transcript.polypeptide)  # will call transcript._get_features('polypeptide') behind the scenes
+print(transcript.five_prime_utr)  # will call transcript._get_features('five_prime_utr') behind the scenes
+print(transcript.three_prime_utr)  # will call transcript._get_features('three_prime_utr') behind the scenes
+```
+
+%package -n python3-bioinfo_tools
+Summary:	Python library that parses GFF, Fasta files into python classes
+Provides:	python-bioinfo_tools
+BuildRequires:	python3-devel
+BuildRequires:	python3-setuptools
+BuildRequires:	python3-pip
+%description -n python3-bioinfo_tools
+# bioinfo_tools 0.3.1
+
+## Installation
+
+```bash
+pip install bioinfo_tools
+```
+
+## Parsers
+
+*HEADS UP!* These parsers are still under development and usage is not consistent from one parser to another.
+
+### Fasta parser
+
+```python
+from bioinfo_tools.parsers.fasta import FastaParser
+
+fasta_parser = FastaParser()
+
+# by default, sequence IDs are separated by the firstly found '|' or ':'
+for seqid, sequence in fasta_parser.read("/path/to/file.fasta"):
+    print(seqid, sequence)
+
+# you may specify a specific separator for your sequence ID (e.g white space):
+for seqid, sequence in fasta_parser.read("/path/to/file.fasta", id_separator=" "):
+    print(seqid, sequence)
+```
+
+### GFF parser
+
+```python
+from bioinfo_tools.parsers.gff import Gff3
+
+gff_parser = Gff3()
+with open("/path/to/file.gff", "r") as fh:
+    for gene in gff_parser.read(fh):
+        print(gene)
+
+import gzip
+with gzip.open("/path/to/file.gz", "rb") as fh:
+    for gene in gff_parser.read(fh):
+        print(gene)
+```
+
+### OBO parser
+
+
+```python
+from bioinfo_tools.parsers.obo import OboParser
+
+obo_parser = OboParser()
+with open("/path/to/file.obo") as fh:
+    go_terms = obo_parser.read(fh)
+
+for go_term in go_terms.values():
+    print(go_term)
+
+    # you may also get the GO term parents via the parser
+    parents = obo_parser.get_parents(go_term)
+```
+
+## Usage Examples
+
+### Extract all introns sequences by parsing GFF and fasta files
+
+In this example, we focus on a genome assembly. We will first load a GFF file containing gene annotations for this
+assembly, then load a fastA file containing the nucleic sequences of each chromosome in the genome.
+We will then collect all transcript introns and extract their nucleic sequences.
+
+**__DISCLAIMER__**: for this example to work, your GFF file must expose at least the following feature types in column #3:
+ - `gene`
+ - one of `transcript|mRNA|RNA` (or lowercased version)
+
+
+```python
+from bioinfo_tools.genomic_features.chromosome import Chromosome
+from bioinfo_tools.parsers.gff import Gff3
+from bioinfo_tools.parsers.fasta import FastaParser
+
+chromosomes = dict()  # {<chromosome_id>: <bioinfo_tools.genomic_features.Chromosome>}
+
+# start with parsing a GFF file
+gff_parser = Gff3()
+with open("/path/to/gene_models.gff", "r") as fh:
+    for gene in gff_parser.read(fh):
+        chromosome = gene['seqid']
+
+        if chromosome not in chromosomes:
+            chromosomes[chromosome] = Chromosome(chromosome)  # init a new Chromosome object
+
+        chromosomes[chromosome].add_gene(gene)  # add the current gene to our Chromosome object
+
+# load our chromosome sequences in memory
+fasta_parser = FastaParser()
+for chromosome, nucleic_sequence in fasta_parser.read("/path/to/genome_chromosomes.fasta"):
+    if chromosome not in chromosomes:
+        chromosomes[chromosome] = Chromosome(chromosome)
+    # attach parsed chromosome sequence to our Chromosome object
+    chromosomes[chromosome].attach_nucleic_sequence(nucleic_sequence)
+
+# now, collect introns and extact their nucleic sequence
+introns_sequences = dict()  #  {<intron_id>: <intron_sequence>}
+for chromosome in chromosomes.values():
+    for gene in chromosome.genes:
+        for transcript in gene.transcripts:
+            for idx, intron in enumerate(transcript.introns):
+                intron_id = "%s_intron_%s" % (transcript.transcript_id, idx)
+                intron_seq = intron.extract(chromosome.nucleic_sequence)  # that we attached above
+                introns_sequences[intron_id] = intron_seq
+
+# from here, you can do what you want with the intron sequences (eg. write them to a fasta file, etc)
+# ...
+```
+
+__Note:__ when at the transcript level, you can grab its feature types as described in your GFF file by doing so:
+```python
+for feature in transcript._get_features("exon"):
+    print(feature)  # I'm an exon
+```
+For convenience and clarity, following properties are available on transcript objects:
+```python
+print(transcript.introns)  # will call transcript._get_features('intron') behind the scenes
+print(transcript.exons)  # will call transcript._get_features('exon') behind the scenes
+print(transcript.cds)  # will call transcript._get_features('cds') behind the scenes
+print(transcript.polypeptide)  # will call transcript._get_features('polypeptide') behind the scenes
+print(transcript.five_prime_utr)  # will call transcript._get_features('five_prime_utr') behind the scenes
+print(transcript.three_prime_utr)  # will call transcript._get_features('three_prime_utr') behind the scenes
+```
+
+%package help
+Summary:	Development documents and examples for bioinfo_tools
+Provides:	python3-bioinfo_tools-doc
+%description help
+# bioinfo_tools 0.3.1
+
+## Installation
+
+```bash
+pip install bioinfo_tools
+```
+
+## Parsers
+
+*HEADS UP!* These parsers are still under development and usage is not consistent from one parser to another.
+
+### Fasta parser
+
+```python
+from bioinfo_tools.parsers.fasta import FastaParser
+
+fasta_parser = FastaParser()
+
+# by default, sequence IDs are separated by the firstly found '|' or ':'
+for seqid, sequence in fasta_parser.read("/path/to/file.fasta"):
+    print(seqid, sequence)
+
+# you may specify a specific separator for your sequence ID (e.g white space):
+for seqid, sequence in fasta_parser.read("/path/to/file.fasta", id_separator=" "):
+    print(seqid, sequence)
+```
+
+### GFF parser
+
+```python
+from bioinfo_tools.parsers.gff import Gff3
+
+gff_parser = Gff3()
+with open("/path/to/file.gff", "r") as fh:
+    for gene in gff_parser.read(fh):
+        print(gene)
+
+import gzip
+with gzip.open("/path/to/file.gz", "rb") as fh:
+    for gene in gff_parser.read(fh):
+        print(gene)
+```
+
+### OBO parser
+
+
+```python
+from bioinfo_tools.parsers.obo import OboParser
+
+obo_parser = OboParser()
+with open("/path/to/file.obo") as fh:
+    go_terms = obo_parser.read(fh)
+
+for go_term in go_terms.values():
+    print(go_term)
+
+    # you may also get the GO term parents via the parser
+    parents = obo_parser.get_parents(go_term)
+```
+
+## Usage Examples
+
+### Extract all introns sequences by parsing GFF and fasta files
+
+In this example, we focus on a genome assembly. We will first load a GFF file containing gene annotations for this
+assembly, then load a fastA file containing the nucleic sequences of each chromosome in the genome.
+We will then collect all transcript introns and extract their nucleic sequences.
+
+**__DISCLAIMER__**: for this example to work, your GFF file must expose at least the following feature types in column #3:
+ - `gene`
+ - one of `transcript|mRNA|RNA` (or lowercased version)
+
+
+```python
+from bioinfo_tools.genomic_features.chromosome import Chromosome
+from bioinfo_tools.parsers.gff import Gff3
+from bioinfo_tools.parsers.fasta import FastaParser
+
+chromosomes = dict()  # {<chromosome_id>: <bioinfo_tools.genomic_features.Chromosome>}
+
+# start with parsing a GFF file
+gff_parser = Gff3()
+with open("/path/to/gene_models.gff", "r") as fh:
+    for gene in gff_parser.read(fh):
+        chromosome = gene['seqid']
+
+        if chromosome not in chromosomes:
+            chromosomes[chromosome] = Chromosome(chromosome)  # init a new Chromosome object
+
+        chromosomes[chromosome].add_gene(gene)  # add the current gene to our Chromosome object
+
+# load our chromosome sequences in memory
+fasta_parser = FastaParser()
+for chromosome, nucleic_sequence in fasta_parser.read("/path/to/genome_chromosomes.fasta"):
+    if chromosome not in chromosomes:
+        chromosomes[chromosome] = Chromosome(chromosome)
+    # attach parsed chromosome sequence to our Chromosome object
+    chromosomes[chromosome].attach_nucleic_sequence(nucleic_sequence)
+
+# now, collect introns and extact their nucleic sequence
+introns_sequences = dict()  #  {<intron_id>: <intron_sequence>}
+for chromosome in chromosomes.values():
+    for gene in chromosome.genes:
+        for transcript in gene.transcripts:
+            for idx, intron in enumerate(transcript.introns):
+                intron_id = "%s_intron_%s" % (transcript.transcript_id, idx)
+                intron_seq = intron.extract(chromosome.nucleic_sequence)  # that we attached above
+                introns_sequences[intron_id] = intron_seq
+
+# from here, you can do what you want with the intron sequences (eg. write them to a fasta file, etc)
+# ...
+```
+
+__Note:__ when at the transcript level, you can grab its feature types as described in your GFF file by doing so:
+```python
+for feature in transcript._get_features("exon"):
+    print(feature)  # I'm an exon
+```
+For convenience and clarity, following properties are available on transcript objects:
+```python
+print(transcript.introns)  # will call transcript._get_features('intron') behind the scenes
+print(transcript.exons)  # will call transcript._get_features('exon') behind the scenes
+print(transcript.cds)  # will call transcript._get_features('cds') behind the scenes
+print(transcript.polypeptide)  # will call transcript._get_features('polypeptide') behind the scenes
+print(transcript.five_prime_utr)  # will call transcript._get_features('five_prime_utr') behind the scenes
+print(transcript.three_prime_utr)  # will call transcript._get_features('three_prime_utr') behind the scenes
+```
+
+%prep
+%autosetup -n bioinfo_tools-0.3.1
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+	find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+	find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+	find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+	find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+	find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-bioinfo_tools -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 0.3.1-1
+- Package Spec generated