From 178956eebe248466ead0f262da67bf72e8e2ac75 Mon Sep 17 00:00:00 2001
From: CoprDistGit
Date: Wed, 31 May 2023 04:31:54 +0000
Subject: automatic import of python-vicinator

---
 .gitignore | 1 +
 python-vicinator.spec | 551 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sources | 1 +
 3 files changed, 553 insertions(+)
 create mode 100644 python-vicinator.spec
 create mode 100644 sources

diff --git a/.gitignore b/.gitignore
index e69de29..6449dea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/Vicinator-0.0.32.tar.gz
diff --git a/python-vicinator.spec b/python-vicinator.spec
new file mode 100644
index 0000000..210478f
--- /dev/null
+++ b/python-vicinator.spec
@@ -0,0 +1,551 @@
+# Keep the build going even if a generated file manifest ends up empty.
+%global _empty_manifest_terminate_build 0
+Name: python-Vicinator
+Version: 0.0.32
+Release: 1
+Summary: A small python package to trace orthology neighborhood across feature files
+# SPDX short identifier; the free-form string "MIT License" is not a valid license tag.
+License: MIT
+URL: https://github.com/ba1/vicinator
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/fb/20/1bc6dd3bc088bfdd933b1ace6c2a384c7da34491dc54d7f2907621d2b01f/Vicinator-0.0.32.tar.gz
+BuildArch: noarch
+
+Requires: python3-ete3
+Requires: python3-ansi2html
+Requires: python3-colorama
+Requires: python3-pandas
+Requires: python3-importlib-metadata
+
+%description
+[![Build Status](https://www.travis-ci.org/ba1/Vicinator.svg?branch=master)](https://www.travis-ci.org/ba1/Vicinator)
+[![codecov](https://codecov.io/gh/ba1/Vicinator/branch/master/graph/badge.svg)](https://codecov.io/gh/ba1/Vicinator)
+[![PyPI version](https://badge.fury.io/py/Vicinator.svg)](https://badge.fury.io/py/Vicinator)
+[![Requirements Status](https://requires.io/github/ba1/Vicinator/requirements.svg?branch=master)](https://requires.io/github/ba1/Vicinator/requirements/?branch=master)
+[![Documentation Status](https://readthedocs.org/projects/vicinator/badge/?version=latest)](https://vicinator.readthedocs.io/en/latest/?badge=latest)
+[![Code
style:black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +# Vicinator + +### What is Vicinator for? + +Vicinator visualizes the microsynteny of grouped proteins (e.g. orthologs) across a large collection of genomes. +As input, it requires a mapping of the genomes' proteins to the respective protein groups and a directory containing +the genomes' feature files, i.e. files of the format *\*.gff* or *\*_feature_table.txt*. + +![image](https://user-images.githubusercontent.com/8181764/104918766-86b5e980-5995-11eb-8a6b-9f2505c74973.png) + + +### What is Vicinator not for? + +As stated above, Vicinator relies on a pre-computed grouping of proteins across genomes. It can not find these +groups of genes for you. + +### Installation + +Vicinator is written for Python 3.6+ + +It is recommended to install Vicinator inside a virtual environment, e.g. with venv: + +`python3 -m venv myenv` + +This activates the new environment called *myenv*. While activated, you can install the latest version via pip. +The following command installs the latest version and all unmet requirements automatically. + +`pip install --upgrade vicinator` + +Requirements: + - ansi2html>=1.5.2 + - colorama>=0.4.4 + - ete3>=3.1.2 + - pandas>=1.1.3 + - importlib-metadata>=3.1.1 + - setuptools-scm>=5.0.1 + +### Options + +``` +python3 vicinator/vicinator.py --help + +usage: vicinator [-h] --tabular-ortholog-groups --feat-tables-dir + --reference --centerprotein-accession + (--extension-size | --extension-mask [ ...]) + [--tree ] [--outdir ] [--prefix ] + [--outputlabel-map ] [--nprocs ] [--force] [--version] + +Track Microsynteny of target proteins and its orthologs across genomes. 
+ +required arguments: + --tabular-ortholog-groups + path to mapping file with format + ortholog_group_idgenome_idprotein_seq_id + --feat-tables-dir + path to directory of *.feature_tables.txt or *.gff3 files that shall be + screen + +required arguments (neighborhood): + --reference + path to a ncbi style feature table or gff file that acts as a reference + --centerprotein-accession + unique identifier of the central gene of the window + --extension-size + defines the #features that are co-checked to the left and right of the + centerprotein + --extension-mask [ ...] + defines the position of features that are co-checked to the left and right + relative to the centerprotein (position 0). + +optional arguments (output): + --tree + path to newick tree that includes all taxa to be screened + --outdir path to desired output directory + --prefix if option is set, shows intergenic distances of genes surrounding the + center gene + --outputlabel-map + Attempts to replace genome accessions in the outputs with a replacement + string. Requires a two-column map file formatted like so: 'genome file + accession' 'replacement string'. The replacement will automatically + be cut to a maximum of 30 chars. + +optional arguments (run): + --nprocs Number of CPUs for parallel processing of genomes. Default: Number of + CPUs-1 + --force if option is set, existing ortholog databases in the output dir are + ignored and will be overwritten +``` + +### Input: Required Arguments + +
+ +`--tabular-ortholog-groups ` + +>Vicinator requires a tab-separated three-column mapping of orthologs that is formatted like so: +> +> **group_id**    \tab   **genome_id**    \tab   **protein_id** +> ![example mapping file](https://user-images.githubusercontent.com/8181764/104924281-815c9d00-599d-11eb-9cb5-3e309f188bcd.png) + +
+ +` --feat-tables-dir ` + +>Vicinator expects the path to a directory containing *.gff* format or *_feature_table.txt* +> files of all the genomes you want to trace the microsynteny in. +> +> A recommended source for these files is NCBI RefSeq. In order for the mapping to work, the filenames +> should correspond to the **genome_ids** specified in the mapping file: +> +> E.g. line 7: **OG_2    genomeB    protein_X011** +>
+> triggers a search in a feature file named **genomeB.gff** or **genomeB_genomic.gff** or **genomeB_feature_table.txt** +> in the directory specified with `--feat-tables-dir`. Effectively, it tries to locate the protein_X011 in this feature file. + +
+ +`--reference ` +> the path to a reference genome feature file where the center-protein accession must be found + +
+ +`--centerprotein-accession` & `--extension-size ` + +>Identifies the window of vicinity around a center-protein which is traced based on the findings in the reference +> genome. +> ![Vicinator Window in Reference Genome](https://user-images.githubusercontent.com/8181764/104915463-f83f6900-5990-11eb-9930-552b95109d16.png) + +
+ +## Example Basic Usage + +`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-size 3` + +## Example Advanced Usage + +When vicinator receives a phylogenetic tree (with genome_ids as leaf labels) it will trace the microsynteny in order of +increasing phylogenetic distance to the reference genome specified. + +`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-size 3 --tree phylogeny.nwk` + + +## Example Advanced Usage 2 + +When vicinator is started with the `--extension-mask` parameter it expects a space-separated list of integers representing +the relative positions of proteins to the center-protein vicinator will trace. You don't have to give +them in order since they will be sorted automatically with 0 representing the center protein (always included). 
+
+`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-mask -35 -1 0 7 9`
+
+
+
+
+%package -n python3-Vicinator
+Summary: A small python package to trace orthology neighborhood across feature files
+Provides: python-Vicinator
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+# Runtime dependencies are repeated here because subpackages do not inherit
+# the Requires of the main package, and only this subpackage ships the
+# Python files.
+Requires: python3-ete3
+Requires: python3-ansi2html
+Requires: python3-colorama
+Requires: python3-pandas
+Requires: python3-importlib-metadata
+%description -n python3-Vicinator
+[![Build Status](https://www.travis-ci.org/ba1/Vicinator.svg?branch=master)](https://www.travis-ci.org/ba1/Vicinator)
+[![codecov](https://codecov.io/gh/ba1/Vicinator/branch/master/graph/badge.svg)](https://codecov.io/gh/ba1/Vicinator)
+[![PyPI version](https://badge.fury.io/py/Vicinator.svg)](https://badge.fury.io/py/Vicinator)
+[![Requirements Status](https://requires.io/github/ba1/Vicinator/requirements.svg?branch=master)](https://requires.io/github/ba1/Vicinator/requirements/?branch=master)
+[![Documentation Status](https://readthedocs.org/projects/vicinator/badge/?version=latest)](https://vicinator.readthedocs.io/en/latest/?badge=latest)
+[![Code style:black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+# Vicinator
+
+### What is Vicinator for?
+
+Vicinator visualizes the microsynteny of grouped proteins (e.g. orthologs) across a large collection of genomes.
+As input, it requires a mapping of the genomes' proteins to the respective protein groups and a directory containing
+the genomes' feature files, i.e. files of the format *\*.gff* or *\*_feature_table.txt*.
+
+![image](https://user-images.githubusercontent.com/8181764/104918766-86b5e980-5995-11eb-8a6b-9f2505c74973.png)
+
+
+### What is Vicinator not for?
+
+As stated above, Vicinator relies on a pre-computed grouping of proteins across genomes. It can not find these
+groups of genes for you.
+ +### Installation + +Vicinator is written for Python 3.6+ + +It is recommended to install Vicinator inside a virtual environment, e.g. with venv: + +`python3 -m venv myenv` + +This activates the new environment called *myenv*. While activated, you can install the latest version via pip. +The following command installs the latest version and all unmet requirements automatically. + +`pip install --upgrade vicinator` + +Requirements: + - ansi2html>=1.5.2 + - colorama>=0.4.4 + - ete3>=3.1.2 + - pandas>=1.1.3 + - importlib-metadata>=3.1.1 + - setuptools-scm>=5.0.1 + +### Options + +``` +python3 vicinator/vicinator.py --help + +usage: vicinator [-h] --tabular-ortholog-groups --feat-tables-dir + --reference --centerprotein-accession + (--extension-size | --extension-mask [ ...]) + [--tree ] [--outdir ] [--prefix ] + [--outputlabel-map ] [--nprocs ] [--force] [--version] + +Track Microsynteny of target proteins and its orthologs across genomes. + +required arguments: + --tabular-ortholog-groups + path to mapping file with format + ortholog_group_idgenome_idprotein_seq_id + --feat-tables-dir + path to directory of *.feature_tables.txt or *.gff3 files that shall be + screen + +required arguments (neighborhood): + --reference + path to a ncbi style feature table or gff file that acts as a reference + --centerprotein-accession + unique identifier of the central gene of the window + --extension-size + defines the #features that are co-checked to the left and right of the + centerprotein + --extension-mask [ ...] + defines the position of features that are co-checked to the left and right + relative to the centerprotein (position 0). + +optional arguments (output): + --tree + path to newick tree that includes all taxa to be screened + --outdir path to desired output directory + --prefix if option is set, shows intergenic distances of genes surrounding the + center gene + --outputlabel-map + Attempts to replace genome accessions in the outputs with a replacement + string. 
Requires a two-column map file formatted like so: 'genome file + accession' 'replacement string'. The replacement will automatically + be cut to a maximum of 30 chars. + +optional arguments (run): + --nprocs Number of CPUs for parallel processing of genomes. Default: Number of + CPUs-1 + --force if option is set, existing ortholog databases in the output dir are + ignored and will be overwritten +``` + +### Input: Required Arguments + +
+ +`--tabular-ortholog-groups ` + +>Vicinator requires a tab-separated three-column mapping of orthologs that is formatted like so: +> +> **group_id**    \tab   **genome_id**    \tab   **protein_id** +> ![example mapping file](https://user-images.githubusercontent.com/8181764/104924281-815c9d00-599d-11eb-9cb5-3e309f188bcd.png) + +
+ +` --feat-tables-dir ` + +>Vicinator expects the path to a directory containing *.gff* format or *_feature_table.txt* +> files of all the genomes you want to trace the microsynteny in. +> +> A recommended source for these files is NCBI RefSeq. In order for the mapping to work, the filenames +> should correspond to the **genome_ids** specified in the mapping file: +> +> E.g. line 7: **OG_2    genomeB    protein_X011** +>
+> triggers a search in a feature file named **genomeB.gff** or **genomeB_genomic.gff** or **genomeB_feature_table.txt** +> in the directory specified with `--feat-tables-dir`. Effectively, it tries to locate the protein_X011 in this feature file. + +
+ +`--reference ` +> the path to a reference genome feature file where the center-protein accession must be found + +
+ +`--centerprotein-accession` & `--extension-size ` + +>Identifies the window of vicinity around a center-protein which is traced based on the findings in the reference +> genome. +> ![Vicinator Window in Reference Genome](https://user-images.githubusercontent.com/8181764/104915463-f83f6900-5990-11eb-9930-552b95109d16.png) + +
+ +## Example Basic Usage + +`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-size 3` + +## Example Advanced Usage + +When vicinator receives a phylogenetic tree (with genome_ids as leaf labels) it will trace the microsynteny in order of +increasing phylogenetic distance to the reference genome specified. + +`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-size 3 --tree phylogeny.nwk` + + +## Example Advanced Usage 2 + +When vicinator is started with the `--extension-mask` parameter it expects a space-separated list of integers representing +the relative positions of proteins to the center-protein vicinator will trace. You don't have to give +them in order since they will be sorted automatically with 0 representing the center protein (always included). 
+
+`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-mask -35 -1 0 7 9`
+
+
+
+
+%package help
+# Documentation subpackage; it also answers to the python3-Vicinator-doc name.
+Provides: python3-Vicinator-doc
+Summary: Development documents and examples for Vicinator
+%description help
+[![Build Status](https://www.travis-ci.org/ba1/Vicinator.svg?branch=master)](https://www.travis-ci.org/ba1/Vicinator)
+[![codecov](https://codecov.io/gh/ba1/Vicinator/branch/master/graph/badge.svg)](https://codecov.io/gh/ba1/Vicinator)
+[![PyPI version](https://badge.fury.io/py/Vicinator.svg)](https://badge.fury.io/py/Vicinator)
+[![Requirements Status](https://requires.io/github/ba1/Vicinator/requirements.svg?branch=master)](https://requires.io/github/ba1/Vicinator/requirements/?branch=master)
+[![Documentation Status](https://readthedocs.org/projects/vicinator/badge/?version=latest)](https://vicinator.readthedocs.io/en/latest/?badge=latest)
+[![Code style:black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+# Vicinator
+
+### What is Vicinator for?
+
+Vicinator visualizes the microsynteny of grouped proteins (e.g. orthologs) across a large collection of genomes.
+As input, it requires a mapping of the genomes' proteins to the respective protein groups and a directory containing
+the genomes' feature files, i.e. files of the format *\*.gff* or *\*_feature_table.txt*.
+
+![image](https://user-images.githubusercontent.com/8181764/104918766-86b5e980-5995-11eb-8a6b-9f2505c74973.png)
+
+
+### What is Vicinator not for?
+
+As stated above, Vicinator relies on a pre-computed grouping of proteins across genomes. It can not find these
+groups of genes for you.
+
+### Installation
+
+Vicinator is written for Python 3.6+
+
+It is recommended to install Vicinator inside a virtual environment, e.g. with venv:
+
+`python3 -m venv myenv`
+
+This activates the new environment called *myenv*.
While activated, you can install the latest version via pip. +The following command installs the latest version and all unmet requirements automatically. + +`pip install --upgrade vicinator` + +Requirements: + - ansi2html>=1.5.2 + - colorama>=0.4.4 + - ete3>=3.1.2 + - pandas>=1.1.3 + - importlib-metadata>=3.1.1 + - setuptools-scm>=5.0.1 + +### Options + +``` +python3 vicinator/vicinator.py --help + +usage: vicinator [-h] --tabular-ortholog-groups --feat-tables-dir + --reference --centerprotein-accession + (--extension-size | --extension-mask [ ...]) + [--tree ] [--outdir ] [--prefix ] + [--outputlabel-map ] [--nprocs ] [--force] [--version] + +Track Microsynteny of target proteins and its orthologs across genomes. + +required arguments: + --tabular-ortholog-groups + path to mapping file with format + ortholog_group_idgenome_idprotein_seq_id + --feat-tables-dir + path to directory of *.feature_tables.txt or *.gff3 files that shall be + screen + +required arguments (neighborhood): + --reference + path to a ncbi style feature table or gff file that acts as a reference + --centerprotein-accession + unique identifier of the central gene of the window + --extension-size + defines the #features that are co-checked to the left and right of the + centerprotein + --extension-mask [ ...] + defines the position of features that are co-checked to the left and right + relative to the centerprotein (position 0). + +optional arguments (output): + --tree + path to newick tree that includes all taxa to be screened + --outdir path to desired output directory + --prefix if option is set, shows intergenic distances of genes surrounding the + center gene + --outputlabel-map + Attempts to replace genome accessions in the outputs with a replacement + string. Requires a two-column map file formatted like so: 'genome file + accession' 'replacement string'. The replacement will automatically + be cut to a maximum of 30 chars. 
+ +optional arguments (run): + --nprocs Number of CPUs for parallel processing of genomes. Default: Number of + CPUs-1 + --force if option is set, existing ortholog databases in the output dir are + ignored and will be overwritten +``` + +### Input: Required Arguments + +
+ +`--tabular-ortholog-groups ` + +>Vicinator requires a tab-separated three-column mapping of orthologs that is formatted like so: +> +> **group_id**    \tab   **genome_id**    \tab   **protein_id** +> ![example mapping file](https://user-images.githubusercontent.com/8181764/104924281-815c9d00-599d-11eb-9cb5-3e309f188bcd.png) + +
+ +` --feat-tables-dir ` + +>Vicinator expects the path to a directory containing *.gff* format or *_feature_table.txt* +> files of all the genomes you want to trace the microsynteny in. +> +> A recommended source for these files is NCBI RefSeq. In order for the mapping to work, the filenames +> should correspond to the **genome_ids** specified in the mapping file: +> +> E.g. line 7: **OG_2    genomeB    protein_X011** +>
+> triggers a search in a feature file named **genomeB.gff** or **genomeB_genomic.gff** or **genomeB_feature_table.txt** +> in the directory specified with `--feat-tables-dir`. Effectively, it tries to locate the protein_X011 in this feature file. + +
+ +`--reference ` +> the path to a reference genome feature file where the center-protein accession must be found + +
+ +`--centerprotein-accession` & `--extension-size ` + +>Identifies the window of vicinity around a center-protein which is traced based on the findings in the reference +> genome. +> ![Vicinator Window in Reference Genome](https://user-images.githubusercontent.com/8181764/104915463-f83f6900-5990-11eb-9930-552b95109d16.png) + +
+ +## Example Basic Usage + +`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-size 3` + +## Example Advanced Usage + +When vicinator receives a phylogenetic tree (with genome_ids as leaf labels) it will trace the microsynteny in order of +increasing phylogenetic distance to the reference genome specified. + +`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-size 3 --tree phylogeny.nwk` + + +## Example Advanced Usage 2 + +When vicinator is started with the `--extension-mask` parameter it expects a space-separated list of integers representing +the relative positions of proteins to the center-protein vicinator will trace. You don't have to give +them in order since they will be sorted automatically with 0 representing the center protein (always included). 
+
+`vicinator --tabular-ortholog-groups orthogenome_map.tsv --feat-tables-dir ./gff_dir --outdir ./results --reference gff_dir/MUSMU@10090@1.gff --centerprotein XP_006539605.1 --extension-mask -35 -1 0 7 9`
+
+
+
+
+%prep
+# Unpack the upstream tarball; its top-level directory uses the capitalised name.
+%autosetup -n Vicinator-0.0.32
+
+%build
+%py3_build
+
+%install
+%py3_install
+# Copy whichever documentation or example trees upstream ships into the doc dir.
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+# Generate the manifests used by the files sections below.
+# Every path is wrapped in double quotes so that a file name containing
+# whitespace stays a single manifest entry.
+# filelist.lst is created up front, like doclist.lst, so the mv after popd
+# cannot fail when none of the scanned directories exists.
+touch filelist.lst
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+    # Manual pages are listed with a .gz suffix appended to the installed name.
+    find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+# The manifests must live in the build directory, not inside the buildroot.
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-Vicinator -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Wed May 31 2023 Python_Bot - 0.0.32-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..06e9dce
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+876be16b84c60e253670607728edd880 Vicinator-0.0.32.tar.gz
--
cgit v1.2.3