diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | python-fastadist.spec | 212 | ||||
-rw-r--r-- | sources | 1 |
3 files changed, 214 insertions, 0 deletions
@@ -0,0 +1 @@ +/FastaDist-1.0.1.tar.gz diff --git a/python-fastadist.spec b/python-fastadist.spec new file mode 100644 index 0000000..361a5c2 --- /dev/null +++ b/python-fastadist.spec @@ -0,0 +1,212 @@ +%global _empty_manifest_terminate_build 0 +Name: python-FastaDist +Version: 1.0.1 +Release: 1 +Summary: Package to calculate a distance matrix from a multiple sequence file +License: MIT +URL: https://pypi.org/project/FastaDist/ +Source0: https://mirrors.aliyun.com/pypi/web/packages/1c/b2/8f441111d5608155558652b13c0636a2d9009f7a0d4eb0e540b12e868ab3/FastaDist-1.0.1.tar.gz +BuildArch: noarch + +Requires: python3-biopython +Requires: python3-bitarray +Requires: python3-parmap +Requires: python3-tqdm +Requires: python3-dendropy + +%description +## FastaDist + + +[GitLab repository](https://gitlab.com/antunderwood/fastadist) + +This small utility package will calculate the number of differences between all samples in a fasta alignment file. +It will count any position where there is a G,A,T or C (case insensitive) in both sequences that differ as 1 SNV. + +Output formats are a square distance matrix in tsv, csv or phylip formats. +It is fast since it first converts sequences to bit arrays and then uses fast bit operations to calculate the differences. + +On a mid-range laptop a distance matrix was produced in 11 minutes from a 764 sequence alignment of length 1,082,859 using -p 1 and 4.5 minutes with -p 4 + +#### Installation +FastaDist is available as [PyPi](https://pypi.org/project/FastaDist/) package for Python3 + +``` +pip3 install fastadist +``` + +#### Usage +``` +usage: fastadist [-h] -i ALIGNMENT_FILEPATH [-t TREE_FILEPATH] -o + OUTPUT_FILEPATH [-f FORMAT] [-p PARALLEL_PROCESSES] [-v] + + A script to calculate distances by converting sequences to bit arrays.
+ Specify number of processes as -p N to speed up the calculation + + +optional arguments: + -h, --help show this help message and exit + -i ALIGNMENT_FILEPATH, --alignment_filepath ALIGNMENT_FILEPATH + path to multiple sequence alignment input file + -t TREE_FILEPATH, --tree_filepath TREE_FILEPATH + path to newick tree for distance matrix ordering + -o OUTPUT_FILEPATH, --output_filepath OUTPUT_FILEPATH + path to distance matrix output file + -f FORMAT, --format FORMAT + output format for distance matrix (one of tsv + [default], csv and phylip + -p PARALLEL_PROCESSES, --parallel_processes PARALLEL_PROCESSES + number of parallel processes to run (default 1) + -v, --version print out software version +``` + + + +%package -n python3-FastaDist +Summary: Package to calculate a distance matrix from a multiple sequence file +Provides: python-FastaDist +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-FastaDist +## FastaDist + + +[GitLab repository](https://gitlab.com/antunderwood/fastadist) + +This small utility package will calculate the number of differences between all samples in a fasta alignment file. +It will count any position where there is a G,A,T or C (case insensitive) in both sequences that differ as 1 SNV. + +Output formats are a square distance matrix in tsv, csv or phylip formats. +It is fast since it first converts sequences to bit arrays and then uses fast bit operations to calculate the differences.
+ +On a mid-range laptop a distance matrix was produced in 11 minutes from a 764 sequence alignment of length 1,082,859 using -p 1 and 4.5 minutes with -p 4 + +#### Installation +FastaDist is available as [PyPi](https://pypi.org/project/FastaDist/) package for Python3 + +``` +pip3 install fastadist +``` + +#### Usage +``` +usage: fastadist [-h] -i ALIGNMENT_FILEPATH [-t TREE_FILEPATH] -o + OUTPUT_FILEPATH [-f FORMAT] [-p PARALLEL_PROCESSES] [-v] + + A script to calculate distances by converting sequences to bit arrays. + Specify number of processes as -p N to speed up the calculation + + +optional arguments: + -h, --help show this help message and exit + -i ALIGNMENT_FILEPATH, --alignment_filepath ALIGNMENT_FILEPATH + path to multiple sequence alignment input file + -t TREE_FILEPATH, --tree_filepath TREE_FILEPATH + path to newick tree for distance matrix ordering + -o OUTPUT_FILEPATH, --output_filepath OUTPUT_FILEPATH + path to distance matrix output file + -f FORMAT, --format FORMAT + output format for distance matrix (one of tsv + [default], csv and phylip + -p PARALLEL_PROCESSES, --parallel_processes PARALLEL_PROCESSES + number of parallel processes to run (default 1) + -v, --version print out software version +``` + + + +%package help +Summary: Development documents and examples for FastaDist +Provides: python3-FastaDist-doc +%description help +## FastaDist + + +[GitLab repository](https://gitlab.com/antunderwood/fastadist) + +This small utility package will calculate the number of differences between all samples in a fasta alignment file. +It will count any position where there is a G,A,T or C (case insensitive) in both sequences that differ as 1 SNV. + +Output formats are a square distance matrix in tsv, csv or phylip formats. +It is fast since it first converts sequences to bit arrays and then uses fast bit operations to calculate the differences.
+ +On a mid-range laptop a distance matrix was produced in 11 minutes from a 764 sequence alignment of length 1,082,859 using -p 1 and 4.5 minutes with -p 4 + +#### Installation +FastaDist is available as [PyPi](https://pypi.org/project/FastaDist/) package for Python3 + +``` +pip3 install fastadist +``` + +#### Usage +``` +usage: fastadist [-h] -i ALIGNMENT_FILEPATH [-t TREE_FILEPATH] -o + OUTPUT_FILEPATH [-f FORMAT] [-p PARALLEL_PROCESSES] [-v] + + A script to calculate distances by converting sequences to bit arrays. + Specify number of processes as -p N to speed up the calculation + + +optional arguments: + -h, --help show this help message and exit + -i ALIGNMENT_FILEPATH, --alignment_filepath ALIGNMENT_FILEPATH + path to multiple sequence alignment input file + -t TREE_FILEPATH, --tree_filepath TREE_FILEPATH + path to newick tree for distance matrix ordering + -o OUTPUT_FILEPATH, --output_filepath OUTPUT_FILEPATH + path to distance matrix output file + -f FORMAT, --format FORMAT + output format for distance matrix (one of tsv + [default], csv and phylip + -p PARALLEL_PROCESSES, --parallel_processes PARALLEL_PROCESSES + number of parallel processes to run (default 1) + -v, --version print out software version +``` + + + +%prep +%autosetup -n FastaDist-1.0.1 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f 
-printf "\"/%h/%f\"\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-FastaDist -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 1.0.1-1 +- Package Spec generated @@ -0,0 +1 @@ +2409b71df4ace9922028aefa7191a7c1 FastaDist-1.0.1.tar.gz |