From 64339236bad9c83f40939b614ad2e48c3960554b Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Mon, 29 May 2023 10:37:25 +0000 Subject: automatic import of python-gcgc --- .gitignore | 1 + python-gcgc.spec | 353 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 355 insertions(+) create mode 100644 python-gcgc.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..0466779 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/gcgc-1.0.0.tar.gz diff --git a/python-gcgc.spec b/python-gcgc.spec new file mode 100644 index 0000000..ef0b99e --- /dev/null +++ b/python-gcgc.spec @@ -0,0 +1,353 @@ +%global _empty_manifest_terminate_build 0 +Name: python-gcgc +Version: 1.0.0 +Release: 1 +Summary: GCGC is a preprocessing library for biological sequence model development. +License: MIT +URL: http://gcgc.trenthauck.com/ +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/07/80/a45a6f4dfdd9dfcb4a2f6c505478dcdf50eb45fdefabbf9ff10b444e5147/gcgc-1.0.0.tar.gz +BuildArch: noarch + +Requires: python3-pydantic +Requires: python3-importlib-metadata +Requires: python3-pytest +Requires: python3-black +Requires: python3-mypy +Requires: python3-mypy-extensions +Requires: python3-pycodestyle +Requires: python3-pydocstyle +Requires: python3-pytest-cov +Requires: python3-mkdocs +Requires: python3-mkdocs-material +Requires: python3-phmdoctest +Requires: python3-mkdocstrings +Requires: python3-commitizen +Requires: python3-pygments +Requires: python3-isort +Requires: python3-pylint +Requires: python3-twine +Requires: python3-biopython +Requires: python3-tokenizers +Requires: python3-datasets +Requires: python3-True +Requires: python3-setuptools-scm + +%description +# GCGC + +> GCGC is a tool for feature processing on Biological Sequences. 
+ +[![](https://github.com/tshauck/gcgc/workflows/Run%20Tests%20and%20Lint/badge.svg)](https://github.com/tshauck/gcgc/actions?query=workflow%3A%22Run+Tests+and+Lint%22) +[![](https://img.shields.io/pypi/v/gcgc.svg)](https://pypi.python.org/pypi/gcgc) +[![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +## Installation + +GCGC is primarily intended to be used as part of a larger workflow inside +Python. + +To install via pip: + +```sh +$ pip install gcgc +``` + +If you'd like to use code that helps gcgc's tokenizers integrate with common +third party libraries, either install those packages separately, or use gcgc's +extras. + +```sh +$ pip install 'gcgc[pytorch,hf]' +``` + +## Documentation + +The GCGC documentation is at [gcgc.trenthauck.com](http://gcgc.trenthauck.com), +please see it for examples. + +### Quick Start + +The easiest way to get started is to import the kmer tokenizer, configure it, +then start tokenizing. + +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +encoded = kmer_tokenizer.encode("ATCG") +print(encoded) +``` + +sample output: + +``` +[1, 6, 7, 8, 5, 2] +``` + +This output includes the "bos" token, the "eos" token, and the four nucleotide +tokens in between. + +You can go the other way and convert the integers to strings. + +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +decoded = kmer_tokenizer.decode(kmer_tokenizer.encode("ATCG")) +print(decoded) +``` + +sample output: + +``` +['>', 'A', 'T', 'C', 'G', '<'] +``` + +There's also the vocab for the kmer tokenizer. 
+ +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +print(kmer_tokenizer.vocab.stoi) +``` + +sample output: + +``` +{'|': 0, '>': 1, '<': 2, '#': 3, '?': 4, 'G': 5, 'A': 6, 'T': 7, 'C': 8} +``` + + + + +%package -n python3-gcgc +Summary: GCGC is a preprocessing library for biological sequence model development. +Provides: python-gcgc +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-gcgc +# GCGC + +> GCGC is a tool for feature processing on Biological Sequences. + +[![](https://github.com/tshauck/gcgc/workflows/Run%20Tests%20and%20Lint/badge.svg)](https://github.com/tshauck/gcgc/actions?query=workflow%3A%22Run+Tests+and+Lint%22) +[![](https://img.shields.io/pypi/v/gcgc.svg)](https://pypi.python.org/pypi/gcgc) +[![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +## Installation + +GCGC is primarily intended to be used as part of a larger workflow inside +Python. + +To install via pip: + +```sh +$ pip install gcgc +``` + +If you'd like to use code that helps gcgc's tokenizers integrate with common +third party libraries, either install those packages separately, or use gcgc's +extras. + +```sh +$ pip install 'gcgc[pytorch,hf]' +``` + +## Documentation + +The GCGC documentation is at [gcgc.trenthauck.com](http://gcgc.trenthauck.com), +please see it for examples. + +### Quick Start + +The easiest way to get started is to import the kmer tokenizer, configure it, +then start tokenizing. + +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +encoded = kmer_tokenizer.encode("ATCG") +print(encoded) +``` + +sample output: + +``` +[1, 6, 7, 8, 5, 2] +``` + +This output includes the "bos" token, the "eos" token, and the four nucleotide +tokens in between. + +You can go the other way and convert the integers to strings. 
+ +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +decoded = kmer_tokenizer.decode(kmer_tokenizer.encode("ATCG")) +print(decoded) +``` + +sample output: + +``` +['>', 'A', 'T', 'C', 'G', '<'] +``` + +There's also the vocab for the kmer tokenizer. + +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +print(kmer_tokenizer.vocab.stoi) +``` + +sample output: + +``` +{'|': 0, '>': 1, '<': 2, '#': 3, '?': 4, 'G': 5, 'A': 6, 'T': 7, 'C': 8} +``` + + + + +%package help +Summary: Development documents and examples for gcgc +Provides: python3-gcgc-doc +%description help +# GCGC + +> GCGC is a tool for feature processing on Biological Sequences. + +[![](https://github.com/tshauck/gcgc/workflows/Run%20Tests%20and%20Lint/badge.svg)](https://github.com/tshauck/gcgc/actions?query=workflow%3A%22Run+Tests+and+Lint%22) +[![](https://img.shields.io/pypi/v/gcgc.svg)](https://pypi.python.org/pypi/gcgc) +[![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +## Installation + +GCGC is primarily intended to be used as part of a larger workflow inside +Python. + +To install via pip: + +```sh +$ pip install gcgc +``` + +If you'd like to use code that helps gcgc's tokenizers integrate with common +third party libraries, either install those packages separately, or use gcgc's +extras. + +```sh +$ pip install 'gcgc[pytorch,hf]' +``` + +## Documentation + +The GCGC documentation is at [gcgc.trenthauck.com](http://gcgc.trenthauck.com), +please see it for examples. + +### Quick Start + +The easiest way to get started is to import the kmer tokenizer, configure it, +then start tokenizing. 
+ +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +encoded = kmer_tokenizer.encode("ATCG") +print(encoded) +``` + +sample output: + +``` +[1, 6, 7, 8, 5, 2] +``` + +This output includes the "bos" token, the "eos" token, and the four nucleotide +tokens in between. + +You can go the other way and convert the integers to strings. + +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +decoded = kmer_tokenizer.decode(kmer_tokenizer.encode("ATCG")) +print(decoded) +``` + +sample output: + +``` +['>', 'A', 'T', 'C', 'G', '<'] +``` + +There's also the vocab for the kmer tokenizer. + +```python +from gcgc import KmerTokenizer + +kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna") +print(kmer_tokenizer.vocab.stoi) +``` + +sample output: + +``` +{'|': 0, '>': 1, '<': 2, '#': 3, '?': 4, 'G': 5, 'A': 6, 'T': 7, 'C': 8} +``` + + + + +%prep +%autosetup -n gcgc-1.0.0 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . 
+ +%files -n python3-gcgc -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Mon May 29 2023 Python_Bot - 1.0.0-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..bd4f941 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +77e30c48c0bec219d4f010b995707ce8 gcgc-1.0.0.tar.gz -- cgit v1.2.3