summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-05-29 10:37:25 +0000
committerCoprDistGit <infra@openeuler.org>2023-05-29 10:37:25 +0000
commit64339236bad9c83f40939b614ad2e48c3960554b (patch)
tree5a2c68dc79e429c20e9e229aeee1c103bfc6b887
parentf3e1dc3292ed0f03c284fa16edefb89a9af908ed (diff)
automatic import of python-gcgc
-rw-r--r--.gitignore1
-rw-r--r--python-gcgc.spec353
-rw-r--r--sources1
3 files changed, 355 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..0466779 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/gcgc-1.0.0.tar.gz
diff --git a/python-gcgc.spec b/python-gcgc.spec
new file mode 100644
index 0000000..ef0b99e
--- /dev/null
+++ b/python-gcgc.spec
@@ -0,0 +1,353 @@
+%global _empty_manifest_terminate_build 0
+Name: python-gcgc
+Version: 1.0.0
+Release: 1
+Summary: GCGC is a preprocessing library for biological sequence model development
+License: MIT
+URL: http://gcgc.trenthauck.com/
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/07/80/a45a6f4dfdd9dfcb4a2f6c505478dcdf50eb45fdefabbf9ff10b444e5147/gcgc-1.0.0.tar.gz
+BuildArch: noarch
+
+Requires: python3-pydantic
+Requires: python3-importlib-metadata
+Requires: python3-pytest
+Requires: python3-black
+Requires: python3-mypy
+Requires: python3-mypy-extensions
+Requires: python3-pycodestyle
+Requires: python3-pydocstyle
+Requires: python3-pytest-cov
+Requires: python3-mkdocs
+Requires: python3-mkdocs-material
+Requires: python3-phmdoctest
+Requires: python3-mkdocstrings
+Requires: python3-commitizen
+Requires: python3-pygments
+Requires: python3-isort
+Requires: python3-pylint
+Requires: python3-twine
+Requires: python3-biopython
+Requires: python3-tokenizers
+Requires: python3-datasets
+Requires: python3-True
+Requires: python3-setuptools-scm
+
+%description
+# GCGC
+
+> GCGC is a tool for feature processing on Biological Sequences.
+
+[![](https://github.com/tshauck/gcgc/workflows/Run%20Tests%20and%20Lint/badge.svg)](https://github.com/tshauck/gcgc/actions?query=workflow%3A%22Run+Tests+and+Lint%22)
+[![](https://img.shields.io/pypi/v/gcgc.svg)](https://pypi.python.org/pypi/gcgc)
+[![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+## Installation
+
+GCGC is primarily intended to be used as part of a larger workflow inside
+Python.
+
+To install via pip:
+
+```sh
+$ pip install gcgc
+```
+
+If you'd like to use code that helps gcgc's tokenizers integrate with common
+third-party libraries, either install those packages separately or use gcgc's
+extras.
+
+```sh
+$ pip install 'gcgc[pytorch,hf]'
+```
+
+## Documentation
+
+The GCGC documentation is at [gcgc.trenthauck.com](http://gcgc.trenthauck.com);
+please see it for examples.
+
+### Quick Start
+
+The easiest way to get started is to import the kmer tokenizer, configure it,
+then start tokenizing.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+encoded = kmer_tokenizer.encode("ATCG")
+print(encoded)
+```
+
+sample output:
+
+```
+[1, 6, 7, 8, 5, 2]
+```
+
+This output includes the "bos" token, the "eos" token, and the four nucleotide
+tokens in between.
+
+You can go the other way and convert the integers to strings.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+decoded = kmer_tokenizer.decode(kmer_tokenizer.encode("ATCG"))
+print(decoded)
+```
+
+sample output:
+
+```
+['>', 'A', 'T', 'C', 'G', '<']
+```
+
+You can also inspect the kmer tokenizer's vocabulary, which maps each token to its integer index.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+print(kmer_tokenizer.vocab.stoi)
+```
+
+sample output:
+
+```
+{'|': 0, '>': 1, '<': 2, '#': 3, '?': 4, 'G': 5, 'A': 6, 'T': 7, 'C': 8}
+```
+
+
+
+
+%package -n python3-gcgc
+Summary: GCGC is a preprocessing library for biological sequence model development
+Provides: python-gcgc
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-gcgc
+# GCGC
+
+> GCGC is a tool for feature processing on Biological Sequences.
+
+[![](https://github.com/tshauck/gcgc/workflows/Run%20Tests%20and%20Lint/badge.svg)](https://github.com/tshauck/gcgc/actions?query=workflow%3A%22Run+Tests+and+Lint%22)
+[![](https://img.shields.io/pypi/v/gcgc.svg)](https://pypi.python.org/pypi/gcgc)
+[![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+## Installation
+
+GCGC is primarily intended to be used as part of a larger workflow inside
+Python.
+
+To install via pip:
+
+```sh
+$ pip install gcgc
+```
+
+If you'd like to use code that helps gcgc's tokenizers integrate with common
+third-party libraries, either install those packages separately or use gcgc's
+extras.
+
+```sh
+$ pip install 'gcgc[pytorch,hf]'
+```
+
+## Documentation
+
+The GCGC documentation is at [gcgc.trenthauck.com](http://gcgc.trenthauck.com);
+please see it for examples.
+
+### Quick Start
+
+The easiest way to get started is to import the kmer tokenizer, configure it,
+then start tokenizing.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+encoded = kmer_tokenizer.encode("ATCG")
+print(encoded)
+```
+
+sample output:
+
+```
+[1, 6, 7, 8, 5, 2]
+```
+
+This output includes the "bos" token, the "eos" token, and the four nucleotide
+tokens in between.
+
+You can go the other way and convert the integers to strings.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+decoded = kmer_tokenizer.decode(kmer_tokenizer.encode("ATCG"))
+print(decoded)
+```
+
+sample output:
+
+```
+['>', 'A', 'T', 'C', 'G', '<']
+```
+
+You can also inspect the kmer tokenizer's vocabulary, which maps each token to its integer index.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+print(kmer_tokenizer.vocab.stoi)
+```
+
+sample output:
+
+```
+{'|': 0, '>': 1, '<': 2, '#': 3, '?': 4, 'G': 5, 'A': 6, 'T': 7, 'C': 8}
+```
+
+
+
+
+%package help
+Summary: Development documents and examples for gcgc
+Provides: python3-gcgc-doc
+%description help
+# GCGC
+
+> GCGC is a tool for feature processing on Biological Sequences.
+
+[![](https://github.com/tshauck/gcgc/workflows/Run%20Tests%20and%20Lint/badge.svg)](https://github.com/tshauck/gcgc/actions?query=workflow%3A%22Run+Tests+and+Lint%22)
+[![](https://img.shields.io/pypi/v/gcgc.svg)](https://pypi.python.org/pypi/gcgc)
+[![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+## Installation
+
+GCGC is primarily intended to be used as part of a larger workflow inside
+Python.
+
+To install via pip:
+
+```sh
+$ pip install gcgc
+```
+
+If you'd like to use code that helps gcgc's tokenizers integrate with common
+third-party libraries, either install those packages separately or use gcgc's
+extras.
+
+```sh
+$ pip install 'gcgc[pytorch,hf]'
+```
+
+## Documentation
+
+The GCGC documentation is at [gcgc.trenthauck.com](http://gcgc.trenthauck.com);
+please see it for examples.
+
+### Quick Start
+
+The easiest way to get started is to import the kmer tokenizer, configure it,
+then start tokenizing.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+encoded = kmer_tokenizer.encode("ATCG")
+print(encoded)
+```
+
+sample output:
+
+```
+[1, 6, 7, 8, 5, 2]
+```
+
+This output includes the "bos" token, the "eos" token, and the four nucleotide
+tokens in between.
+
+You can go the other way and convert the integers to strings.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+decoded = kmer_tokenizer.decode(kmer_tokenizer.encode("ATCG"))
+print(decoded)
+```
+
+sample output:
+
+```
+['>', 'A', 'T', 'C', 'G', '<']
+```
+
+You can also inspect the kmer tokenizer's vocabulary, which maps each token to its integer index.
+
+```python
+from gcgc import KmerTokenizer
+
+kmer_tokenizer = KmerTokenizer(alphabet="unambiguous_dna")
+print(kmer_tokenizer.vocab.stoi)
+```
+
+sample output:
+
+```
+{'|': 0, '>': 1, '<': 2, '#': 3, '?': 4, 'G': 5, 'A': 6, 'T': 7, 'C': 8}
+```
+
+
+
+
+%prep
+%autosetup -n gcgc-1.0.0
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}  # create the package documentation directory inside the buildroot
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi  # copy each of doc, docs, example, examples only if it exists in the current directory
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}  # run find relative to the buildroot so the leading slash in the printf format yields installed absolute paths
+if [ -d usr/lib ]; then  # for each of usr/lib, usr/lib64, usr/bin, usr/sbin that exists, append every regular file to filelist.lst
+ find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+ find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+ find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+ find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst  # make sure doclist.lst exists even when no man pages were installed
+if [ -d usr/share/man ]; then
+ find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst  # NOTE(review): the .gz suffix presumably anticipates man-page compression after this section; confirm
+fi
+popd
+mv %{buildroot}/filelist.lst .  # move both lists out of the buildroot; the files sections load them with -f
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-gcgc -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Mon May 29 2023 Python_Bot <Python_Bot@openeuler.org> - 1.0.0-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..bd4f941
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+77e30c48c0bec219d4f010b995707ce8 gcgc-1.0.0.tar.gz