From 0e748cc99d98a03e5280f025f6095a1fa88aa59e Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Wed, 10 May 2023 04:58:35 +0000 Subject: automatic import of python-laserembeddings --- .gitignore | 1 + python-laserembeddings.spec | 410 ++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 412 insertions(+) create mode 100644 python-laserembeddings.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..feb5ff0 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/laserembeddings-1.1.2.tar.gz diff --git a/python-laserembeddings.spec b/python-laserembeddings.spec new file mode 100644 index 0000000..0fd962f --- /dev/null +++ b/python-laserembeddings.spec @@ -0,0 +1,410 @@ +%global _empty_manifest_terminate_build 0 +Name: python-laserembeddings +Version: 1.1.2 +Release: 1 +Summary: Production-ready LASER multilingual embeddings +License: BSD-3-Clause +URL: https://github.com/yannvgn/laserembeddings +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/d1/d4/334569ff2a318e8d587506d4dd1b54260b2391a5759e0614326bc17969bc/laserembeddings-1.1.2.tar.gz +BuildArch: noarch + +Requires: python3-torch +Requires: python3-subword-nmt +Requires: python3-numpy +Requires: python3-sacremoses +Requires: python3-transliterate +Requires: python3-mecab-python3 +Requires: python3-ipadic +Requires: python3-jieba + +%description +# LASER embeddings + +[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/yannvgn/laserembeddings/python-package?style=flat-square)](https://github.com/yannvgn/laserembeddings/actions) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/laserembeddings?style=flat-square) +[![PyPI](https://img.shields.io/pypi/v/laserembeddings.svg?style=flat-square)](https://pypi.org/project/laserembeddings/) +[![PyPI - License](https://img.shields.io/pypi/l/laserembeddings.svg?style=flat-square)](https://github.com/yannvgn/laserembeddings/blob/master/LICENSE) + +**Out-of-the-box multilingual 
sentence embeddings.** + +![LASER embeddings maps similar sentences in any language to similar language-agnostic embeddings](https://raw.githubusercontent.com/yannvgn/laserembeddings/master/laserembeddings.gif) + +laserembeddings is a pip-packaged, production-ready port of Facebook Research's [LASER](https://github.com/facebookresearch/LASER) (Language-Agnostic SEntence Representations) to compute multilingual sentence embeddings. + +**Have a look at the project's repo ([master branch](https://github.com/yannvgn/laserembeddings) or [this release](https://github.com/yannvgn/laserembeddings/tree/v1.1.2)) for the full documentation.** + +## Getting started + +### Prerequisites + +You'll need Python 3.6+ and PyTorch. Please refer to [PyTorch installation instructions](https://pytorch.org/get-started/locally/). + +### Installation + +``` +pip install laserembeddings +``` + +#### Chinese language + +Chinese is not supported by default. If you need to embed Chinese sentences, please install laserembeddings with the "zh" extra. This extra includes [jieba](https://github.com/fxsjy/jieba). + +``` +pip install laserembeddings[zh] +``` + +#### Japanese language + +Japanese is not supported by default. If you need to embed Japanese sentences, please install laserembeddings with the "ja" extra. This extra includes [mecab-python3](https://github.com/SamuraiT/mecab-python3) and the [ipadic](https://github.com/polm/ipadic-py) dictionary, which is used in the original LASER project. + +If you have issues running laserembeddings on Japanese sentences, please refer to [mecab-python3 documentation](https://github.com/SamuraiT/mecab-python3) for troubleshooting. + +``` +pip install laserembeddings[ja] +``` + + +### Downloading the pre-trained models + +``` +python -m laserembeddings download-models +``` + +This will download the models to the default `data` directory next to the source code of the package. 
Use `python -m laserembeddings download-models path/to/model/directory` to download the models to a specific location. + +### Usage + +```python +from laserembeddings import Laser + +laser = Laser() + +# if all sentences are in the same language: + +embeddings = laser.embed_sentences( + ['let your neural network be polyglot', + 'use multilingual embeddings!'], + lang='en') # lang is only used for tokenization + +# embeddings is an N*1024 (N = number of sentences) NumPy array +``` + +If the sentences are not in the same language, you can pass a list of language codes: +```python +embeddings = laser.embed_sentences( + ['I love pasta.', + "J'adore les pâtes.", + 'Ich liebe Pasta.'], + lang=['en', 'fr', 'de']) +``` + +If you downloaded the models into a specific directory: + +```python +from laserembeddings import Laser + +path_to_bpe_codes = ... +path_to_bpe_vocab = ... +path_to_encoder = ... + +laser = Laser(path_to_bpe_codes, path_to_bpe_vocab, path_to_encoder) + +# you can also supply file objects instead of file paths +``` + +If you want to pull the models from S3: + +```python +from io import BytesIO, StringIO +from laserembeddings import Laser +import boto3 + +s3 = boto3.resource('s3') +MODELS_BUCKET = ...
+ +f_bpe_codes = StringIO(s3.Object(MODELS_BUCKET, 'path_to_bpe_codes.fcodes').get()['Body'].read().decode('utf-8')) +f_bpe_vocab = StringIO(s3.Object(MODELS_BUCKET, 'path_to_bpe_vocabulary.fvocab').get()['Body'].read().decode('utf-8')) +f_encoder = BytesIO(s3.Object(MODELS_BUCKET, 'path_to_encoder.pt').get()['Body'].read()) + +laser = Laser(f_bpe_codes, f_bpe_vocab, f_encoder) +``` + + +%package -n python3-laserembeddings +Summary: Production-ready LASER multilingual embeddings +Provides: python-laserembeddings +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-laserembeddings +# LASER embeddings + +[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/yannvgn/laserembeddings/python-package?style=flat-square)](https://github.com/yannvgn/laserembeddings/actions) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/laserembeddings?style=flat-square) +[![PyPI](https://img.shields.io/pypi/v/laserembeddings.svg?style=flat-square)](https://pypi.org/project/laserembeddings/) +[![PyPI - License](https://img.shields.io/pypi/l/laserembeddings.svg?style=flat-square)](https://github.com/yannvgn/laserembeddings/blob/master/LICENSE) + +**Out-of-the-box multilingual sentence embeddings.** + +![LASER embeddings maps similar sentences in any language to similar language-agnostic embeddings](https://raw.githubusercontent.com/yannvgn/laserembeddings/master/laserembeddings.gif) + +laserembeddings is a pip-packaged, production-ready port of Facebook Research's [LASER](https://github.com/facebookresearch/LASER) (Language-Agnostic SEntence Representations) to compute multilingual sentence embeddings. + +**Have a look at the project's repo ([master branch](https://github.com/yannvgn/laserembeddings) or [this release](https://github.com/yannvgn/laserembeddings/tree/v1.1.2)) for the full documentation.** + +## Getting started + +### Prerequisites + +You'll need Python 3.6+ and PyTorch. 
Please refer to [PyTorch installation instructions](https://pytorch.org/get-started/locally/). + +### Installation + +``` +pip install laserembeddings +``` + +#### Chinese language + +Chinese is not supported by default. If you need to embed Chinese sentences, please install laserembeddings with the "zh" extra. This extra includes [jieba](https://github.com/fxsjy/jieba). + +``` +pip install laserembeddings[zh] +``` + +#### Japanese language + +Japanese is not supported by default. If you need to embed Japanese sentences, please install laserembeddings with the "ja" extra. This extra includes [mecab-python3](https://github.com/SamuraiT/mecab-python3) and the [ipadic](https://github.com/polm/ipadic-py) dictionary, which is used in the original LASER project. + +If you have issues running laserembeddings on Japanese sentences, please refer to [mecab-python3 documentation](https://github.com/SamuraiT/mecab-python3) for troubleshooting. + +``` +pip install laserembeddings[ja] +``` + + +### Downloading the pre-trained models + +``` +python -m laserembeddings download-models +``` + +This will download the models to the default `data` directory next to the source code of the package. Use `python -m laserembeddings download-models path/to/model/directory` to download the models to a specific location. 
+ +### Usage + +```python +from laserembeddings import Laser + +laser = Laser() + +# if all sentences are in the same language: + +embeddings = laser.embed_sentences( + ['let your neural network be polyglot', + 'use multilingual embeddings!'], + lang='en') # lang is only used for tokenization + +# embeddings is an N*1024 (N = number of sentences) NumPy array +``` + +If the sentences are not in the same language, you can pass a list of language codes: +```python +embeddings = laser.embed_sentences( + ['I love pasta.', + "J'adore les pâtes.", + 'Ich liebe Pasta.'], + lang=['en', 'fr', 'de']) +``` + +If you downloaded the models into a specific directory: + +```python +from laserembeddings import Laser + +path_to_bpe_codes = ... +path_to_bpe_vocab = ... +path_to_encoder = ... + +laser = Laser(path_to_bpe_codes, path_to_bpe_vocab, path_to_encoder) + +# you can also supply file objects instead of file paths +``` + +If you want to pull the models from S3: + +```python +from io import BytesIO, StringIO +from laserembeddings import Laser +import boto3 + +s3 = boto3.resource('s3') +MODELS_BUCKET = ...
+ +f_bpe_codes = StringIO(s3.Object(MODELS_BUCKET, 'path_to_bpe_codes.fcodes').get()['Body'].read().decode('utf-8')) +f_bpe_vocab = StringIO(s3.Object(MODELS_BUCKET, 'path_to_bpe_vocabulary.fvocab').get()['Body'].read().decode('utf-8')) +f_encoder = BytesIO(s3.Object(MODELS_BUCKET, 'path_to_encoder.pt').get()['Body'].read()) + +laser = Laser(f_bpe_codes, f_bpe_vocab, f_encoder) +``` + + +%package help +Summary: Development documents and examples for laserembeddings +Provides: python3-laserembeddings-doc +%description help +# LASER embeddings + +[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/yannvgn/laserembeddings/python-package?style=flat-square)](https://github.com/yannvgn/laserembeddings/actions) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/laserembeddings?style=flat-square) +[![PyPI](https://img.shields.io/pypi/v/laserembeddings.svg?style=flat-square)](https://pypi.org/project/laserembeddings/) +[![PyPI - License](https://img.shields.io/pypi/l/laserembeddings.svg?style=flat-square)](https://github.com/yannvgn/laserembeddings/blob/master/LICENSE) + +**Out-of-the-box multilingual sentence embeddings.** + +![LASER embeddings maps similar sentences in any language to similar language-agnostic embeddings](https://raw.githubusercontent.com/yannvgn/laserembeddings/master/laserembeddings.gif) + +laserembeddings is a pip-packaged, production-ready port of Facebook Research's [LASER](https://github.com/facebookresearch/LASER) (Language-Agnostic SEntence Representations) to compute multilingual sentence embeddings. + +**Have a look at the project's repo ([master branch](https://github.com/yannvgn/laserembeddings) or [this release](https://github.com/yannvgn/laserembeddings/tree/v1.1.2)) for the full documentation.** + +## Getting started + +### Prerequisites + +You'll need Python 3.6+ and PyTorch. Please refer to [PyTorch installation instructions](https://pytorch.org/get-started/locally/). 
+ +### Installation + +``` +pip install laserembeddings +``` + +#### Chinese language + +Chinese is not supported by default. If you need to embed Chinese sentences, please install laserembeddings with the "zh" extra. This extra includes [jieba](https://github.com/fxsjy/jieba). + +``` +pip install laserembeddings[zh] +``` + +#### Japanese language + +Japanese is not supported by default. If you need to embed Japanese sentences, please install laserembeddings with the "ja" extra. This extra includes [mecab-python3](https://github.com/SamuraiT/mecab-python3) and the [ipadic](https://github.com/polm/ipadic-py) dictionary, which is used in the original LASER project. + +If you have issues running laserembeddings on Japanese sentences, please refer to [mecab-python3 documentation](https://github.com/SamuraiT/mecab-python3) for troubleshooting. + +``` +pip install laserembeddings[ja] +``` + + +### Downloading the pre-trained models + +``` +python -m laserembeddings download-models +``` + +This will download the models to the default `data` directory next to the source code of the package. Use `python -m laserembeddings download-models path/to/model/directory` to download the models to a specific location. + +### Usage + +```python +from laserembeddings import Laser + +laser = Laser() + +# if all sentences are in the same language: + +embeddings = laser.embed_sentences( + ['let your neural network be polyglot', + 'use multilingual embeddings!'], + lang='en') # lang is only used for tokenization + +# embeddings is an N*1024 (N = number of sentences) NumPy array +``` + +If the sentences are not in the same language, you can pass a list of language codes: +```python +embeddings = laser.embed_sentences( + ['I love pasta.', + "J'adore les pâtes.", + 'Ich liebe Pasta.'], + lang=['en', 'fr', 'de']) +``` + +If you downloaded the models into a specific directory: + +```python +from laserembeddings import Laser + +path_to_bpe_codes = ... +path_to_bpe_vocab = ...
+path_to_encoder = ...
+
+laser = Laser(path_to_bpe_codes, path_to_bpe_vocab, path_to_encoder)
+
+# you can also supply file objects instead of file paths
+```
+
+If you want to pull the models from S3:
+
+```python
+from io import BytesIO, StringIO
+from laserembeddings import Laser
+import boto3
+
+s3 = boto3.resource('s3')
+MODELS_BUCKET = ...
+
+f_bpe_codes = StringIO(s3.Object(MODELS_BUCKET, 'path_to_bpe_codes.fcodes').get()['Body'].read().decode('utf-8'))
+f_bpe_vocab = StringIO(s3.Object(MODELS_BUCKET, 'path_to_bpe_vocabulary.fvocab').get()['Body'].read().decode('utf-8'))
+f_encoder = BytesIO(s3.Object(MODELS_BUCKET, 'path_to_encoder.pt').get()['Body'].read())
+
+laser = Laser(f_bpe_codes, f_bpe_vocab, f_encoder)
+```
+
+
+%prep
+%autosetup -n laserembeddings-1.1.2
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi  # copy whichever doc/example dirs the tarball ships
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}  # paths below are collected relative to the buildroot and re-rooted with a leading slash
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst  # entries are quoted so paths containing spaces stay one entry
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch filelist.lst doclist.lst  # both lists must exist, even if empty, for the mv steps and the -f file lists
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst  # .gz suffix presumably matches post-install man page compression; confirm
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+ +%files -n python3-laserembeddings -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Wed May 10 2023 Python_Bot - 1.1.2-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..b1794de --- /dev/null +++ b/sources @@ -0,0 +1 @@ +22ad3225289be7ed7822bfe5ee0163f1 laserembeddings-1.1.2.tar.gz -- cgit v1.2.3