From 5141a5d9856517a0894be51bf611ff81b99c228f Mon Sep 17 00:00:00 2001
From: CoprDistGit
Date: Mon, 29 May 2023 10:42:16 +0000
Subject: automatic import of python-textpipe

---
 .gitignore           |   1 +
 python-textpipe.spec | 642 +++++++++++++++++++++++++++++++++++++++++++++++++++
 sources              |   1 +
 3 files changed, 644 insertions(+)
 create mode 100644 python-textpipe.spec
 create mode 100644 sources

diff --git a/.gitignore b/.gitignore
index e69de29..56c3b0c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/textpipe-0.12.2.tar.gz
diff --git a/python-textpipe.spec b/python-textpipe.spec
new file mode 100644
index 0000000..1150c37
--- /dev/null
+++ b/python-textpipe.spec
@@ -0,0 +1,642 @@
+%global _empty_manifest_terminate_build 0
Name:           python-textpipe
Version:        0.12.2
Release:        1
Summary:        textpipe: clean and extract metadata from text
License:        MIT License
URL:            https://github.com/textpipe/textpipe
Source0:        https://mirrors.nju.edu.cn/pypi/web/packages/65/b1/5b5544ce361dd1c440f0538b5ee0821a5a3d1c74983bd742d23e720c22ab/textpipe-0.12.2.tar.gz
BuildArch:      noarch

%description
# textpipe: clean and extract metadata from text

[![Build Status](https://travis-ci.com/textpipe/textpipe.svg?branch=master)](https://travis-ci.com/textpipe/textpipe)

![The textpipe logo](https://avatars3.githubusercontent.com/u/40492530?s=400&u=c65c2c8274cbdcd05b1942d1963d7aa2800e6d7f&v=4)

`textpipe` is a Python package for converting raw text into clean, readable text and
extracting metadata from that text. Its functionality includes transforming
raw text into readable text by removing HTML tags, and extracting
metadata such as the number of words and named entities from the text.

## Vision: the zen of textpipe

- Designed for use in production pipelines without adult supervision.
- Rechargeable batteries included: provide sane defaults and clear examples to adapt.
- A uniform interface with thin wrappers around state-of-the-art NLP packages.
- As language-agnostic as possible.
- Bring your own models.

## Features

- Clean raw text by removing `HTML` and other unreadable constructs
- Identify the language of a text
- Extract the number of words, the number of sentences, and the named entities from a text
- Calculate the complexity of a text
- Obtain text metadata by specifying a pipeline containing all desired elements
- Obtain sentiment (polarity and a subjectivity score)
- Generate word counts
- Compute minhashes for cheap similarity estimation between documents

## Installation

It is recommended that you install textpipe inside a virtual environment.

- First, create your virtual environment using [virtualenv](https://virtualenv.pypa.io/en/stable/) or [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/).

- Using venv, if your default interpreter is Python 3.6:

```bash
python3 -m venv .venv
```

- Using virtualenv:

```bash
virtualenv venv -p python3.6
```

- Using virtualenvwrapper:

```bash
mkvirtualenv textpipe -p python3.6
```

- Install textpipe using pip:

```bash
pip install textpipe
```

- Install the required packages from *requirements.txt*:

```bash
pip install -r requirements.txt
```

### A note on the required spaCy model

The requirements.txt file that ships with the package pulls in spaCy's en_core_web_sm model, but you can swap in a different model depending on the language and use case you have in mind. See [spaCy.io's page on their different models](https://spacy.io/usage/models) for more information.

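If the model you need is not installed yet, spaCy's standard download command fetches it. A minimal example for the default English model named above; any model name from the linked page can be substituted:

```bash
python -m spacy download en_core_web_sm
```
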
## Usage example

```python
>>> from textpipe import doc, pipeline
>>> sample_text = 'Sample text! '
>>> document = doc.Doc(sample_text)
>>> print(document.clean)
'Sample text!'
>>> print(document.language)
'en'
>>> print(document.nwords)
2

>>> pipe = pipeline.Pipeline(['CleanText', 'NWords'])
>>> print(pipe(sample_text))
{'CleanText': 'Sample text!', 'NWords': 2}
```

To extend the existing textpipe operations with your own custom operations:

```python
test_pipe = pipeline.Pipeline(['CleanText', 'NWords'])

# A custom operation is any callable taking a doc plus the
# context/settings kwargs that the pipeline passes along.
def custom_op(doc, context=None, settings=None, **kwargs):
    return 1

custom_argument = {'argument': 1}
test_pipe.register_operation('CUSTOM_STEP', custom_op)
test_pipe.steps.append(('CUSTOM_STEP', custom_argument))
```

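Once registered, the custom step runs like any built-in operation and its return value is reported under the step's name. A minimal sketch of the expected output, assuming the dict format from the usage example above:

```python
>>> print(test_pipe('Sample text! '))
{'CleanText': 'Sample text!', 'NWords': 2, 'CUSTOM_STEP': 1}
```
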
## Contributing

See [CONTRIBUTING](CONTRIBUTING.md) for guidelines for contributors.

## Changes

0.12.1

- Bumps redis, tqdm, pylint

0.12.0

- Bumps versions of many dependencies, including textacy. Results for keyterm extraction changed.

0.11.9

- Exposes arbitrary spaCy `ents` properties

0.11.8

- Exposes spaCy's `cats` attribute

0.11.7

- Bumps spaCy and redis versions

0.11.6

- Fixes a bug where the gensim model is not cached in the pipeline

0.11.5

- Raises TextpipeMissingModelException instead of KeyError

0.11.4

- Bumps spaCy and datasketch dependencies

0.11.1

- Replaces codacy with pylint on CI
- Fixes pylint issues

0.11.0

- Adds a wrapper around Gensim keyed vectors to construct document embeddings from a Redis cache

0.9.0

- Adds functionality to compute document embeddings using a Gensim word2vec model

0.8.6

- Removes non-standard UTF characters before detecting language

0.8.5

- Bumps spaCy to 2.1.3

0.8.4

- Fixes a broken install command

0.8.3

- Fixes a broken install command

0.8.2

- Fixes a copy-paste error in word vector aggregation ([#118](https://github.com/textpipe/textpipe/issues/118))

0.8.1

- Fixes bugs in several operations that didn't accept kwargs

0.8.0

- Bumps spaCy to 2.1

0.7.2

- Pins spaCy and Pattern versions (with pinned lxml)

0.7.0

- Changes the operations registry from a list to a dict
- Makes global pipeline data available across operations via the `context` kwarg
- Loads custom operations via `register_operation` on the pipeline
- Supports custom steps (operations) with arguments

%package -n python3-textpipe
Summary:        textpipe: clean and extract metadata from text
Provides:       python-textpipe
BuildRequires:  python3-devel
BuildRequires:  python3-setuptools
BuildRequires:  python3-pip
%description -n python3-textpipe
# textpipe: clean and extract metadata from text

[![Build Status](https://travis-ci.com/textpipe/textpipe.svg?branch=master)](https://travis-ci.com/textpipe/textpipe)

![The textpipe logo](https://avatars3.githubusercontent.com/u/40492530?s=400&u=c65c2c8274cbdcd05b1942d1963d7aa2800e6d7f&v=4)

`textpipe` is a Python package for converting raw text into clean, readable text and
extracting metadata from that text. Its functionality includes transforming
raw text into readable text by removing HTML tags, and extracting
metadata such as the number of words and named entities from the text.

## Vision: the zen of textpipe

- Designed for use in production pipelines without adult supervision.
- Rechargeable batteries included: provide sane defaults and clear examples to adapt.
- A uniform interface with thin wrappers around state-of-the-art NLP packages.
- As language-agnostic as possible.
- Bring your own models.

## Features

- Clean raw text by removing `HTML` and other unreadable constructs
- Identify the language of a text
- Extract the number of words, the number of sentences, and the named entities from a text
- Calculate the complexity of a text
- Obtain text metadata by specifying a pipeline containing all desired elements
- Obtain sentiment (polarity and a subjectivity score)
- Generate word counts
- Compute minhashes for cheap similarity estimation between documents

## Installation

It is recommended that you install textpipe inside a virtual environment.

- First, create your virtual environment using [virtualenv](https://virtualenv.pypa.io/en/stable/) or [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/).

- Using venv, if your default interpreter is Python 3.6:

```bash
python3 -m venv .venv
```

- Using virtualenv:

```bash
virtualenv venv -p python3.6
```

- Using virtualenvwrapper:

```bash
mkvirtualenv textpipe -p python3.6
```

- Install textpipe using pip:

```bash
pip install textpipe
```

- Install the required packages from *requirements.txt*:

```bash
pip install -r requirements.txt
```

### A note on the required spaCy model

The requirements.txt file that ships with the package pulls in spaCy's en_core_web_sm model, but you can swap in a different model depending on the language and use case you have in mind. See [spaCy.io's page on their different models](https://spacy.io/usage/models) for more information.

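If the model you need is not installed yet, spaCy's standard download command fetches it. A minimal example for the default English model named above; any model name from the linked page can be substituted:

```bash
python -m spacy download en_core_web_sm
```
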
+ +0.11.9 + +- Exposes arbitrary SpaCy `ents` properties + +0.11.8 + +- Exposes SpaCy's `cats` attribute + +0.11.7 + +- Bumps spaCy and redis versions + +0.11.6 + +- Fixes bug where gensim model is not cached in pipeline + +0.11.5 + +- Raise TextpipeMissingModelException instead of KeyError + +0.11.4 + +- Bumps spaCy and datasketch dependencies + +0.11.1 + +- Replaces codacy with pylint on CI +- Fixes pylint issues + +0.11.0 + +- Adds wrapper around Gensim keyed vectors to construct document embeddings from Redis cache + +0.9.0 + +- Adds functionality to compute document embeddings using a Gensim word2vec model + +0.8.6 + +- Removes non standard utf chars before detecting language + +0.8.5 + +- Bump spaCy to 2.1.3 + +0.8.4 + +- Fix broken install command + +0.8.3 + +- Fix broken install command + +0.8.2 + +- Fix copy-paste error in word vector aggregation ([#118](https://github.com/textpipe/textpipe/issues/118)) + +0.8.1 + +- Fixes bugs in several operations that didn't accept kwargs + +0.8.0 + +- Bumps Spacy to 2.1 + +0.7.2 + +- Pins Spacy and Pattern versions (with pinned lxml) + +0.7.0 + +- change operation's registry from list to dict +- global pipeline data is available across operations via the `context` kwarg +- load custom operations using `register_operation` in pipeline +- custom steps (operations) with arguments + +%package help +Summary: Development documents and examples for textpipe +Provides: python3-textpipe-doc +%description help +# textpipe: clean and extract metadata from text + +[![Build Status](https://travis-ci.com/textpipe/textpipe.svg?branch=master)](https://travis-ci.com/textpipe/textpipe) + +![The textpipe logo](https://avatars3.githubusercontent.com/u/40492530?s=400&u=c65c2c8274cbdcd05b1942d1963d7aa2800e6d7f&v=4) + +`textpipe` is a Python package for converting raw text in to clean, readable text and +extracting metadata from that text. Its functionalities include transforming +raw text into readable text by removing HTML tags and extracting +metadata such as the number of words and named entities from the text. + +## Vision: the zen of textpipe + +- Designed for use in production pipelines without adult supervision. +- Rechargeable batteries included: provide sane defaults and clear examples to adapt. +- A uniform interface with thin wrappers around state-of-the-art NLP packages. +- As language-agnostic as possible. +- Bring your own models. + +## Features + +- Clean raw text by removing `HTML` and other unreadable constructs +- Identify the language of text +- Extract the number of words, number of sentences, named entities from a text +- Calculate the complexity of a text +- Obtain text metadata by specifying a pipeline containing all desired elements +- Obtain sentiment (polarity and a subjectivity score) +- Generates word counts +- Computes minhash for cheap similarity estimation of documents + +## Installation + +It is recommended that you install textpipe using a virtual environment. + +- First, create your virtual environment using [virtualenv](https://virtualenv.pypa.io/en/stable/) or [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/). + +- Using Venv if your default interpreter is python3.6 + +```bash +python3 -m venv .venv +``` + +- Using virtualenv. + +```bash +virtualenv venv -p python3.6 +``` + +- Using virtualenvwrapper + +```bash +mkvirtualenv textpipe -p python3.6 +``` + +- Install textpipe using pip. + +```bash +pip install textpipe +``` + +- Install the required packages using *requirements.txt*. 
## Contributing

See [CONTRIBUTING](CONTRIBUTING.md) for guidelines for contributors.

## Changes

0.12.1

- Bumps redis, tqdm, pylint

0.12.0

- Bumps versions of many dependencies, including textacy. Results for keyterm extraction changed.

0.11.9

- Exposes arbitrary spaCy `ents` properties

0.11.8

- Exposes spaCy's `cats` attribute

0.11.7

- Bumps spaCy and redis versions

0.11.6

- Fixes a bug where the gensim model is not cached in the pipeline

0.11.5

- Raises TextpipeMissingModelException instead of KeyError

0.11.4

- Bumps spaCy and datasketch dependencies

0.11.1

- Replaces codacy with pylint on CI
- Fixes pylint issues

0.11.0

- Adds a wrapper around Gensim keyed vectors to construct document embeddings from a Redis cache

0.9.0

- Adds functionality to compute document embeddings using a Gensim word2vec model

0.8.6

- Removes non-standard UTF characters before detecting language

0.8.5

- Bumps spaCy to 2.1.3

0.8.4

- Fixes a broken install command

0.8.3

- Fixes a broken install command

0.8.2

- Fixes a copy-paste error in word vector aggregation ([#118](https://github.com/textpipe/textpipe/issues/118))

0.8.1

- Fixes bugs in several operations that didn't accept kwargs

0.8.0

- Bumps spaCy to 2.1

0.7.2

- Pins spaCy and Pattern versions (with pinned lxml)

0.7.0

- Changes the operations registry from a list to a dict
- Makes global pipeline data available across operations via the `context` kwarg
- Loads custom operations via `register_operation` on the pipeline
- Supports custom steps (operations) with arguments

%package help
Summary:        Development documents and examples for textpipe
Provides:       python3-textpipe-doc
%description help
# textpipe: clean and extract metadata from text

[![Build Status](https://travis-ci.com/textpipe/textpipe.svg?branch=master)](https://travis-ci.com/textpipe/textpipe)

![The textpipe logo](https://avatars3.githubusercontent.com/u/40492530?s=400&u=c65c2c8274cbdcd05b1942d1963d7aa2800e6d7f&v=4)

`textpipe` is a Python package for converting raw text into clean, readable text and
extracting metadata from that text. Its functionality includes transforming
raw text into readable text by removing HTML tags, and extracting
metadata such as the number of words and named entities from the text.

## Vision: the zen of textpipe

- Designed for use in production pipelines without adult supervision.
- Rechargeable batteries included: provide sane defaults and clear examples to adapt.
- A uniform interface with thin wrappers around state-of-the-art NLP packages.
- As language-agnostic as possible.
- Bring your own models.

## Features

- Clean raw text by removing `HTML` and other unreadable constructs
- Identify the language of a text
- Extract the number of words, the number of sentences, and the named entities from a text
- Calculate the complexity of a text
- Obtain text metadata by specifying a pipeline containing all desired elements
- Obtain sentiment (polarity and a subjectivity score)
- Generate word counts
- Compute minhashes for cheap similarity estimation between documents

## Installation

It is recommended that you install textpipe inside a virtual environment.

- First, create your virtual environment using [virtualenv](https://virtualenv.pypa.io/en/stable/) or [virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/).

- Using venv, if your default interpreter is Python 3.6:

```bash
python3 -m venv .venv
```

- Using virtualenv:

```bash
virtualenv venv -p python3.6
```

- Using virtualenvwrapper:

```bash
mkvirtualenv textpipe -p python3.6
```

- Install textpipe using pip:

```bash
pip install textpipe
```

- Install the required packages from *requirements.txt*:

```bash
pip install -r requirements.txt
```

### A note on the required spaCy model

The requirements.txt file that ships with the package pulls in spaCy's en_core_web_sm model, but you can swap in a different model depending on the language and use case you have in mind. See [spaCy.io's page on their different models](https://spacy.io/usage/models) for more information.

## Usage example

```python
>>> from textpipe import doc, pipeline
>>> sample_text = 'Sample text! '
>>> document = doc.Doc(sample_text)
>>> print(document.clean)
'Sample text!'
>>> print(document.language)
'en'
>>> print(document.nwords)
2

>>> pipe = pipeline.Pipeline(['CleanText', 'NWords'])
>>> print(pipe(sample_text))
{'CleanText': 'Sample text!', 'NWords': 2}
```

To extend the existing textpipe operations with your own custom operations:

```python
test_pipe = pipeline.Pipeline(['CleanText', 'NWords'])

# A custom operation is any callable taking a doc plus the
# context/settings kwargs that the pipeline passes along.
def custom_op(doc, context=None, settings=None, **kwargs):
    return 1

custom_argument = {'argument': 1}
test_pipe.register_operation('CUSTOM_STEP', custom_op)
test_pipe.steps.append(('CUSTOM_STEP', custom_argument))
```

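Once registered, the custom step runs like any built-in operation and its return value is reported under the step's name. A minimal sketch of the expected output, assuming the dict format from the usage example above:

```python
>>> print(test_pipe('Sample text! '))
{'CleanText': 'Sample text!', 'NWords': 2, 'CUSTOM_STEP': 1}
```
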
## Contributing

See [CONTRIBUTING](CONTRIBUTING.md) for guidelines for contributors.

## Changes

0.12.1

- Bumps redis, tqdm, pylint

0.12.0

- Bumps versions of many dependencies, including textacy. Results for keyterm extraction changed.

0.11.9

- Exposes arbitrary spaCy `ents` properties

0.11.8

- Exposes spaCy's `cats` attribute

0.11.7

- Bumps spaCy and redis versions

0.11.6

- Fixes a bug where the gensim model is not cached in the pipeline

0.11.5

- Raises TextpipeMissingModelException instead of KeyError

0.11.4

- Bumps spaCy and datasketch dependencies

0.11.1

- Replaces codacy with pylint on CI
- Fixes pylint issues

0.11.0

- Adds a wrapper around Gensim keyed vectors to construct document embeddings from a Redis cache

0.9.0

- Adds functionality to compute document embeddings using a Gensim word2vec model

0.8.6

- Removes non-standard UTF characters before detecting language

0.8.5

- Bumps spaCy to 2.1.3

0.8.4

- Fixes a broken install command

0.8.3

- Fixes a broken install command

0.8.2

- Fixes a copy-paste error in word vector aggregation ([#118](https://github.com/textpipe/textpipe/issues/118))

0.8.1

- Fixes bugs in several operations that didn't accept kwargs

0.8.0

- Bumps spaCy to 2.1

0.7.2

- Pins spaCy and Pattern versions (with pinned lxml)

0.7.0

- Changes the operations registry from a list to a dict
- Makes global pipeline data available across operations via the `context` kwarg
- Loads custom operations via `register_operation` on the pipeline
- Supports custom steps (operations) with arguments

%prep
%autosetup -n textpipe-0.12.2

%build
%py3_build

%install
%py3_install
install -d -m755 %{buildroot}/%{_pkgdocdir}
if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
pushd %{buildroot}
if [ -d usr/lib ]; then
    find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/lib64 ]; then
    find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/bin ]; then
    find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/sbin ]; then
    find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
fi
touch doclist.lst
if [ -d usr/share/man ]; then
    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
fi
popd
mv %{buildroot}/filelist.lst .
mv %{buildroot}/doclist.lst .

%files -n python3-textpipe -f filelist.lst
%dir %{python3_sitelib}/*

%files help -f doclist.lst
%{_docdir}/*

%changelog
* Mon May 29 2023 Python_Bot - 0.12.2-1
- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..3095b40
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+aa1f96e03ee1dc6660cb854a22a46636 textpipe-0.12.2.tar.gz
--
cgit v1.2.3