From ed88e3e6dec2db6e5606fd31baa4442707b6f90f Mon Sep 17 00:00:00 2001
From: CoprDistGit
Date: Mon, 10 Apr 2023 13:03:51 +0000
Subject: automatic import of python-wmd

---
 python-wmd.spec | 294 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 294 insertions(+)
 create mode 100644 python-wmd.spec

(limited to 'python-wmd.spec')

diff --git a/python-wmd.spec b/python-wmd.spec
new file mode 100644
index 0000000..f0ffefd
--- /dev/null
+++ b/python-wmd.spec
@@ -0,0 +1,294 @@
+%global _empty_manifest_terminate_build 0
+Name: python-wmd
+Version: 1.3.2
+Release: 1
+Summary: Accelerated functions to calculate Word Mover's Distance
+License: Apache-2.0
+URL: https://github.com/src-d/wmd-relax
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/e5/14/e1d122e56607ae49999041f372fa14166eb1e3b838122118d706f9bf1620/wmd-1.3.2.tar.gz
+# NOTE(review): upstream bundles a native extension, so this package is not noarch; confirm whether numpy headers are also needed at build time.
+BuildRequires: gcc-c++
+
+%description
+Calculates Word Mover's Distance as described in
+[From Word Embeddings To Document Distances](http://www.cs.cornell.edu/~kilian/papers/wmd_metric.pdf)
+by Matt Kusner, Yu Sun, Nicholas Kolkin and Kilian Weinberger.
+Word Mover's Distance
+The high level logic is written in Python, the low level functions related to
+linear programming are offloaded to the bundled native extension. The native
+extension can be built as a generic shared library not related to Python at all.
+**Python 2.7 and older are not supported.** The heavy-lifting is done by
+[google/or-tools](https://github.com/google/or-tools).
+### Installation
+```
+pip3 install wmd
+```
+Tested on Linux and macOS.
+### Usage
+You should have the embeddings numpy array and the nbow model - that is,
+every sample is a weighted set of items, and every item is embedded.
+```python +import numpy +from wmd import WMD +embeddings = numpy.array([[0.1, 1], [1, 0.1]], dtype=numpy.float32) +nbow = {"first": ("#1", [0, 1], numpy.array([1.5, 0.5], dtype=numpy.float32)), + "second": ("#2", [0, 1], numpy.array([0.75, 0.15], dtype=numpy.float32))} +calc = WMD(embeddings, nbow, vocabulary_min=2) +print(calc.nearest_neighbors("first")) +``` +``` +[('second', 0.10606599599123001)] +``` +`embeddings` must support `__getitem__` which returns an item by its +identifier; particularly, `numpy.ndarray` matches that interface. +`nbow` must be iterable - returns sample identifiers - and support +`__getitem__` by those identifiers which returns tuples of length 3. +The first element is the human-readable name of the sample, the +second is an iterable with item identifiers and the third is `numpy.ndarray` +with the corresponding weights. All numpy arrays must be float32. The return +format is the list of tuples with sample identifiers and relevancy +indices (lower the better). +It is possible to use this package with [spaCy](https://github.com/explosion/spaCy): +```python +import spacy +import wmd +nlp = spacy.load('en_core_web_md') +nlp.add_pipe(wmd.WMD.SpacySimilarityHook(nlp), last=True) +doc1 = nlp("Politician speaks to the media in Illinois.") +doc2 = nlp("The president greets the press in Chicago.") +print(doc1.similarity(doc2)) +``` +Besides, see another [example](spacy_example.py) which finds similar Wikipedia +pages. +### Building from source +Either build it as a Python package: +``` +pip3 install git+https://github.com/src-d/wmd-relax +``` +or use CMake: +``` +git clone --recursive https://github.com/src-d/wmd-relax +cmake -D CMAKE_BUILD_TYPE=Release . +make -j +``` +Please note the `--recursive` flag for `git clone`. This project uses source{d}'s +fork of [google/or-tools](https://github.com/google/or-tools) as the git submodule. +### Tests +Tests are in `test.py` and use the stock `unittest` package. 
+### Documentation +``` +cd doc +make html +``` +The files are in `doc/doxyhtml` and `doc/html` directories. +### Contributions +### License +[Apache 2.0](LICENSE.md) +#### README {#ignore_this_doxygen_anchor} + +%package -n python3-wmd +Summary: Accelerated functions to calculate Word Mover's Distance +Provides: python-wmd +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-wmd +Calculates Word Mover's Distance as described in +[From Word Embeddings To Document Distances](http://www.cs.cornell.edu/~kilian/papers/wmd_metric.pdf) +by Matt Kusner, Yu Sun, Nicholas Kolkin and Kilian Weinberger. +Word Mover's Distance +The high level logic is written in Python, the low level functions related to +linear programming are offloaded to the bundled native extension. The native +extension can be built as a generic shared library not related to Python at all. +**Python 2.7 and older are not supported.** The heavy-lifting is done by +[google/or-tools](https://github.com/google/or-tools). +### Installation +``` +pip3 install wmd +``` +Tested on Linux and macOS. +### Usage +You should have the embeddings numpy array and the nbow model - that is, +every sample is a weighted set of items, and every item is embedded. +```python +import numpy +from wmd import WMD +embeddings = numpy.array([[0.1, 1], [1, 0.1]], dtype=numpy.float32) +nbow = {"first": ("#1", [0, 1], numpy.array([1.5, 0.5], dtype=numpy.float32)), + "second": ("#2", [0, 1], numpy.array([0.75, 0.15], dtype=numpy.float32))} +calc = WMD(embeddings, nbow, vocabulary_min=2) +print(calc.nearest_neighbors("first")) +``` +``` +[('second', 0.10606599599123001)] +``` +`embeddings` must support `__getitem__` which returns an item by its +identifier; particularly, `numpy.ndarray` matches that interface. +`nbow` must be iterable - returns sample identifiers - and support +`__getitem__` by those identifiers which returns tuples of length 3. 
+The first element is the human-readable name of the sample, the +second is an iterable with item identifiers and the third is `numpy.ndarray` +with the corresponding weights. All numpy arrays must be float32. The return +format is the list of tuples with sample identifiers and relevancy +indices (lower the better). +It is possible to use this package with [spaCy](https://github.com/explosion/spaCy): +```python +import spacy +import wmd +nlp = spacy.load('en_core_web_md') +nlp.add_pipe(wmd.WMD.SpacySimilarityHook(nlp), last=True) +doc1 = nlp("Politician speaks to the media in Illinois.") +doc2 = nlp("The president greets the press in Chicago.") +print(doc1.similarity(doc2)) +``` +Besides, see another [example](spacy_example.py) which finds similar Wikipedia +pages. +### Building from source +Either build it as a Python package: +``` +pip3 install git+https://github.com/src-d/wmd-relax +``` +or use CMake: +``` +git clone --recursive https://github.com/src-d/wmd-relax +cmake -D CMAKE_BUILD_TYPE=Release . +make -j +``` +Please note the `--recursive` flag for `git clone`. This project uses source{d}'s +fork of [google/or-tools](https://github.com/google/or-tools) as the git submodule. +### Tests +Tests are in `test.py` and use the stock `unittest` package. +### Documentation +``` +cd doc +make html +``` +The files are in `doc/doxyhtml` and `doc/html` directories. +### Contributions +### License +[Apache 2.0](LICENSE.md) +#### README {#ignore_this_doxygen_anchor} + +%package help +Summary: Development documents and examples for wmd +Provides: python3-wmd-doc +%description help +Calculates Word Mover's Distance as described in +[From Word Embeddings To Document Distances](http://www.cs.cornell.edu/~kilian/papers/wmd_metric.pdf) +by Matt Kusner, Yu Sun, Nicholas Kolkin and Kilian Weinberger. +Word Mover's Distance +The high level logic is written in Python, the low level functions related to +linear programming are offloaded to the bundled native extension. 
The native +extension can be built as a generic shared library not related to Python at all. +**Python 2.7 and older are not supported.** The heavy-lifting is done by +[google/or-tools](https://github.com/google/or-tools). +### Installation +``` +pip3 install wmd +``` +Tested on Linux and macOS. +### Usage +You should have the embeddings numpy array and the nbow model - that is, +every sample is a weighted set of items, and every item is embedded. +```python +import numpy +from wmd import WMD +embeddings = numpy.array([[0.1, 1], [1, 0.1]], dtype=numpy.float32) +nbow = {"first": ("#1", [0, 1], numpy.array([1.5, 0.5], dtype=numpy.float32)), + "second": ("#2", [0, 1], numpy.array([0.75, 0.15], dtype=numpy.float32))} +calc = WMD(embeddings, nbow, vocabulary_min=2) +print(calc.nearest_neighbors("first")) +``` +``` +[('second', 0.10606599599123001)] +``` +`embeddings` must support `__getitem__` which returns an item by its +identifier; particularly, `numpy.ndarray` matches that interface. +`nbow` must be iterable - returns sample identifiers - and support +`__getitem__` by those identifiers which returns tuples of length 3. +The first element is the human-readable name of the sample, the +second is an iterable with item identifiers and the third is `numpy.ndarray` +with the corresponding weights. All numpy arrays must be float32. The return +format is the list of tuples with sample identifiers and relevancy +indices (lower the better). +It is possible to use this package with [spaCy](https://github.com/explosion/spaCy): +```python +import spacy +import wmd +nlp = spacy.load('en_core_web_md') +nlp.add_pipe(wmd.WMD.SpacySimilarityHook(nlp), last=True) +doc1 = nlp("Politician speaks to the media in Illinois.") +doc2 = nlp("The president greets the press in Chicago.") +print(doc1.similarity(doc2)) +``` +Besides, see another [example](spacy_example.py) which finds similar Wikipedia +pages. 
+### Building from source
+Either build it as a Python package:
+```
+pip3 install git+https://github.com/src-d/wmd-relax
+```
+or use CMake:
+```
+git clone --recursive https://github.com/src-d/wmd-relax
+cmake -D CMAKE_BUILD_TYPE=Release .
+make -j
+```
+Please note the `--recursive` flag for `git clone`. This project uses source{d}'s
+fork of [google/or-tools](https://github.com/google/or-tools) as the git submodule.
+### Tests
+Tests are in `test.py` and use the stock `unittest` package.
+### Documentation
+```
+cd doc
+make html
+```
+The files are in `doc/doxyhtml` and `doc/html` directories.
+### Contributions
+### License
+[Apache 2.0](LICENSE.md)
+#### README {#ignore_this_doxygen_anchor}
+
+%prep
+%autosetup -n wmd-1.3.2
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch filelist.lst doclist.lst  # both lists must exist for the mv calls below, even when nothing matched
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+# NOTE(review): the package ships a native extension, so its modules are expected under the arch-specific site directory.
+%files -n python3-wmd -f filelist.lst
+%dir %{python3_sitearch}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Mon Apr 10 2023 Python_Bot - 1.3.2-1
+- Package Spec generated
--
cgit v1.2.3