summaryrefslogtreecommitdiff
path: root/python-pyhacrf-datamade.spec
diff options
context:
space:
mode:
Diffstat (limited to 'python-pyhacrf-datamade.spec')
-rw-r--r--python-pyhacrf-datamade.spec127
1 files changed, 127 insertions, 0 deletions
diff --git a/python-pyhacrf-datamade.spec b/python-pyhacrf-datamade.spec
new file mode 100644
index 0000000..6377245
--- /dev/null
+++ b/python-pyhacrf-datamade.spec
@@ -0,0 +1,127 @@
+%global _empty_manifest_terminate_build 0
+Name: python-pyhacrf-datamade
+Version: 0.2.6
+Release: 1
+Summary: Hidden alignment conditional random field, a discriminative string edit distance
+License: BSD License
+URL: https://github.com/datamade/pyhacrf
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/2f/47/d2dea0847a98445d0faac8699f5727a747fb3a9cadb68eb8fbbcc5aa48be/pyhacrf-datamade-0.2.6.tar.gz
+
+Requires: python3-PyLBFGS
+Requires: python3-numpy
+Requires: python3-numpy
+Requires: python3-numpy
+Requires: python3-numpy
+
+%description
+Hidden alignment conditional random field for classifying string pairs -
+a learnable edit distance.
+Part of the Dedupe.io cloud service and open source toolset for de-duplicating and finding fuzzy matches in your data: https://dedupe.io
+This package aims to implement the HACRF machine learning model with a
+``sklearn``-like interface. It includes ways to fit a model to training
+examples and score new examples.
+The model takes string pairs as input and classifies them into any number
+of classes. In McCallum's original paper the model was applied to the
+database deduplication problem. Each database entry was paired with
+every other entry and the model then classified whether the pair was a
+'match' or a 'mismatch' based on training examples of matches and
+mismatches.
+I also tried to use it as a learnable string edit distance for normalizing
+noisy text. See *A Conditional Random Field for Discriminatively-trained
+Finite-state String Edit Distance* by McCallum, Bellare, and Pereira,
+and the report *Conditional Random Fields for Noisy text normalisation*
+by Dirko Coetsee.
+
+%package -n python3-pyhacrf-datamade
+Summary: Hidden alignment conditional random field, a discriminative string edit distance
+Provides: python-pyhacrf-datamade
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+BuildRequires: python3-cffi
+BuildRequires: gcc
+BuildRequires: gdb
+%description -n python3-pyhacrf-datamade
+Hidden alignment conditional random field for classifying string pairs -
+a learnable edit distance.
+Part of the Dedupe.io cloud service and open source toolset for de-duplicating and finding fuzzy matches in your data: https://dedupe.io
+This package aims to implement the HACRF machine learning model with a
+``sklearn``-like interface. It includes ways to fit a model to training
+examples and score new examples.
+The model takes string pairs as input and classifies them into any number
+of classes. In McCallum's original paper the model was applied to the
+database deduplication problem. Each database entry was paired with
+every other entry and the model then classified whether the pair was a
+'match' or a 'mismatch' based on training examples of matches and
+mismatches.
+I also tried to use it as a learnable string edit distance for normalizing
+noisy text. See *A Conditional Random Field for Discriminatively-trained
+Finite-state String Edit Distance* by McCallum, Bellare, and Pereira,
+and the report *Conditional Random Fields for Noisy text normalisation*
+by Dirko Coetsee.
+
+%package help
+Summary: Development documents and examples for pyhacrf-datamade
+Provides: python3-pyhacrf-datamade-doc
+%description help
+Hidden alignment conditional random field for classifying string pairs -
+a learnable edit distance.
+Part of the Dedupe.io cloud service and open source toolset for de-duplicating and finding fuzzy matches in your data: https://dedupe.io
+This package aims to implement the HACRF machine learning model with a
+``sklearn``-like interface. It includes ways to fit a model to training
+examples and score new examples.
+The model takes string pairs as input and classifies them into any number
+of classes. In McCallum's original paper the model was applied to the
+database deduplication problem. Each database entry was paired with
+every other entry and the model then classified whether the pair was a
+'match' or a 'mismatch' based on training examples of matches and
+mismatches.
+I also tried to use it as a learnable string edit distance for normalizing
+noisy text. See *A Conditional Random Field for Discriminatively-trained
+Finite-state String Edit Distance* by McCallum, Bellare, and Pereira,
+and the report *Conditional Random Fields for Noisy text normalisation*
+by Dirko Coetsee.
+
+%prep
+%autosetup -n pyhacrf-datamade-0.2.6
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}  # doc dir for the help subpackage; created even if nothing is copied
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi  # copy whichever doc/example dirs the sdist ships
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}  # run find from the buildroot so printed paths are relative to it
+if [ -d usr/lib ]; then  # each path is emitted as "/dir/name" in double quotes so whitespace in a filename stays one manifest entry
+ find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+ find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+ find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+ find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch filelist.lst doclist.lst  # guarantee both manifests exist (possibly empty) so the mv commands below cannot fail
+if [ -d usr/share/man ]; then  # NOTE(review): the .gz suffix assumes man pages get gzip-compressed after this section runs; confirm for the target distro
+ find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .  # move manifests into the build dir: they must not be packaged, and the -f options in the files sections read them from there
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-pyhacrf-datamade -f filelist.lst
+%dir %{python3_sitearch}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Mon Apr 10 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.6-1
+- Package Spec generated