diff options
Diffstat (limited to 'python-pyhacrf-datamade.spec')
| -rw-r--r-- | python-pyhacrf-datamade.spec | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/python-pyhacrf-datamade.spec b/python-pyhacrf-datamade.spec
new file mode 100644
index 0000000..6377245
--- /dev/null
+++ b/python-pyhacrf-datamade.spec
@@ -0,0 +1,127 @@
+# Keep building even if a file manifest passed with -f turns out to be empty.
+%global _empty_manifest_terminate_build 0
+Name: python-pyhacrf-datamade
+Version: 0.2.6
+Release: 1
+Summary: Hidden alignment conditional random field, a discriminative string edit distance
+License: BSD License
+URL: https://github.com/datamade/pyhacrf
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/2f/47/d2dea0847a98445d0faac8699f5727a747fb3a9cadb68eb8fbbcc5aa48be/pyhacrf-datamade-0.2.6.tar.gz
+
+%description
+Hidden alignment conditional random field for classifying string pairs -
+a learnable edit distance.
+Part of the Dedupe.io cloud service and open source toolset for de-duplicating and finding fuzzy matches in your data: https://dedupe.io
+This package aims to implement the HACRF machine learning model with a
+``sklearn``-like interface. It includes ways to fit a model to training
+examples and score new example.
+The model takes string pairs as input and classify them into any number
+of classes. In McCallum's original paper the model was applied to the
+database deduplication problem. Each database entry was paired with
+every other entry and the model then classified whether the pair was a
+'match' or a 'mismatch' based on training examples of matches and
+mismatches.
+I also tried to use it as learnable string edit distance for normalizing
+noisy text. See *A Conditional Random Field for Discriminatively-trained
+Finite-state String Edit Distance* by McCallum, Bellare, and Pereira,
+and the report *Conditional Random Fields for Noisy text normalisation*
+by Dirko Coetsee.
+
+%package -n python3-pyhacrf-datamade
+Summary: Hidden alignment conditional random field, a discriminative string edit distance
+Provides: python-pyhacrf-datamade
+# Runtime dependencies are declared here because only this subpackage ships the module; the source-level package has no file section.
+Requires: python3-PyLBFGS
+Requires: python3-numpy
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+BuildRequires: python3-cffi
+BuildRequires: gcc
+BuildRequires: gdb
+%description -n python3-pyhacrf-datamade
+Hidden alignment conditional random field for classifying string pairs -
+a learnable edit distance.
+Part of the Dedupe.io cloud service and open source toolset for de-duplicating and finding fuzzy matches in your data: https://dedupe.io
+This package aims to implement the HACRF machine learning model with a
+``sklearn``-like interface. It includes ways to fit a model to training
+examples and score new example.
+The model takes string pairs as input and classify them into any number
+of classes. In McCallum's original paper the model was applied to the
+database deduplication problem. Each database entry was paired with
+every other entry and the model then classified whether the pair was a
+'match' or a 'mismatch' based on training examples of matches and
+mismatches.
+I also tried to use it as learnable string edit distance for normalizing
+noisy text. See *A Conditional Random Field for Discriminatively-trained
+Finite-state String Edit Distance* by McCallum, Bellare, and Pereira,
+and the report *Conditional Random Fields for Noisy text normalisation*
+by Dirko Coetsee.
+
+%package help
+Summary: Development documents and examples for pyhacrf-datamade
+Provides: python3-pyhacrf-datamade-doc
+%description help
+Hidden alignment conditional random field for classifying string pairs -
+a learnable edit distance.
+Part of the Dedupe.io cloud service and open source toolset for de-duplicating and finding fuzzy matches in your data: https://dedupe.io
+This package aims to implement the HACRF machine learning model with a
+``sklearn``-like interface. It includes ways to fit a model to training
+examples and score new example.
+The model takes string pairs as input and classify them into any number
+of classes. In McCallum's original paper the model was applied to the
+database deduplication problem. Each database entry was paired with
+every other entry and the model then classified whether the pair was a
+'match' or a 'mismatch' based on training examples of matches and
+mismatches.
+I also tried to use it as learnable string edit distance for normalizing
+noisy text. See *A Conditional Random Field for Discriminatively-trained
+Finite-state String Edit Distance* by McCallum, Bellare, and Pereira,
+and the report *Conditional Random Fields for Noisy text normalisation*
+by Dirko Coetsee.
+
+%prep
+%autosetup -n pyhacrf-datamade-%{version}
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+# Record every installed regular file as an absolute path; the two lists feed the file sections at the end of this spec.
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+	find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+	find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+	find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+	find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+# Man pages are listed with a .gz suffix, presumably because rpm compresses them after this section runs (confirm).
+if [ -d usr/share/man ]; then
+	find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-pyhacrf-datamade -f filelist.lst
+%dir %{python3_sitearch}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Mon Apr 10 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.6-1
+- Package Spec generated
