diff options
Diffstat (limited to 'python-wordsegment.spec')
| -rw-r--r-- | python-wordsegment.spec | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/python-wordsegment.spec b/python-wordsegment.spec
new file mode 100644
index 0000000..d94e9d3
--- /dev/null
+++ b/python-wordsegment.spec
@@ -0,0 +1,99 @@
+%global _empty_manifest_terminate_build 0
+Name: python-wordsegment
+Version: 1.3.1
+Release: 1
+Summary: English word segmentation
+License: Apache-2.0
+URL: http://www.grantjenks.com/docs/wordsegment/
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/64/68/08112f4c2888f41520e54e2d0b22dcec5adb28cddf4eeca344eb9da04177/wordsegment-1.3.1.tar.gz
+BuildArch: noarch
+# Pure-Python module (see the description), so a single noarch build is produced.
+# License is an SPDX identifier; sub-package Provides are pinned to this exact build.
+%description
+`WordSegment`_ is an Apache2 licensed module for English word
+segmentation, written in pure-Python, and based on a trillion-word corpus.
+Based on code from the chapter "`Natural Language Corpus Data`_" by Peter
+Norvig from the book "`Beautiful Data`_" (Segaran and Hammerbacher, 2009).
+Data files are derived from the `Google Web Trillion Word Corpus`_, as
+described by Thorsten Brants and Alex Franz, and `distributed`_ by the
+Linguistic Data Consortium. This module contains only a subset of that
+data. The unigram data includes only the most common 333,000 words. Similarly,
+bigram data includes only the most common 250,000 phrases. Every word and
+phrase is lowercased with punctuation removed.
+
+%package -n python3-wordsegment
+Summary: English word segmentation
+Provides: python-wordsegment = %{version}-%{release}
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-wordsegment
+`WordSegment`_ is an Apache2 licensed module for English word
+segmentation, written in pure-Python, and based on a trillion-word corpus.
+Based on code from the chapter "`Natural Language Corpus Data`_" by Peter
+Norvig from the book "`Beautiful Data`_" (Segaran and Hammerbacher, 2009).
+Data files are derived from the `Google Web Trillion Word Corpus`_, as
+described by Thorsten Brants and Alex Franz, and `distributed`_ by the
+Linguistic Data Consortium. This module contains only a subset of that
+data. The unigram data includes only the most common 333,000 words. Similarly,
+bigram data includes only the most common 250,000 phrases. Every word and
+phrase is lowercased with punctuation removed.
+
+%package help
+Summary: Development documents and examples for wordsegment
+Provides: python3-wordsegment-doc = %{version}-%{release}
+%description help
+`WordSegment`_ is an Apache2 licensed module for English word
+segmentation, written in pure-Python, and based on a trillion-word corpus.
+Based on code from the chapter "`Natural Language Corpus Data`_" by Peter
+Norvig from the book "`Beautiful Data`_" (Segaran and Hammerbacher, 2009).
+Data files are derived from the `Google Web Trillion Word Corpus`_, as
+described by Thorsten Brants and Alex Franz, and `distributed`_ by the
+Linguistic Data Consortium. This module contains only a subset of that
+data. The unigram data includes only the most common 333,000 words. Similarly,
+bigram data includes only the most common 250,000 phrases. Every word and
+phrase is lowercased with punctuation removed.
+
+%prep
+%autosetup -n wordsegment-%{version}
+# The unpack directory above tracks Version, so a version bump needs no edit here.
+%build
+%py3_build
+# Next: install the module, then stage any doc or example trees in the package doc dir.
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot} # manifest paths are computed relative to the install root
+if [ -d usr/lib ]; then # entries are double-quoted so paths containing spaces stay intact
+	find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+	find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+	find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+	find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch doclist.lst # the help manifest must exist even when no man pages are found
+if [ -d usr/share/man ]; then # listed with a .gz suffix; assumes man pages get compressed later, TODO confirm
+	find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+# Both manifests now sit in the build directory, where the -f options below read them.
+%files -n python3-wordsegment -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Tue Apr 11 2023 Python_Bot <Python_Bot@openeuler.org> - 1.3.1-1
+- Package Spec generated
