%global _empty_manifest_terminate_build 0
Name:		python-wordsegment
Version:	1.3.1
Release:	1
Summary:	English word segmentation
License:	Apache-2.0
URL:		http://www.grantjenks.com/docs/wordsegment/
Source0:	https://mirrors.nju.edu.cn/pypi/web/packages/64/68/08112f4c2888f41520e54e2d0b22dcec5adb28cddf4eeca344eb9da04177/wordsegment-1.3.1.tar.gz
BuildArch:	noarch


%description
WordSegment is an Apache2-licensed module for English word segmentation,
written in pure Python and based on a trillion-word corpus. It is based on
code from the chapter "Natural Language Corpus Data" by Peter Norvig in the
book "Beautiful Data" (Segaran and Hammerbacher, 2009). Data files are
derived from the Google Web Trillion Word Corpus, as described by Thorsten
Brants and Alex Franz, and distributed by the Linguistic Data Consortium.
This module contains only a subset of that data: the unigram data includes
only the most common 333,000 words, and the bigram data includes only the
most common 250,000 phrases. Every word and phrase is lowercased, with
punctuation removed.
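
A minimal usage sketch, following the upstream documentation (output is
illustrative):

    >>> from wordsegment import load, segment
    >>> load()
    >>> segment('thisisatest')
    ['this', 'is', 'a', 'test']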

%package -n python3-wordsegment
Summary:	English word segmentation
Provides:	python-wordsegment
BuildRequires:	python3-devel
BuildRequires:	python3-setuptools
BuildRequires:	python3-pip
%description -n python3-wordsegment
WordSegment is an Apache2-licensed module for English word segmentation,
written in pure Python and based on a trillion-word corpus. It is based on
code from the chapter "Natural Language Corpus Data" by Peter Norvig in the
book "Beautiful Data" (Segaran and Hammerbacher, 2009). Data files are
derived from the Google Web Trillion Word Corpus, as described by Thorsten
Brants and Alex Franz, and distributed by the Linguistic Data Consortium.
This module contains only a subset of that data: the unigram data includes
only the most common 333,000 words, and the bigram data includes only the
most common 250,000 phrases. Every word and phrase is lowercased, with
punctuation removed.

%package help
Summary:	Development documentation and examples for wordsegment
Provides:	python3-wordsegment-doc
%description help
WordSegment is an Apache2-licensed module for English word segmentation,
written in pure Python and based on a trillion-word corpus. It is based on
code from the chapter "Natural Language Corpus Data" by Peter Norvig in the
book "Beautiful Data" (Segaran and Hammerbacher, 2009). Data files are
derived from the Google Web Trillion Word Corpus, as described by Thorsten
Brants and Alex Franz, and distributed by the Linguistic Data Consortium.
This module contains only a subset of that data: the unigram data includes
only the most common 333,000 words, and the bigram data includes only the
most common 250,000 phrases. Every word and phrase is lowercased, with
punctuation removed.

%prep
%autosetup -n wordsegment-1.3.1

%build
%py3_build

%install
%py3_install
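# Install any upstream documentation and example directories into the package doc directory.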
install -d -m755 %{buildroot}/%{_pkgdocdir}
if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
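# Build manifests of everything installed into the buildroot; these feed the file lists used below.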
pushd %{buildroot}
if [ -d usr/lib ]; then
	find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/lib64 ]; then
	find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/bin ]; then
	find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/sbin ]; then
	find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
fi
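# Man pages are compressed by rpmbuild, so record them with a .gz suffix.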
touch doclist.lst
if [ -d usr/share/man ]; then
	find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
fi
popd
mv %{buildroot}/filelist.lst .
mv %{buildroot}/doclist.lst .

%files -n python3-wordsegment -f filelist.lst
%dir %{python3_sitelib}/*

%files help -f doclist.lst
%{_docdir}/*

%changelog
* Sun Apr 23 2023 Python_Bot <Python_Bot@openeuler.org> - 1.3.1-1
- Package Spec generated