diff options
author | CoprDistGit <infra@openeuler.org> | 2024-04-16 07:44:35 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2024-04-16 07:44:35 +0000 |
commit | b3d4bdc278a6dcf3239924b8bfbb48683bd91cd7 (patch) | |
tree | fa2552f811c1ac708f29df55f94d36770a9fad16 | |
parent | 56d85fd91030f4cb29ac0948ff3d527dc08d4006 (diff) |
automatic import of sentencepiece
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | sentencepiece.spec | 184 | ||||
-rw-r--r-- | sources | 2 |
3 files changed, 66 insertions, 121 deletions
@@ -1,2 +1,3 @@ /sentencepiece-0.1.99.tar.gz /v0.2.0.tar.gz +/v0.1.98.tar.gz diff --git a/sentencepiece.spec b/sentencepiece.spec index 894787b..3aaa001 100644 --- a/sentencepiece.spec +++ b/sentencepiece.spec @@ -1,38 +1,22 @@ -%define __cmake_in_source_build 1 -%define _epel %{?epel:%{epel}}%{!?epel:0} - Name: sentencepiece -Version: 0.2.0 -Release: 5%{?dist} -Summary: An unsupervised text tokenizer for Neural Network-based text generation - -License: ASL 2.0 +Version: 0.1.98 +Release: 1 +Summary: An unsupervised text tokenizer and detokenizer +License: Apache-2.0 URL: https://github.com/google/sentencepiece -Source0: https://github.com/google/sentencepiece/archive/refs/tags/v0.2.0.tar.gz +Source0: https://github.com/google/sentencepiece/archive/v%{version}.tar.gz +BuildRequires: gcc-c++ gcc autoconf pkgconfig protobuf-compiler protobuf +BuildRequires: cmake >= 3.14.0 +Requires: protobuf protobuf-compiler -BuildRequires: make -%if 0%{_epel} >= 7 -BuildRequires: cmake3 -%else -BuildRequires: cmake -%endif +%description +SentencePiece is an unsupervised text tokenizer and detokenizer mainly for Neural Network-based text generation +systems where the vocabulary size is predetermined prior to the neural model training. -BuildRequires: gcc-c++ -BuildRequires: gperftools-devel -BuildRequires: pkgconfig -BuildRequires: python3-devel -BuildRequires: python3-setuptools +%global debug_package %{nil} -%description -The SentencePiece is an unsupervised text tokenizer for Neural Network-based -text generation. -It is an unsupervised text tokenizer and detokenizer mainly for -Neural Network-based text generation systems where the vocabulary size is -predetermined prior to the neural model training. -SentencePiece implements subword units and unigram language model with the -extension of direct training from raw sentences. -SentencePiece allows us to make a purely end-to-end system that does not -depend on language-specific pre/post-processing. 
+%prep +%autosetup -p1 -n %{name}-%{version} %package libs Summary: Runtime libraries for SentencePiece @@ -54,109 +38,69 @@ Requires: %{name}-libs%{?_isa} = %{version}-%{release} %description devel This package contains header files to develop a software using SentencePiece. -%package -n python3-%{name} -Summary: Python module for SentencePiece -Requires: %{name}-libs%{?_isa} = %{version}-%{release} -%{?python_provide:%python_provide python3-%{name}} - -%description -n python3-%{name} -This package contains Python3 module file for SentencePiece. - -%prep -%autosetup -n %{name}-%{version}-Source - %build -%if %{_epel} >= 7 -cmake3 . -DCMAKE_INSTALL_PREFIX=%{_prefix} -DCMAKE_INSTALL_LIBDIR=%{_libdir} -%else -%cmake . -DCMAKE_INSTALL_LIBDIR=%{_libdir} -%endif -%make_build -pushd python -CFLAGS="-I../src" LDFLAGS="-L../src -lsentencepiece" PKG_CONFIG_PATH=".." %{__python3} setup.py build -popd +mkdir -p cmake/build +cd cmake/build +cmake ../../ -DCMAKE_CXX_FLAGS="-D_FORTIFY_SOURCE=2 -O2 -Wno-unused-result -Wno-sign-compare"\ + -DCMAKE_C_FLAGS="-D_FORTIFY_SOURCE=2"\ + -DCMAKE_BUILD_TYPE=Release\ + -DSPM_USE_BUILTIN_PROTOBUF=ON\ + -DSPM_ENABLE_SHARED=OFF\ + -DSPM_BUILD_TEST=ON\ + -DCMAKE_INSTALL_LIBDIR=%{buildroot}%{_libdir}\ + -DCMAKE_INSTALL_BINDIR=%{buildroot}%{_bindir}\ + -DCMAKE_INSTALL_INCDIR=%{buildroot}%{_includedir} + +make -j24 V=1 +make CTEST_OUTPUT_ON_FAILURE=1 test +cd ../../ %install -%make_install -pushd python -PKG_CONFIG_PATH=".." 
%py3_install -popd +cd cmake/build +make install sed -i'' -e "s,%{buildroot},," %{buildroot}%{_libdir}/pkgconfig/sentencepiece.pc sed -i'' -e "s,${prefix}/lib,%{_libdir}," %{buildroot}%{_libdir}/pkgconfig/sentencepiece.pc -find %{buildroot} -name '*.a' -delete + %files libs %doc README.md %license LICENSE %{_libdir}/libsentencepiece*.so.0* -%files devel -%{_includedir}/sentencepiece*.h -%{_libdir}/*.so -%{_libdir}/pkgconfig/sentencepiece*.pc - %files tools %{_bindir}/spm* -%files -n python3-%{name} -%{python3_sitearch}/%{name}.py -%{python3_sitearch}/_%{name}*.so -%{python3_sitearch}/__pycache__/* -%{python3_sitearch}/%{name}-*.egg-info/ - +%files +%defattr(-,root,root) +%{_bindir}/spm_* +%{_libdir}/*.a +%{_libdir}/pkgconfig/* +%{_includedir}/sentencepiece*.h %changelog -* Sat Jul 22 2023 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.92-11 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild - -* Tue Jun 13 2023 Python Maint <python-maint@redhat.com> - 0.1.92-10 -- Rebuilt for Python 3.12 - -* Sat Jan 21 2023 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.92-9 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_38_Mass_Rebuild - -* Sat Jul 23 2022 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.92-8 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_37_Mass_Rebuild - -* Mon Jun 13 2022 Python Maint <python-maint@redhat.com> - 0.1.92-7 -- Rebuilt for Python 3.11 - -* Sat Jan 22 2022 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.92-6 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild - -* Fri Jul 23 2021 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.92-5 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild - -* Fri Jun 04 2021 Python Maint <python-maint@redhat.com> - 0.1.92-4 -- Rebuilt for Python 3.10 - -* Wed Jan 27 2021 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.92-3 -- Rebuilt for 
https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild - -* Wed Oct 07 2020 Kentaro Hayashi <kenhys@gmail.com> - 0.1.92-2 -- Add missing BuildRequires: python3-setuptools - -* Thu Oct 01 2020 Kentaro Hayashi <kenhys@gmail.com> - 0.1.92-1 -- New upstream release - -* Tue Sep 22 2020 Jeff Law <law@redhat.com> - 0.1.84-6 -- Use cmake_in_source_build to fix FTBFS due to recent cmake macro changes - -* Sat Aug 01 2020 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.84-5 -- Second attempt - Rebuilt for - https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild - -* Wed Jul 29 2020 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.84-4 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild - -* Tue May 26 2020 Miro Hrončok <mhroncok@redhat.com> - 0.1.84-3 -- Rebuilt for Python 3.9 - -* Thu Jan 30 2020 Fedora Release Engineering <releng@fedoraproject.org> - 0.1.84-2 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_32_Mass_Rebuild - -* Thu Nov 14 2019 Kentaro Hayashi <hayashi@clear-code.com> - 0.1.84-1 -- New upstream release - -* Mon Oct 07 2019 Kentaro Hayashi <hayashi@clear-code.com> - 0.1.83-1 -- initial packaging +* Fri May 12 2023 liuyongqi <liuyongqi5@huawei.com> - 0.1.98-1 +- Sentencepiece version updata: Upgraded from 0.1.92 to 0.1.98 + +* Fri Nov 27 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92.6 +- Fix split_digits support to SentencepieceTrainer spec parser +- Add sentencepiece.pc install +- Add spm_train --help option +- Fix FTBFS problem on armel/mips/powerpc/m68k/sh4 +- Fix endian problem on android plarform +- Fix pb protobuf header file can't find problem +- Restore the sentence boundary + +* Tue Nov 16 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92.5 +- add README.md/README.en.md + +* Tue Nov 2 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-4 +- fix of an unattainable condition + +* Tue Nov 2 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-3 +- Prevent Zero Width Joiner replaced with whitespace + +* Wed Sep 29 2021
xiefangqi <xiefangqi2@huawei.com> - 0.1.92-2 +- add test cases + +* Wed Sep 23 2021 xiefangqi <xiefangqi2@huawei.com> - 0.1.92-1 +- package init @@ -1 +1 @@ -6ee244227f5ee8123de05980920e2936 v0.2.0.tar.gz +2bba6f65825ef0b17a8bed1630f8e709 v0.1.98.tar.gz |