diff options
Diffstat (limited to 'python-kuro2sudachi.spec')
-rw-r--r-- | python-kuro2sudachi.spec | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/python-kuro2sudachi.spec b/python-kuro2sudachi.spec new file mode 100644 index 0000000..108216e --- /dev/null +++ b/python-kuro2sudachi.spec @@ -0,0 +1,419 @@ +%global _empty_manifest_terminate_build 0 +Name: python-kuro2sudachi +Version: 0.3.6 +Release: 1 +Summary: please add a summary manually as the author left a blank one +License: Apache-2.0 +URL: http://github.com/po3rin/kuro2sudachi +Source0: https://mirrors.aliyun.com/pypi/web/packages/d8/7c/c2bd0bc055f4b22dff32040519c4f7f1727b2c053db0c23f160dad2df0e5/kuro2sudachi-0.3.6.tar.gz +BuildArch: noarch + +Requires: python3-jaconv +Requires: python3-importlib-metadata +Requires: python3-sudachipy +Requires: python3-sudachidict_full +Requires: python3-SudachiDict-core + +%description +# kuro2sudachi + +[](https://pypi.python.org/pypi/kuro2sudachi/) + +[](https://www.python.org/downloads/release/python-390/) + +kuro2sudachi lets you convert a kuromoji user dict to a sudachi user dict. + +## Usage + +```sh +$ pip install kuro2sudachi +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt +``` + +## Custom pos convert dict + +You can overwrite the conversion config with a JSON settings file. + +```json +{ + "固有名詞": { + "sudachi_pos": "名詞,固有名詞,一般,*,*,*", + "left_id": 4786, + "right_id": 4786, + "cost": 5000 + }, + "名詞": { + "sudachi_pos": "名詞,普通名詞,一般,*,*,*", + "left_id": 5146, + "right_id": 5146, + "cost": 5000 + } +} + +``` + +```sh +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -c convert_config.json +``` + +If you want to ignore unsupported POS errors and invalid formats, use the `--ignore` flag. + +## Dictionary type + +You can specify the dictionary with the tokenize option -s (default: core). + +```sh +$ pip install sudachidict_full +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -s full +``` + +## Auto Splitting + +kuro2sudachi supports auto splitting.
+ +```json +{ + "名詞": { + "sudachi_pos": "名詞,普通名詞,一般,*,*,*", + "left_id": 5146, + "right_id": 5146, + "cost": 5000, + "split_mode": "C", + "unit_div_mode": [ + "A", "B" + ] + } +} +``` + +The output includes unit division info. + +```sh +$ cat kuromoji_dict.txt +融合たんぱく質,融合たんぱく質,融合たんぱく質,名詞 +発作性心房細動,発作性心房細動,発作性心房細動,名詞 + +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -c convert_config.json --ignore + +$ cat sudachi_user_dict.txt +融合たんぱく質,4786,4786,5000,融合たんぱく質,名詞,普通名詞,一般,*,*,*,,融合たんぱく質,*,C,"融合,名詞,普通名詞,サ変可能,*,*,*,ユウゴウ/たんぱく,名詞,普通名詞,一般,*,*,*,タンパク/質,接尾辞,名詞的,一般,*,*,*,シツ","融合,名詞,普通名詞,サ変可能,*,*,*,ユウゴウ/たんぱく質,名詞,普通名詞,一般,*,*,*,タンパクシツ",* +発作性心房細動,4786,4786,5000,発作性心房細動,名詞,普通名詞,一般,*,*,*,,発作性心房細動,*,C,"発作,名詞,普通名詞,一般,*,*,*,ホッサ/性,接尾辞,名詞的,一般,*,*,*,セイ/心房,名詞,普通名詞,一般,*,*,*,シンボウ/細動,名詞,普通名詞,一般,*,*,*,サイドウ","発作,名詞,普通名詞,一般,*,*,*,ホッサ/性,接尾辞,名詞的,一般,*,*,*,セイ/心房,名詞,普通名詞,一般,*,*,*,シンボウ/細動,名詞,普通名詞,一般,*,*,*,サイドウ",* +``` + +## Splitting Words defined by kuromoji + +Currently, the CLI does not support word splitting defined by kuromoji. Therefore, the split representation of kuromoji is ignored. + +``` +中咽頭ガン,中咽頭 ガン,チュウイントウ ガン,カスタム名詞 +↓ +中咽頭ガン,4786,4786,7000,中咽頭ガン,名詞,固有名詞,一般,*,*,*,チュウイントウガン,中咽頭ガン,*,*,*,*,* +``` + +# For Developer + +test kuro2sudachi + +```sh +$ poetry install +$ poetry run pytest +``` + +exec kuro2sudachi command + +```sh +$ poetry run kuro2sudachi tests/kuromoji_dict_test.txt -o sudachi_user_dict.txt +``` + +## TODO + +- [ ] split mode +- [ ] default rewrite + + +%package -n python3-kuro2sudachi +Summary: please add a summary manually as the author left a blank one +Provides: python-kuro2sudachi +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-kuro2sudachi +# kuro2sudachi + +[](https://pypi.python.org/pypi/kuro2sudachi/) + +[](https://www.python.org/downloads/release/python-390/) + +kuro2sudachi lets you convert a kuromoji user dict to a sudachi user dict.
+ +## Usage + +```sh +$ pip install kuro2sudachi +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt +``` + +## Custom pos convert dict + +You can overwrite the conversion config with a JSON settings file. + +```json +{ + "固有名詞": { + "sudachi_pos": "名詞,固有名詞,一般,*,*,*", + "left_id": 4786, + "right_id": 4786, + "cost": 5000 + }, + "名詞": { + "sudachi_pos": "名詞,普通名詞,一般,*,*,*", + "left_id": 5146, + "right_id": 5146, + "cost": 5000 + } +} + +``` + +```sh +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -c convert_config.json +``` + +If you want to ignore unsupported POS errors and invalid formats, use the `--ignore` flag. + +## Dictionary type + +You can specify the dictionary with the tokenize option -s (default: core). + +```sh +$ pip install sudachidict_full +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -s full +``` + +## Auto Splitting + +kuro2sudachi supports auto splitting. + +```json +{ + "名詞": { + "sudachi_pos": "名詞,普通名詞,一般,*,*,*", + "left_id": 5146, + "right_id": 5146, + "cost": 5000, + "split_mode": "C", + "unit_div_mode": [ + "A", "B" + ] + } +} +``` + +The output includes unit division info. + +```sh +$ cat kuromoji_dict.txt +融合たんぱく質,融合たんぱく質,融合たんぱく質,名詞 +発作性心房細動,発作性心房細動,発作性心房細動,名詞 + +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -c convert_config.json --ignore + +$ cat sudachi_user_dict.txt +融合たんぱく質,4786,4786,5000,融合たんぱく質,名詞,普通名詞,一般,*,*,*,,融合たんぱく質,*,C,"融合,名詞,普通名詞,サ変可能,*,*,*,ユウゴウ/たんぱく,名詞,普通名詞,一般,*,*,*,タンパク/質,接尾辞,名詞的,一般,*,*,*,シツ","融合,名詞,普通名詞,サ変可能,*,*,*,ユウゴウ/たんぱく質,名詞,普通名詞,一般,*,*,*,タンパクシツ",* +発作性心房細動,4786,4786,5000,発作性心房細動,名詞,普通名詞,一般,*,*,*,,発作性心房細動,*,C,"発作,名詞,普通名詞,一般,*,*,*,ホッサ/性,接尾辞,名詞的,一般,*,*,*,セイ/心房,名詞,普通名詞,一般,*,*,*,シンボウ/細動,名詞,普通名詞,一般,*,*,*,サイドウ","発作,名詞,普通名詞,一般,*,*,*,ホッサ/性,接尾辞,名詞的,一般,*,*,*,セイ/心房,名詞,普通名詞,一般,*,*,*,シンボウ/細動,名詞,普通名詞,一般,*,*,*,サイドウ",* +``` + +## Splitting Words defined by kuromoji + +Currently, the CLI does not support word splitting defined by kuromoji. Therefore, the split representation of kuromoji is ignored.
+ +``` +中咽頭ガン,中咽頭 ガン,チュウイントウ ガン,カスタム名詞 +↓ +中咽頭ガン,4786,4786,7000,中咽頭ガン,名詞,固有名詞,一般,*,*,*,チュウイントウガン,中咽頭ガン,*,*,*,*,* +``` + +# For Developer + +test kuro2sudachi + +```sh +$ poetry install +$ poetry run pytest +``` + +exec kuro2sudachi command + +```sh +$ poetry run kuro2sudachi tests/kuromoji_dict_test.txt -o sudachi_user_dict.txt +``` + +## TODO + +- [ ] split mode +- [ ] default rewrite + + +%package help +Summary: Development documents and examples for kuro2sudachi +Provides: python3-kuro2sudachi-doc +%description help +# kuro2sudachi + +[](https://pypi.python.org/pypi/kuro2sudachi/) + +[](https://www.python.org/downloads/release/python-390/) + +kuro2sudachi lets you convert a kuromoji user dict to a sudachi user dict. + +## Usage + +```sh +$ pip install kuro2sudachi +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt +``` + +## Custom pos convert dict + +You can overwrite the conversion config with a JSON settings file. + +```json +{ + "固有名詞": { + "sudachi_pos": "名詞,固有名詞,一般,*,*,*", + "left_id": 4786, + "right_id": 4786, + "cost": 5000 + }, + "名詞": { + "sudachi_pos": "名詞,普通名詞,一般,*,*,*", + "left_id": 5146, + "right_id": 5146, + "cost": 5000 + } +} + +``` + +```sh +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -c convert_config.json +``` + +If you want to ignore unsupported POS errors and invalid formats, use the `--ignore` flag. + +## Dictionary type + +You can specify the dictionary with the tokenize option -s (default: core). + +```sh +$ pip install sudachidict_full +$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -s full +``` + +## Auto Splitting + +kuro2sudachi supports auto splitting. + +```json +{ + "名詞": { + "sudachi_pos": "名詞,普通名詞,一般,*,*,*", + "left_id": 5146, + "right_id": 5146, + "cost": 5000, + "split_mode": "C", + "unit_div_mode": [ + "A", "B" + ] + } +} +``` + +The output includes unit division info.
+
+```sh
+$ cat kuromoji_dict.txt
+融合たんぱく質,融合たんぱく質,融合たんぱく質,名詞
+発作性心房細動,発作性心房細動,発作性心房細動,名詞
+
+$ kuro2sudachi kuromoji_dict.txt -o sudachi_user_dict.txt -c convert_config.json --ignore
+
+$ cat sudachi_user_dict.txt
+融合たんぱく質,4786,4786,5000,融合たんぱく質,名詞,普通名詞,一般,*,*,*,,融合たんぱく質,*,C,"融合,名詞,普通名詞,サ変可能,*,*,*,ユウゴウ/たんぱく,名詞,普通名詞,一般,*,*,*,タンパク/質,接尾辞,名詞的,一般,*,*,*,シツ","融合,名詞,普通名詞,サ変可能,*,*,*,ユウゴウ/たんぱく質,名詞,普通名詞,一般,*,*,*,タンパクシツ",*
+発作性心房細動,4786,4786,5000,発作性心房細動,名詞,普通名詞,一般,*,*,*,,発作性心房細動,*,C,"発作,名詞,普通名詞,一般,*,*,*,ホッサ/性,接尾辞,名詞的,一般,*,*,*,セイ/心房,名詞,普通名詞,一般,*,*,*,シンボウ/細動,名詞,普通名詞,一般,*,*,*,サイドウ","発作,名詞,普通名詞,一般,*,*,*,ホッサ/性,接尾辞,名詞的,一般,*,*,*,セイ/心房,名詞,普通名詞,一般,*,*,*,シンボウ/細動,名詞,普通名詞,一般,*,*,*,サイドウ",*
+```
+
+## Splitting Words defined by kuromoji
+
+Currently, the CLI does not support word splitting defined by kuromoji. Therefore, the split representation of kuromoji is ignored.
+
+```
+中咽頭ガン,中咽頭 ガン,チュウイントウ ガン,カスタム名詞
+↓
+中咽頭ガン,4786,4786,7000,中咽頭ガン,名詞,固有名詞,一般,*,*,*,チュウイントウガン,中咽頭ガン,*,*,*,*,*
+```
+
+# For Developer
+
+test kuro2sudachi
+
+```sh
+$ poetry install
+$ poetry run pytest
+```
+
+exec kuro2sudachi command
+
+```sh
+$ poetry run kuro2sudachi tests/kuromoji_dict_test.txt -o sudachi_user_dict.txt
+```
+
+## TODO
+
+- [ ] split mode
+- [ ] default rewrite
+
+
+%prep
+%autosetup -n kuro2sudachi-0.3.6
+
+%build
+%py3_build
+
+%install
+%py3_install
+# Copy whichever upstream documentation/example directories exist.
+install -d -m755 "%{buildroot}/%{_pkgdocdir}"
+for docdir in doc docs example examples; do
+  if [ -d "$docdir" ]; then cp -arf "$docdir" "%{buildroot}/%{_pkgdocdir}"; fi
+done
+# Generate the manifests read by the -f options of the files sections:
+# one double-quoted absolute path per line. Both lists are created up
+# front so the mv calls below cannot fail when nothing was installed.
+pushd "%{buildroot}"
+touch filelist.lst doclist.lst
+for payload in usr/lib usr/lib64 usr/bin usr/sbin; do
+  if [ -d "$payload" ]; then
+    find "$payload" -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+  fi
+done
+# Man pages are recorded with a .gz suffix appended to each file name.
+# NOTE(review): presumably they are compressed after this scriptlet runs;
+# confirm against the distribution's build policy.
+if [ -d usr/share/man ]; then
+  find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+# Move the manifests out of the buildroot into the current directory.
+mv "%{buildroot}/filelist.lst" .
+mv "%{buildroot}/doclist.lst" .
+
+%files -n python3-kuro2sudachi -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 0.3.6-1
+- Package Spec generated |