summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCoprDistGit <infra@openeuler.org>2023-05-17 04:53:26 +0000
committerCoprDistGit <infra@openeuler.org>2023-05-17 04:53:26 +0000
commit81926489872c6e12458e8ccd403c22526c220665 (patch)
tree8968e87633dcf4096d451a0d673c4d40de63c350
parent492a6b7af2026a67b748718bd485eb3167adfa0a (diff)
automatic import of python-takemessagecleaner
-rw-r--r--.gitignore1
-rw-r--r--python-takemessagecleaner.spec432
-rw-r--r--sources1
3 files changed, 434 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..cc62786 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/TakeMessageCleaner-1.1.4.tar.gz
diff --git a/python-takemessagecleaner.spec b/python-takemessagecleaner.spec
new file mode 100644
index 0000000..9572f5c
--- /dev/null
+++ b/python-takemessagecleaner.spec
@@ -0,0 +1,432 @@
+%global _empty_manifest_terminate_build 0
+Name: python-TakeMessageCleaner
+Version: 1.1.4
+Release: 1
+Summary: TakeMessageCleaner is a tool for pre processing messages
+License: MIT License
+URL: https://github.com/karinatk/TakeMessageCleaner
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/26/cb/b977c71a6a34322e23bee45b0be39ee0d0b3885f8383ff7b50d796119ff5/TakeMessageCleaner-1.1.4.tar.gz
+BuildArch: noarch
+
+Requires: python3-requests
+Requires: python3-emoji
+Requires: python3-Unidecode
+Requires: python3-setuptools
+Requires: python3-pandas
+Requires: python3-numpy
+
+%description
+# TakeMessageCleaner
+
+TakeMessageCleaner is a tool for pre processing messages.
+It can be used to convert messages to lower case, correct spelling, remove elements like punctuation, emoji, WhatsApp emoji, accentuation, number, cpf, url, e-mail, money, code, time, date and small talk.
+Also, it can pre process data from a dataframe, series, list or csv file.
+
+#### MessageCleaner.from_dataframe: creates a constructor from a dataframe
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>dataframe: pd.core.frame.DataFrame</li>
+dataframe is the pandas dataframe that needs to be processed.
+
+<li>content_column : str</li>
+content_column is the column name of the dataframe that has the information to be processed.
+</ul>
+
+#### MessageCleaner.from_series: creates a constructor from a series
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>series: pd.core.series.Series</li>
+series is the pandas series that needs to be processed.
+
+#### MessageCleaner.from_list: creates a constructor from a list
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>lst: list</li>
+lst is the list of strings that need to be processed.
+</ul>
+
+#### MessageCleaner.from_file: creates a constructor from a csv file
+file_path : str, content_column : str = 'Content', encoding: str = 'utf-8', sep: str = ';'
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>file_path: str</li>
+file_path is the path of the csv file that needs to be processed.
+
+<li>content_column: str</li>
+content_column is the column name of the dataframe that has the information to be processed. If the content column is not set, the value 'Content' will be used.
+
+<li>sep: str</li>
+sep is the csv file separator. If the file separator is not set, the value ';' will be used.
+
+<li>encoding: str</li>
+encoding is the encoding of the csv file. If the file encoding is not set, the value 'utf-8' will be used.
+</ul>
+
+#### MessageCleaner.pre_process: pre-process messages using a json file with the configuration.
+The pre processing step is able to convert sentences to lower case, correct spelling and remove elements like punctuation, emoji, WhatsApp emoji, accentuation, number, cpf, url, e-mail, money, code, time, date and small talk.
+Optionally, you can activate use_placeholder to insert a placeholder where the element was removed. For example: "I want 2 apples" would be converted to "I want NUMBER apples".
+
+## config.json
+```
+{
+ "use_placeholder": true,
+ "verbose": true,
+ "processing": {
+ "lower": true,
+ "punctuation": true,
+ "emoji": true,
+ "wa_emoji": true,
+ "accentuation": true,
+ "number": true,
+ "cpf": true,
+ "url": true,
+ "email": true,
+ "money": true,
+ "code": true,
+ "time": true,
+ "date": true,
+ "spelling": true
+ },
+ "output": {
+ "file_name": "output_file.csv",
+ "file_encoding" : "utf-8",
+ "file_sep": ";",
+ "remove_duplicates": true,
+ "remove_empty": true,
+ "sort_by_length": true
+ }
+}
+```
+
+## Installation
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install TakeMessageCleaner
+
+```bash
+pip install TakeMessageCleaner
+```
+
+## Usage
+
+```python
+import MessageCleaner as mc
+
+cleaner = mc.MessageCleaner.from_file(config_file_path = 'C:/Documents/config.json', file_path = 'C:/Users/mydata.csv', sep = ';', encoding = 'latin-1')
+result = cleaner.clean()
+print(result)
+```
+
+## Author
+Karina Tiemi Kato
+
+## License
+[MIT](https://choosealicense.com/licenses/mit/)
+
+
+
+%package -n python3-TakeMessageCleaner
+Summary: TakeMessageCleaner is a tool for pre processing messages
+Provides: python-TakeMessageCleaner
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-TakeMessageCleaner
+# TakeMessageCleaner
+
+TakeMessageCleaner is a tool for pre processing messages.
+It can be used to convert messages to lower case, correct spelling, remove elements like punctuation, emoji, WhatsApp emoji, accentuation, number, cpf, url, e-mail, money, code, time, date and small talk.
+Also, it can pre process data from a dataframe, series, list or csv file.
+
+#### MessageCleaner.from_dataframe: creates a constructor from a dataframe
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>dataframe: pd.core.frame.DataFrame</li>
+dataframe is the pandas dataframe that needs to be processed.
+
+<li>content_column : str</li>
+content_column is the column name of the dataframe that has the information to be processed.
+</ul>
+
+#### MessageCleaner.from_series: creates a constructor from a series
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>series: pd.core.series.Series</li>
+series is the pandas series that needs to be processed.
+
+#### MessageCleaner.from_list: creates a constructor from a list
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>lst: list</li>
+lst is the list of strings that need to be processed.
+</ul>
+
+#### MessageCleaner.from_file: creates a constructor from a csv file
+file_path : str, content_column : str = 'Content', encoding: str = 'utf-8', sep: str = ';'
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>file_path: str</li>
+file_path is the path of the csv file that needs to be processed.
+
+<li>content_column: str</li>
+content_column is the column name of the dataframe that has the information to be processed. If the content column is not set, the value 'Content' will be used.
+
+<li>sep: str</li>
+sep is the csv file separator. If the file separator is not set, the value ';' will be used.
+
+<li>encoding: str</li>
+encoding is the encoding of the csv file. If the file encoding is not set, the value 'utf-8' will be used.
+</ul>
+
+#### MessageCleaner.pre_process: pre-process messages using a json file with the configuration.
+The pre processing step is able to convert sentences to lower case, correct spelling and remove elements like punctuation, emoji, WhatsApp emoji, accentuation, number, cpf, url, e-mail, money, code, time, date and small talk.
+Optionally, you can activate use_placeholder to insert a placeholder where the element was removed. For example: "I want 2 apples" would be converted to "I want NUMBER apples".
+
+## config.json
+```
+{
+ "use_placeholder": true,
+ "verbose": true,
+ "processing": {
+ "lower": true,
+ "punctuation": true,
+ "emoji": true,
+ "wa_emoji": true,
+ "accentuation": true,
+ "number": true,
+ "cpf": true,
+ "url": true,
+ "email": true,
+ "money": true,
+ "code": true,
+ "time": true,
+ "date": true,
+ "spelling": true
+ },
+ "output": {
+ "file_name": "output_file.csv",
+ "file_encoding" : "utf-8",
+ "file_sep": ";",
+ "remove_duplicates": true,
+ "remove_empty": true,
+ "sort_by_length": true
+ }
+}
+```
+
+## Installation
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install TakeMessageCleaner
+
+```bash
+pip install TakeMessageCleaner
+```
+
+## Usage
+
+```python
+import MessageCleaner as mc
+
+cleaner = mc.MessageCleaner.from_file(config_file_path = 'C:/Documents/config.json', file_path = 'C:/Users/mydata.csv', sep = ';', encoding = 'latin-1')
+result = cleaner.clean()
+print(result)
+```
+
+## Author
+Karina Tiemi Kato
+
+## License
+[MIT](https://choosealicense.com/licenses/mit/)
+
+
+
+%package help
+Summary: Development documents and examples for TakeMessageCleaner
+Provides: python3-TakeMessageCleaner-doc
+%description help
+# TakeMessageCleaner
+
+TakeMessageCleaner is a tool for pre processing messages.
+It can be used to convert messages to lower case, correct spelling, remove elements like punctuation, emoji, WhatsApp emoji, accentuation, number, cpf, url, e-mail, money, code, time, date and small talk.
+Also, it can pre process data from a dataframe, series, list or csv file.
+
+#### MessageCleaner.from_dataframe: creates a constructor from a dataframe
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>dataframe: pd.core.frame.DataFrame</li>
+dataframe is the pandas dataframe that needs to be processed.
+
+<li>content_column : str</li>
+content_column is the column name of the dataframe that has the information to be processed.
+</ul>
+
+#### MessageCleaner.from_series: creates a constructor from a series
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>series: pd.core.series.Series</li>
+series is the pandas series that needs to be processed.
+
+#### MessageCleaner.from_list: creates a constructor from a list
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>lst: list</li>
+lst is the list of strings that need to be processed.
+</ul>
+
+#### MessageCleaner.from_file: creates a constructor from a csv file
+file_path : str, content_column : str = 'Content', encoding: str = 'utf-8', sep: str = ';'
+
+<ul>
+<li>config_file_path: str</li>
+config_file_path is the path of the json file with the configuration
+
+<li>file_path: str</li>
+file_path is the path of the csv file that needs to be processed.
+
+<li>content_column: str</li>
+content_column is the column name of the dataframe that has the information to be processed. If the content column is not set, the value 'Content' will be used.
+
+<li>sep: str</li>
+sep is the csv file separator. If the file separator is not set, the value ';' will be used.
+
+<li>encoding: str</li>
+encoding is the encoding of the csv file. If the file encoding is not set, the value 'utf-8' will be used.
+</ul>
+
+#### MessageCleaner.pre_process: pre-process messages using a json file with the configuration.
+The pre processing step is able to convert sentences to lower case, correct spelling and remove elements like punctuation, emoji, WhatsApp emoji, accentuation, number, cpf, url, e-mail, money, code, time, date and small talk.
+Optionally, you can activate use_placeholder to insert a placeholder where the element was removed. For example: "I want 2 apples" would be converted to "I want NUMBER apples".
+
+## config.json
+```
+{
+ "use_placeholder": true,
+ "verbose": true,
+ "processing": {
+ "lower": true,
+ "punctuation": true,
+ "emoji": true,
+ "wa_emoji": true,
+ "accentuation": true,
+ "number": true,
+ "cpf": true,
+ "url": true,
+ "email": true,
+ "money": true,
+ "code": true,
+ "time": true,
+ "date": true,
+ "spelling": true
+ },
+ "output": {
+ "file_name": "output_file.csv",
+ "file_encoding" : "utf-8",
+ "file_sep": ";",
+ "remove_duplicates": true,
+ "remove_empty": true,
+ "sort_by_length": true
+ }
+}
+```
+
+## Installation
+
+Use the package manager [pip](https://pip.pypa.io/en/stable/) to install TakeMessageCleaner
+
+```bash
+pip install TakeMessageCleaner
+```
+
+## Usage
+
+```python
+import MessageCleaner as mc
+
+cleaner = mc.MessageCleaner.from_file(config_file_path = 'C:/Documents/config.json', file_path = 'C:/Users/mydata.csv', sep = ';', encoding = 'latin-1')
+result = cleaner.clean()
+print(result)
+```
+
+## Author
+Karina Tiemi Kato
+
+## License
+[MIT](https://choosealicense.com/licenses/mit/)
+
+
+
+%prep
+# Unpack Source0 and change into its top-level TakeMessageCleaner-1.1.4 directory.
+%autosetup -n TakeMessageCleaner-1.1.4
+
+%build
+# Build the module with the distribution's Python 3 build macro.
+%py3_build
+
+%install
+# Install the built module into the build root.
+%py3_install
+# Copy whichever upstream documentation/example directories exist into the package doc dir.
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+for docdir in doc docs example examples; do
+    if [ -d "$docdir" ]; then cp -arf "$docdir" %{buildroot}/%{_pkgdocdir}; fi
+done
+pushd %{buildroot}
+# Create both manifests up front so the mv below cannot fail when a directory is absent.
+touch filelist.lst doclist.lst
+# Entries are double-quoted so that a path containing whitespace stays one manifest entry.
+for topdir in usr/lib usr/lib64 usr/bin usr/sbin; do
+    if [ -d "$topdir" ]; then
+        find "$topdir" -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+    fi
+done
+# Man pages are listed with a .gz suffix; presumably a later build step compresses them (TODO confirm).
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+# Move the manifests out of the build root so the -f options of the files sections can read them.
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-TakeMessageCleaner -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Wed May 17 2023 Python_Bot <Python_Bot@openeuler.org> - 1.1.4-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..8cf1b27
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+1fec160a51a60601a00ab51e1b6abf49 TakeMessageCleaner-1.1.4.tar.gz