diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | python-bertstem.spec | 310 | ||||
-rw-r--r-- | sources | 1 |
3 files changed, 312 insertions, 0 deletions
@@ -0,0 +1 @@ +/bertstem-0.0.33.tar.gz diff --git a/python-bertstem.spec b/python-bertstem.spec new file mode 100644 index 0000000..4f1ceca --- /dev/null +++ b/python-bertstem.spec @@ -0,0 +1,310 @@ +%global _empty_manifest_terminate_build 0 +Name: python-bertstem +Version: 0.0.33 +Release: 1 +Summary: BERT model fine-tuned on chilean STEM lessons +License: MIT License +URL: https://github.com/pabloveazul/BERT-STEM +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/e1/44/85fa556d35d908304190dcf2fc5249f07f25d3d48570ce7fdf6529c29b36/bertstem-0.0.33.tar.gz +BuildArch: noarch + +Requires: python3-torch +Requires: python3-pandas +Requires: python3-numpy +Requires: python3-transformers + +%description +# BERT-STEM + +BERT model fine-tuned on Science Technology Engineering and Mathematics (STEM) lessons. + +## Install: + +To install from pip: + +``` +pip install bertstem +``` + +## Quickstart + +To encode sentences : + +```python +from BERT_STEM.BertSTEM import * +bert = BertSTEM() + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], +'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Encode sentences using BertSTEM: +bert._encode_df(df, column='col_2', encoding='sum') + +``` +To classify sentences with COPUS models: + +```python +from BERT_STEM.BertSTEM import * + +# Download BERT for classification (guiding/presenting/administration) +bert_classification = BertSTEMForTextClassification(2, model_name = 'pablouribe/bertstem-copus-guiding') + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], +'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Classify sentences using BertSTEM for COPUS (Guiding): +bert_classification.predict(df,'col_2') + +``` + + +To use it from HuggingFace: + +```python +from BERT_STEM.Encode import * +import pandas as pd +import transformers + +# Download spanish BERTSTEM: +model = transformers.BertModel.from_pretrained("pablouribe/bertstem") + +# Download spanish tokenizer: +tokenizer = transformers.BertTokenizerFast.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased", + do_lower_case=True, + add_special_tokens = False) + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], + 'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Encode sentences using BertSTEM: +sentence_encoder(df, model, tokenizer, column = 'col_2', encoding = 'sum') +``` + + + + + + +%package -n python3-bertstem +Summary: BERT model fine-tuned on chilean STEM lessons +Provides: python-bertstem +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-bertstem +# BERT-STEM + +BERT model fine-tuned on Science Technology Engineering and Mathematics (STEM) lessons. + +## Install: + +To install from pip: + +``` +pip install bertstem +``` + +## Quickstart + +To encode sentences : + +```python +from BERT_STEM.BertSTEM import * +bert = BertSTEM() + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], +'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Encode sentences using BertSTEM: +bert._encode_df(df, column='col_2', encoding='sum') + +``` +To classify sentences with COPUS models: + +```python +from BERT_STEM.BertSTEM import * + +# Download BERT for classification (guiding/presenting/administration) +bert_classification = BertSTEMForTextClassification(2, model_name = 'pablouribe/bertstem-copus-guiding') + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], +'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Classify sentences using BertSTEM for COPUS (Guiding): +bert_classification.predict(df,'col_2') + +``` + + +To use it from HuggingFace: + +```python +from BERT_STEM.Encode import * +import pandas as pd +import transformers + +# Download spanish BERTSTEM: +model = transformers.BertModel.from_pretrained("pablouribe/bertstem") + +# Download spanish tokenizer: +tokenizer = transformers.BertTokenizerFast.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased", + do_lower_case=True, + add_special_tokens = False) + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], + 'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Encode sentences using BertSTEM: +sentence_encoder(df, model, tokenizer, column = 'col_2', encoding = 'sum') +``` + + + + + + +%package help +Summary: Development documents and examples for bertstem +Provides: python3-bertstem-doc +%description help +# BERT-STEM + +BERT model fine-tuned on Science Technology Engineering and Mathematics (STEM) lessons. + +## Install: + +To install from pip: + +``` +pip install bertstem +``` + +## Quickstart + +To encode sentences : + +```python +from BERT_STEM.BertSTEM import * +bert = BertSTEM() + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], +'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Encode sentences using BertSTEM: +bert._encode_df(df, column='col_2', encoding='sum') + +``` +To classify sentences with COPUS models: + +```python +from BERT_STEM.BertSTEM import * + +# Download BERT for classification (guiding/presenting/administration) +bert_classification = BertSTEMForTextClassification(2, model_name = 'pablouribe/bertstem-copus-guiding') + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], +'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Classify sentences using BertSTEM for COPUS (Guiding): +bert_classification.predict(df,'col_2') + +``` + + +To use it from HuggingFace: + +```python +from BERT_STEM.Encode import * +import pandas as pd +import transformers + +# Download spanish BERTSTEM: +model = transformers.BertModel.from_pretrained("pablouribe/bertstem") + +# Download spanish tokenizer: +tokenizer = transformers.BertTokenizerFast.from_pretrained("dccuchile/bert-base-spanish-wwm-uncased", + do_lower_case=True, + add_special_tokens = False) + +# Example dataframe with text in spanish +data = {'col_1': [3, 2, 1], + 'col_2': ['hola como estan', 'alumnos queridos', 'vamos a hablar de matematicas']} + +df = pd.DataFrame.from_dict(data) + +# Encode sentences using BertSTEM: +sentence_encoder(df, model, tokenizer, column = 'col_2', encoding = 'sum') +``` + + + + + + +%prep +%autosetup -n bertstem-0.0.33 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-bertstem -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Fri May 05 2023 Python_Bot <Python_Bot@openeuler.org> - 0.0.33-1 +- Package Spec generated @@ -0,0 +1 @@ +12247f399dc48cfdd362df25dc78f6c1 bertstem-0.0.33.tar.gz |