From 5535dd0f2105f2533eb6a1e4366258a5de7b633b Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Mon, 29 May 2023 12:57:59 +0000 Subject: automatic import of python-eland --- .gitignore | 1 + python-eland.spec | 334 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 336 insertions(+) create mode 100644 python-eland.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..fc48944 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/eland-8.7.0.tar.gz diff --git a/python-eland.spec b/python-eland.spec new file mode 100644 index 0000000..8b96dc5 --- /dev/null +++ b/python-eland.spec @@ -0,0 +1,334 @@ +%global _empty_manifest_terminate_build 0 +Name: python-eland +Version: 8.7.0 +Release: 1 +Summary: Python Client and Toolkit for DataFrames, Big Data, Machine Learning and ETL in Elasticsearch +License: Apache-2.0 +URL: https://github.com/elastic/eland +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/23/45/416493ab2ee3a7865ffa43bc17a9ac1833fd55752437b9a099d1653ae0b3/eland-8.7.0.tar.gz +BuildArch: noarch + +Requires: python3-elasticsearch +Requires: python3-pandas +Requires: python3-matplotlib +Requires: python3-numpy +Requires: python3-torch +Requires: python3-lightgbm +Requires: python3-xgboost +Requires: python3-sentence-transformers +Requires: python3-transformers[torch] +Requires: python3-scikit-learn +Requires: python3-lightgbm +Requires: python3-torch +Requires: python3-sentence-transformers +Requires: python3-transformers[torch] +Requires: python3-scikit-learn +Requires: python3-xgboost + +%description + 0 AvgTicketPrice 13059 non-null float64 + 1 Cancelled 13059 non-null bool + 2 Carrier 13059 non-null object + 24 OriginWeather 13059 non-null object + 25 dayOfWeek 13059 non-null int64 + 26 timestamp 13059 non-null datetime64[ns] +dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17) +memory usage: 80.0 bytes +Elasticsearch storage usage: 5.043 MB +# Filtering of rows using 
comparisons +>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head() + AvgTicketPrice Cancelled ... dayOfWeek timestamp +8 960.869736 True ... 0 2018-01-01 12:09:35 +26 975.812632 True ... 0 2018-01-01 15:38:32 +311 946.358410 True ... 0 2018-01-01 11:51:12 +651 975.383864 True ... 2 2018-01-03 21:13:17 +950 907.836523 True ... 2 2018-01-03 05:14:51 +[5 rows x 27 columns] +# Running aggregations across an index +>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std']) + DistanceKilometers AvgTicketPrice +sum 9.261629e+07 8.204365e+06 +min 0.000000e+00 1.000205e+02 +std 4.578263e+03 2.663867e+02 +``` +## Machine Learning in Eland +### Regression and classification +Eland allows transforming trained regression and classification models from scikit-learn, XGBoost, and LightGBM +libraries to be serialized and used as an inference model in Elasticsearch. +➤ [Eland Machine Learning API documentation](https://eland.readthedocs.io/en/latest/reference/ml.html) +➤ [Read more about Machine Learning in Elasticsearch](https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html) +```python +>>> from xgboost import XGBClassifier +>>> from eland.ml import MLModel +# Train and exercise an XGBoost ML model locally +>>> xgb_model = XGBClassifier(booster="gbtree") +>>> xgb_model.fit(training_data[0], training_data[1]) +>>> xgb_model.predict(training_data[0]) +[0 1 1 0 1 0 0 0 1 0] +# Import the model into Elasticsearch +>>> es_model = MLModel.import_model( + es_client="localhost:9200", + model_id="xgb-classifier", + model=xgb_model, + feature_names=["f0", "f1", "f2", "f3", "f4"], +) +# Exercise the ML model in Elasticsearch with the training data +>>> es_model.predict(training_data[0]) +[0 1 1 0 1 0 0 0 1 0] +``` +### NLP with PyTorch +For NLP tasks, Eland allows importing PyTorch trained BERT models into Elasticsearch. 
Models can be either plain PyTorch +models, or supported [transformers](https://huggingface.co/transformers) models from the +[Hugging Face model hub](https://huggingface.co/models). +```bash +$ eland_import_hub_model \ + --url http://localhost:9200/ \ + --hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ + --task-type ner \ + --start +``` +```python +>>> import elasticsearch +>>> from pathlib import Path +>>> from eland.ml.pytorch import PyTorchModel +>>> from eland.ml.pytorch.transformers import TransformerModel +# Load a Hugging Face transformers model directly from the model hub +>>> tm = TransformerModel("elastic/distilbert-base-cased-finetuned-conll03-english", "ner") +Downloading: 100%|██████████| 257/257 [00:00<00:00, 108kB/s] +Downloading: 100%|██████████| 954/954 [00:00<00:00, 372kB/s] +Downloading: 100%|██████████| 208k/208k [00:00<00:00, 668kB/s] +Downloading: 100%|██████████| 112/112 [00:00<00:00, 43.9kB/s] +Downloading: 100%|██████████| 249M/249M [00:23<00:00, 11.2MB/s] +# Export the model in a TorchScript representation which Elasticsearch uses +>>> tmp_path = "models" +>>> Path(tmp_path).mkdir(parents=True, exist_ok=True) +>>> model_path, config, vocab_path = tm.save(tmp_path) +# Import model into Elasticsearch +>>> es = elasticsearch.Elasticsearch("http://elastic:mlqa_admin@localhost:9200", timeout=300) # 5 minute timeout +>>> ptm = PyTorchModel(es, tm.elasticsearch_model_id()) +>>> ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config) +100%|██████████| 63/63 [00:12<00:00, 5.02it/s] +``` + +%package -n python3-eland +Summary: Python Client and Toolkit for DataFrames, Big Data, Machine Learning and ETL in Elasticsearch +Provides: python-eland +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-eland + 0 AvgTicketPrice 13059 non-null float64 + 1 Cancelled 13059 non-null bool + 2 Carrier 13059 non-null object + 24 
OriginWeather 13059 non-null object + 25 dayOfWeek 13059 non-null int64 + 26 timestamp 13059 non-null datetime64[ns] +dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17) +memory usage: 80.0 bytes +Elasticsearch storage usage: 5.043 MB +# Filtering of rows using comparisons +>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head() + AvgTicketPrice Cancelled ... dayOfWeek timestamp +8 960.869736 True ... 0 2018-01-01 12:09:35 +26 975.812632 True ... 0 2018-01-01 15:38:32 +311 946.358410 True ... 0 2018-01-01 11:51:12 +651 975.383864 True ... 2 2018-01-03 21:13:17 +950 907.836523 True ... 2 2018-01-03 05:14:51 +[5 rows x 27 columns] +# Running aggregations across an index +>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std']) + DistanceKilometers AvgTicketPrice +sum 9.261629e+07 8.204365e+06 +min 0.000000e+00 1.000205e+02 +std 4.578263e+03 2.663867e+02 +``` +## Machine Learning in Eland +### Regression and classification +Eland allows transforming trained regression and classification models from scikit-learn, XGBoost, and LightGBM +libraries to be serialized and used as an inference model in Elasticsearch. 
+➤ [Eland Machine Learning API documentation](https://eland.readthedocs.io/en/latest/reference/ml.html) +➤ [Read more about Machine Learning in Elasticsearch](https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html) +```python +>>> from xgboost import XGBClassifier +>>> from eland.ml import MLModel +# Train and exercise an XGBoost ML model locally +>>> xgb_model = XGBClassifier(booster="gbtree") +>>> xgb_model.fit(training_data[0], training_data[1]) +>>> xgb_model.predict(training_data[0]) +[0 1 1 0 1 0 0 0 1 0] +# Import the model into Elasticsearch +>>> es_model = MLModel.import_model( + es_client="localhost:9200", + model_id="xgb-classifier", + model=xgb_model, + feature_names=["f0", "f1", "f2", "f3", "f4"], +) +# Exercise the ML model in Elasticsearch with the training data +>>> es_model.predict(training_data[0]) +[0 1 1 0 1 0 0 0 1 0] +``` +### NLP with PyTorch +For NLP tasks, Eland allows importing PyTorch trained BERT models into Elasticsearch. Models can be either plain PyTorch +models, or supported [transformers](https://huggingface.co/transformers) models from the +[Hugging Face model hub](https://huggingface.co/models). 
+```bash +$ eland_import_hub_model \ + --url http://localhost:9200/ \ + --hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ + --task-type ner \ + --start +``` +```python +>>> import elasticsearch +>>> from pathlib import Path +>>> from eland.ml.pytorch import PyTorchModel +>>> from eland.ml.pytorch.transformers import TransformerModel +# Load a Hugging Face transformers model directly from the model hub +>>> tm = TransformerModel("elastic/distilbert-base-cased-finetuned-conll03-english", "ner") +Downloading: 100%|██████████| 257/257 [00:00<00:00, 108kB/s] +Downloading: 100%|██████████| 954/954 [00:00<00:00, 372kB/s] +Downloading: 100%|██████████| 208k/208k [00:00<00:00, 668kB/s] +Downloading: 100%|██████████| 112/112 [00:00<00:00, 43.9kB/s] +Downloading: 100%|██████████| 249M/249M [00:23<00:00, 11.2MB/s] +# Export the model in a TorchScript representation which Elasticsearch uses +>>> tmp_path = "models" +>>> Path(tmp_path).mkdir(parents=True, exist_ok=True) +>>> model_path, config, vocab_path = tm.save(tmp_path) +# Import model into Elasticsearch +>>> es = elasticsearch.Elasticsearch("http://elastic:mlqa_admin@localhost:9200", timeout=300) # 5 minute timeout +>>> ptm = PyTorchModel(es, tm.elasticsearch_model_id()) +>>> ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config) +100%|██████████| 63/63 [00:12<00:00, 5.02it/s] +``` + +%package help +Summary: Development documents and examples for eland +Provides: python3-eland-doc +%description help + 0 AvgTicketPrice 13059 non-null float64 + 1 Cancelled 13059 non-null bool + 2 Carrier 13059 non-null object + 24 OriginWeather 13059 non-null object + 25 dayOfWeek 13059 non-null int64 + 26 timestamp 13059 non-null datetime64[ns] +dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17) +memory usage: 80.0 bytes +Elasticsearch storage usage: 5.043 MB +# Filtering of rows using comparisons +>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 
900.0) & (df.Cancelled == True)].head() + AvgTicketPrice Cancelled ... dayOfWeek timestamp +8 960.869736 True ... 0 2018-01-01 12:09:35 +26 975.812632 True ... 0 2018-01-01 15:38:32 +311 946.358410 True ... 0 2018-01-01 11:51:12 +651 975.383864 True ... 2 2018-01-03 21:13:17 +950 907.836523 True ... 2 2018-01-03 05:14:51 +[5 rows x 27 columns] +# Running aggregations across an index +>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std']) + DistanceKilometers AvgTicketPrice +sum 9.261629e+07 8.204365e+06 +min 0.000000e+00 1.000205e+02 +std 4.578263e+03 2.663867e+02 +``` +## Machine Learning in Eland +### Regression and classification +Eland allows transforming trained regression and classification models from scikit-learn, XGBoost, and LightGBM +libraries to be serialized and used as an inference model in Elasticsearch. +➤ [Eland Machine Learning API documentation](https://eland.readthedocs.io/en/latest/reference/ml.html) +➤ [Read more about Machine Learning in Elasticsearch](https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html) +```python +>>> from xgboost import XGBClassifier +>>> from eland.ml import MLModel +# Train and exercise an XGBoost ML model locally +>>> xgb_model = XGBClassifier(booster="gbtree") +>>> xgb_model.fit(training_data[0], training_data[1]) +>>> xgb_model.predict(training_data[0]) +[0 1 1 0 1 0 0 0 1 0] +# Import the model into Elasticsearch +>>> es_model = MLModel.import_model( + es_client="localhost:9200", + model_id="xgb-classifier", + model=xgb_model, + feature_names=["f0", "f1", "f2", "f3", "f4"], +) +# Exercise the ML model in Elasticsearch with the training data +>>> es_model.predict(training_data[0]) +[0 1 1 0 1 0 0 0 1 0] +``` +### NLP with PyTorch +For NLP tasks, Eland allows importing PyTorch trained BERT models into Elasticsearch. 
Models can be either plain PyTorch +models, or supported [transformers](https://huggingface.co/transformers) models from the +[Hugging Face model hub](https://huggingface.co/models). +```bash +$ eland_import_hub_model \ + --url http://localhost:9200/ \ + --hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ + --task-type ner \ + --start +``` +```python +>>> import elasticsearch +>>> from pathlib import Path +>>> from eland.ml.pytorch import PyTorchModel +>>> from eland.ml.pytorch.transformers import TransformerModel +# Load a Hugging Face transformers model directly from the model hub +>>> tm = TransformerModel("elastic/distilbert-base-cased-finetuned-conll03-english", "ner") +Downloading: 100%|██████████| 257/257 [00:00<00:00, 108kB/s] +Downloading: 100%|██████████| 954/954 [00:00<00:00, 372kB/s] +Downloading: 100%|██████████| 208k/208k [00:00<00:00, 668kB/s] +Downloading: 100%|██████████| 112/112 [00:00<00:00, 43.9kB/s] +Downloading: 100%|██████████| 249M/249M [00:23<00:00, 11.2MB/s] +# Export the model in a TorchScript representation which Elasticsearch uses +>>> tmp_path = "models" +>>> Path(tmp_path).mkdir(parents=True, exist_ok=True) +>>> model_path, config, vocab_path = tm.save(tmp_path) +# Import model into Elasticsearch +>>> es = elasticsearch.Elasticsearch("http://elastic:mlqa_admin@localhost:9200", timeout=300) # 5 minute timeout +>>> ptm = PyTorchModel(es, tm.elasticsearch_model_id()) +>>> ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config) +100%|██████████| 63/63 [00:12<00:00, 5.02it/s] +``` + +%prep +%autosetup -n eland-8.7.0 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi 
+pushd %{buildroot}  # work relative to the buildroot so find prints install-relative paths
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst  # quote each entry: manifest entries are split on whitespace
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch filelist.lst doclist.lst  # both manifests must exist for the mv below, even when empty
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst  # listed with a .gz suffix, i.e. the name expected in the final package
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-eland -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Mon May 29 2023 Python_Bot - 8.7.0-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..8cc5926
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+ab5de7d7fe8c4beed20fb8ada30d4afb eland-8.7.0.tar.gz
--
cgit v1.2.3