| author | CoprDistGit <infra@openeuler.org> | 2023-05-05 08:37:40 +0000 |
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2023-05-05 08:37:40 +0000 |
| commit | 77bea3cf3285b756a75138079310f23781d0ca2d (patch) | |
| tree | 09a9053a701b0efcc97c82530655d581eb18b92f | |
| parent | afee9c1f83fa629f0b7346220c138382582131a2 (diff) | |
automatic import of python-autoxgb (openeuler20.03)
| -rw-r--r-- | .gitignore | 1 |
| -rw-r--r-- | python-autoxgb.spec | 698 |
| -rw-r--r-- | sources | 1 |
3 files changed, 700 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/autoxgb-0.2.2.tar.gz
diff --git a/python-autoxgb.spec b/python-autoxgb.spec
new file mode 100644
index 0000000..8100677
--- /dev/null
+++ b/python-autoxgb.spec
@@ -0,0 +1,698 @@
+%global _empty_manifest_terminate_build 0
+Name: python-autoxgb
+Version: 0.2.2
+Release: 1
+Summary: autoxgb: tuning xgboost with optuna
+License: Apache 2.0
+URL: https://github.com/abhishekkrthakur/autoxgb
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/5f/7f/d52884a7044f16ea4dfd010f8334a25939684e69988156ece7f067747b3a/autoxgb-0.2.2.tar.gz
+BuildArch: noarch
+
+Requires: python3-fastapi
+Requires: python3-loguru
+Requires: python3-numpy
+Requires: python3-optuna
+Requires: python3-pyarrow
+Requires: python3-pydantic
+Requires: python3-joblib
+Requires: python3-pandas
+Requires: python3-scikit-learn
+Requires: python3-uvicorn
+Requires: python3-xgboost
+
+%description
+# AutoXGB
+
+
+XGBoost + Optuna: no brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip
+
+    pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# target columns are a list of strings
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# feature columns are a list of strings
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+    train_filename=train_filename,
+    output=output,
+    test_filename=test_filename,
+    task=task,
+    idx=idx,
+    targets=targets,
+    features=features,
+    categorical_features=categorical_features,
+    use_gpu=use_gpu,
+    num_folds=num_folds,
+    seed=seed,
+    num_trials=num_trials,
+    time_limit=time_limit,
+    fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
+
+To know more about a command, run:
+
+    `autoxgb <command> --help`
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+                                        OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+                                        [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+                                        [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train_filename TRAIN_FILENAME
+                        Path to training file
+  --test_filename TEST_FILENAME
+                        Path to test file
+  --output OUTPUT       Path to output directory
+  --task {classification,regression}
+                        User defined task type
+  --idx IDX             ID column
+  --targets TARGETS     Target column(s). If there are multiple targets, separate by ';'
+  --num_folds NUM_FOLDS
+                        Number of folds to use
+  --features FEATURES   Features to use, separated by ';'
+  --use_gpu             Whether to use GPU for training
+  --fast                Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+  --seed SEED           Random seed
+  --time_limit TIME_LIMIT
+                        Time limit for optimization
+```
+
+
+
+%package -n python3-autoxgb
+Summary: autoxgb: tuning xgboost with optuna
+Provides: python-autoxgb
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-autoxgb
+# AutoXGB
+
+
+XGBoost + Optuna: no brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip
+
+    pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# target columns are a list of strings
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# feature columns are a list of strings
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+    train_filename=train_filename,
+    output=output,
+    test_filename=test_filename,
+    task=task,
+    idx=idx,
+    targets=targets,
+    features=features,
+    categorical_features=categorical_features,
+    use_gpu=use_gpu,
+    num_folds=num_folds,
+    seed=seed,
+    num_trials=num_trials,
+    time_limit=time_limit,
+    fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
+
+To know more about a command, run:
+
+    `autoxgb <command> --help`
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+                                        OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+                                        [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+                                        [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train_filename TRAIN_FILENAME
+                        Path to training file
+  --test_filename TEST_FILENAME
+                        Path to test file
+  --output OUTPUT       Path to output directory
+  --task {classification,regression}
+                        User defined task type
+  --idx IDX             ID column
+  --targets TARGETS     Target column(s). If there are multiple targets, separate by ';'
+  --num_folds NUM_FOLDS
+                        Number of folds to use
+  --features FEATURES   Features to use, separated by ';'
+  --use_gpu             Whether to use GPU for training
+  --fast                Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+  --seed SEED           Random seed
+  --time_limit TIME_LIMIT
+                        Time limit for optimization
+```
+
+
+
+%package help
+Summary: Development documents and examples for autoxgb
+Provides: python3-autoxgb-doc
+%description help
+# AutoXGB
+
+
+XGBoost + Optuna: no brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip
+
+    pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# target columns are a list of strings
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# feature columns are a list of strings
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+    train_filename=train_filename,
+    output=output,
+    test_filename=test_filename,
+    task=task,
+    idx=idx,
+    targets=targets,
+    features=features,
+    categorical_features=categorical_features,
+    use_gpu=use_gpu,
+    num_folds=num_folds,
+    seed=seed,
+    num_trials=num_trials,
+    time_limit=time_limit,
+    fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
+
+To know more about a command, run:
+
+    `autoxgb <command> --help`
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+                                        OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+                                        [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+                                        [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train_filename TRAIN_FILENAME
+                        Path to training file
+  --test_filename TEST_FILENAME
+                        Path to test file
+  --output OUTPUT       Path to output directory
+  --task {classification,regression}
+                        User defined task type
+  --idx IDX             ID column
+  --targets TARGETS     Target column(s). If there are multiple targets, separate by ';'
+  --num_folds NUM_FOLDS
+                        Number of folds to use
+  --features FEATURES   Features to use, separated by ';'
+  --use_gpu             Whether to use GPU for training
+  --fast                Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+  --seed SEED           Random seed
+  --time_limit TIME_LIMIT
+                        Time limit for optimization
+```
+
+
+
+%prep
+%autosetup -n autoxgb-0.2.2
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-autoxgb -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Fri May 05 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.2-1
+- Package Spec generated
diff --git a/sources b/sources
@@ -0,0 +1 @@
+01132f3f5cce1377b8698ad6ffcd248d autoxgb-0.2.2.tar.gz