author    CoprDistGit <infra@openeuler.org>  2023-05-05 08:37:40 +0000
committer CoprDistGit <infra@openeuler.org>  2023-05-05 08:37:40 +0000
commit    77bea3cf3285b756a75138079310f23781d0ca2d (patch)
tree      09a9053a701b0efcc97c82530655d581eb18b92f
parent    afee9c1f83fa629f0b7346220c138382582131a2 (diff)
automatic import of python-autoxgb (openeuler20.03)
 .gitignore          |   1 +
 python-autoxgb.spec | 698 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sources             |   1 +
 3 files changed, 700 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
index e69de29..185a5c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/autoxgb-0.2.2.tar.gz
diff --git a/python-autoxgb.spec b/python-autoxgb.spec
new file mode 100644
index 0000000..8100677
--- /dev/null
+++ b/python-autoxgb.spec
@@ -0,0 +1,698 @@
+%global _empty_manifest_terminate_build 0
+Name: python-autoxgb
+Version: 0.2.2
+Release: 1
+Summary: autoxgb: tuning xgboost with optuna
+License: Apache 2.0
+URL: https://github.com/abhishekkrthakur/autoxgb
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/5f/7f/d52884a7044f16ea4dfd010f8334a25939684e69988156ece7f067747b3a/autoxgb-0.2.2.tar.gz
+BuildArch: noarch
+
+Requires: python3-fastapi
+Requires: python3-loguru
+Requires: python3-numpy
+Requires: python3-optuna
+Requires: python3-pyarrow
+Requires: python3-pydantic
+Requires: python3-joblib
+Requires: python3-pandas
+Requires: python3-scikit-learn
+Requires: python3-uvicorn
+Requires: python3-xgboost
+
+%description
+# AutoXGB
+
+
+XGBoost + Optuna: a no-brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve the best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip:
+
+ pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# targets is a list of strings (the target column names)
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# features is a list of strings (the feature column names)
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+ train_filename=train_filename,
+ output=output,
+ test_filename=test_filename,
+ task=task,
+ idx=idx,
+ targets=targets,
+ features=features,
+ categorical_features=categorical_features,
+ use_gpu=use_gpu,
+ num_folds=num_folds,
+ seed=seed,
+ num_trials=num_trials,
+ time_limit=time_limit,
+ fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as in the Python API above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
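+
+Since `autoxgb serve` runs a FastAPI app (see the feature list above), a running server can be queried over plain HTTP. Below is a minimal client sketch only: the port, the `/predict` path, and the payload keys are all assumptions, not documented autoxgb API. Check the interactive docs that FastAPI auto-generates at `/docs` on the running server for the real request schema.
+
+```python
+# Hypothetical client sketch: the endpoint path, port, and payload keys
+# are assumptions. Consult the server's auto-generated /docs page for
+# the actual request schema.
+import requests
+
+row = {"age": 37, "education": "Masters"}  # one row of feature values
+response = requests.post("http://localhost:8080/predict", json=row)
+response.raise_for_status()  # fail loudly on HTTP errors
+print(response.json())
+```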
+
+To know more about a command, run:
+
+    autoxgb <command> --help
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+ OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+ [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+ [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --train_filename TRAIN_FILENAME
+ Path to training file
+ --test_filename TEST_FILENAME
+ Path to test file
+ --output OUTPUT Path to output directory
+ --task {classification,regression}
+ User defined task type
+ --idx IDX ID column
+ --targets TARGETS Target column(s). If there are multiple targets, separate by ';'
+ --num_folds NUM_FOLDS
+ Number of folds to use
+ --features FEATURES Features to use, separated by ';'
+ --use_gpu Whether to use GPU for training
+ --fast Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+ --seed SEED Random seed
+ --time_limit TIME_LIMIT
+ Time limit for optimization
+```
+
+
+
+
+%package -n python3-autoxgb
+Summary: autoxgb: tuning xgboost with optuna
+Provides: python-autoxgb
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-autoxgb
+# AutoXGB
+
+
+XGBoost + Optuna: a no-brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve the best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip:
+
+ pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# targets is a list of strings (the target column names)
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# features is a list of strings (the feature column names)
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+ train_filename=train_filename,
+ output=output,
+ test_filename=test_filename,
+ task=task,
+ idx=idx,
+ targets=targets,
+ features=features,
+ categorical_features=categorical_features,
+ use_gpu=use_gpu,
+ num_folds=num_folds,
+ seed=seed,
+ num_trials=num_trials,
+ time_limit=time_limit,
+ fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as in the Python API above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
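+
+Since `autoxgb serve` runs a FastAPI app (see the feature list above), a running server can be queried over plain HTTP. Below is a minimal client sketch only: the port, the `/predict` path, and the payload keys are all assumptions, not documented autoxgb API. Check the interactive docs that FastAPI auto-generates at `/docs` on the running server for the real request schema.
+
+```python
+# Hypothetical client sketch: the endpoint path, port, and payload keys
+# are assumptions. Consult the server's auto-generated /docs page for
+# the actual request schema.
+import requests
+
+row = {"age": 37, "education": "Masters"}  # one row of feature values
+response = requests.post("http://localhost:8080/predict", json=row)
+response.raise_for_status()  # fail loudly on HTTP errors
+print(response.json())
+```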
+
+To know more about a command, run:
+
+    autoxgb <command> --help
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+ OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+ [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+ [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --train_filename TRAIN_FILENAME
+ Path to training file
+ --test_filename TEST_FILENAME
+ Path to test file
+ --output OUTPUT Path to output directory
+ --task {classification,regression}
+ User defined task type
+ --idx IDX ID column
+ --targets TARGETS Target column(s). If there are multiple targets, separate by ';'
+ --num_folds NUM_FOLDS
+ Number of folds to use
+ --features FEATURES Features to use, separated by ';'
+ --use_gpu Whether to use GPU for training
+ --fast Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+ --seed SEED Random seed
+ --time_limit TIME_LIMIT
+ Time limit for optimization
+```
+
+
+
+
+%package help
+Summary: Development documents and examples for autoxgb
+Provides: python3-autoxgb-doc
+%description help
+# AutoXGB
+
+
+XGBoost + Optuna: a no-brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve the best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip:
+
+ pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# targets is a list of strings (the target column names)
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# features is a list of strings (the feature column names)
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+ train_filename=train_filename,
+ output=output,
+ test_filename=test_filename,
+ task=task,
+ idx=idx,
+ targets=targets,
+ features=features,
+ categorical_features=categorical_features,
+ use_gpu=use_gpu,
+ num_folds=num_folds,
+ seed=seed,
+ num_trials=num_trials,
+ time_limit=time_limit,
+ fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as in the Python API above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
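+
+Since `autoxgb serve` runs a FastAPI app (see the feature list above), a running server can be queried over plain HTTP. Below is a minimal client sketch only: the port, the `/predict` path, and the payload keys are all assumptions, not documented autoxgb API. Check the interactive docs that FastAPI auto-generates at `/docs` on the running server for the real request schema.
+
+```python
+# Hypothetical client sketch: the endpoint path, port, and payload keys
+# are assumptions. Consult the server's auto-generated /docs page for
+# the actual request schema.
+import requests
+
+row = {"age": 37, "education": "Masters"}  # one row of feature values
+response = requests.post("http://localhost:8080/predict", json=row)
+response.raise_for_status()  # fail loudly on HTTP errors
+print(response.json())
+```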
+
+To know more about a command, run:
+
+    autoxgb <command> --help
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+ OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+ [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+ [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+ -h, --help show this help message and exit
+ --train_filename TRAIN_FILENAME
+ Path to training file
+ --test_filename TEST_FILENAME
+ Path to test file
+ --output OUTPUT Path to output directory
+ --task {classification,regression}
+ User defined task type
+ --idx IDX ID column
+ --targets TARGETS Target column(s). If there are multiple targets, separate by ';'
+ --num_folds NUM_FOLDS
+ Number of folds to use
+ --features FEATURES Features to use, separated by ';'
+ --use_gpu Whether to use GPU for training
+ --fast Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+ --seed SEED Random seed
+ --time_limit TIME_LIMIT
+ Time limit for optimization
+```
+
+
+
+
+%prep
+%autosetup -n autoxgb-0.2.2
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+ find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+ find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+ find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+ find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+ find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-autoxgb -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Fri May 05 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.2-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..fbb7323
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+01132f3f5cce1377b8698ad6ffcd248d autoxgb-0.2.2.tar.gz