| author | CoprDistGit <infra@openeuler.org> | 2023-05-05 08:37:40 +0000 |
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2023-05-05 08:37:40 +0000 |
| commit | 77bea3cf3285b756a75138079310f23781d0ca2d (patch) | |
| tree | 09a9053a701b0efcc97c82530655d581eb18b92f | |
| parent | afee9c1f83fa629f0b7346220c138382582131a2 (diff) | |
automatic import of python-autoxgb (openeuler20.03)
| -rw-r--r-- | .gitignore | 1 |
| -rw-r--r-- | python-autoxgb.spec | 698 |
| -rw-r--r-- | sources | 1 |
3 files changed, 700 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/autoxgb-0.2.2.tar.gz
diff --git a/python-autoxgb.spec b/python-autoxgb.spec
new file mode 100644
index 0000000..8100677
--- /dev/null
+++ b/python-autoxgb.spec
@@ -0,0 +1,698 @@
+%global _empty_manifest_terminate_build 0
+Name: python-autoxgb
+Version: 0.2.2
+Release: 1
+Summary: autoxgb: tuning xgboost with optuna
+License: Apache 2.0
+URL: https://github.com/abhishekkrthakur/autoxgb
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/5f/7f/d52884a7044f16ea4dfd010f8334a25939684e69988156ece7f067747b3a/autoxgb-0.2.2.tar.gz
+BuildArch: noarch
+
+Requires: python3-fastapi
+Requires: python3-loguru
+Requires: python3-numpy
+Requires: python3-optuna
+Requires: python3-pyarrow
+Requires: python3-pydantic
+Requires: python3-joblib
+Requires: python3-pandas
+Requires: python3-scikit-learn
+Requires: python3-uvicorn
+Requires: python3-xgboost
+
+%description
+# AutoXGB
+
+
+XGBoost + Optuna: no brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip
+
+    pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# target columns are a list of strings
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# feature columns are a list of strings
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+    train_filename=train_filename,
+    output=output,
+    test_filename=test_filename,
+    task=task,
+    idx=idx,
+    targets=targets,
+    features=features,
+    categorical_features=categorical_features,
+    use_gpu=use_gpu,
+    num_folds=num_folds,
+    seed=seed,
+    num_trials=num_trials,
+    time_limit=time_limit,
+    fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
+
+To know more about a command, run:
+
+    `autoxgb <command> --help`
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+                                        OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+                                        [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+                                        [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train_filename TRAIN_FILENAME
+                        Path to training file
+  --test_filename TEST_FILENAME
+                        Path to test file
+  --output OUTPUT       Path to output directory
+  --task {classification,regression}
+                        User defined task type
+  --idx IDX             ID column
+  --targets TARGETS     Target column(s). If there are multiple targets, separate by ';'
+  --num_folds NUM_FOLDS
+                        Number of folds to use
+  --features FEATURES   Features to use, separated by ';'
+  --use_gpu             Whether to use GPU for training
+  --fast                Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+  --seed SEED           Random seed
+  --time_limit TIME_LIMIT
+                        Time limit for optimization
+```
+
+
+
+%package -n python3-autoxgb
+Summary: autoxgb: tuning xgboost with optuna
+Provides: python-autoxgb
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-autoxgb
+# AutoXGB
+
+
+XGBoost + Optuna: no brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip
+
+    pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# target columns are a list of strings
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# feature columns are a list of strings
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+    train_filename=train_filename,
+    output=output,
+    test_filename=test_filename,
+    task=task,
+    idx=idx,
+    targets=targets,
+    features=features,
+    categorical_features=categorical_features,
+    use_gpu=use_gpu,
+    num_folds=num_folds,
+    seed=seed,
+    num_trials=num_trials,
+    time_limit=time_limit,
+    fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
+
+To know more about a command, run:
+
+    `autoxgb <command> --help`
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+                                        OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+                                        [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+                                        [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train_filename TRAIN_FILENAME
+                        Path to training file
+  --test_filename TEST_FILENAME
+                        Path to test file
+  --output OUTPUT       Path to output directory
+  --task {classification,regression}
+                        User defined task type
+  --idx IDX             ID column
+  --targets TARGETS     Target column(s). If there are multiple targets, separate by ';'
+  --num_folds NUM_FOLDS
+                        Number of folds to use
+  --features FEATURES   Features to use, separated by ';'
+  --use_gpu             Whether to use GPU for training
+  --fast                Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+  --seed SEED           Random seed
+  --time_limit TIME_LIMIT
+                        Time limit for optimization
+```
+
+
+
+%package help
+Summary: Development documents and examples for autoxgb
+Provides: python3-autoxgb-doc
+%description help
+# AutoXGB
+
+
+XGBoost + Optuna: no brainer
+
+- auto train xgboost directly from CSV files
+- auto tune xgboost using optuna
+- auto serve best xgboost model using fastapi
+
+NOTE: PRs are currently not accepted. If there are issues/problems, please create an issue.
+
+# Installation
+
+Install using pip
+
+    pip install autoxgb
+
+
+# Usage
+Training a model using AutoXGB is a piece of cake. All you need is some tabular data.
+
+## Parameters
+
+```python
+
+###############################################################################
+### required parameters
+###############################################################################
+
+# path to training data
+train_filename = "data_samples/binary_classification.csv"
+
+# path to output folder to store artifacts
+output = "output"
+
+###############################################################################
+### optional parameters
+###############################################################################
+
+# path to test data. if specified, the model will be evaluated on the test data
+# and test_predictions.csv will be saved to the output folder
+# if not specified, only OOF predictions will be saved
+# test_filename = "test.csv"
+test_filename = None
+
+# task: classification or regression
+# if not specified, the task will be inferred automatically
+# task = "classification"
+# task = "regression"
+task = None
+
+# an id column
+# if not specified, the id column will be generated automatically with the name `id`
+# idx = "id"
+idx = None
+
+# target columns are a list of strings
+# if not specified, the target column will be assumed to be named `target`
+# and the problem will be treated as one of: binary classification, multiclass classification,
+# or single column regression
+# targets = ["target"]
+# targets = ["target1", "target2"]
+targets = ["income"]
+
+# feature columns are a list of strings
+# if not specified, all columns except `id`, `targets` & `kfold` columns will be used
+# features = ["col1", "col2"]
+features = None
+
+# categorical_features is a list of strings
+# if not specified, categorical columns will be inferred automatically
+# categorical_features = ["col1", "col2"]
+categorical_features = None
+
+# use_gpu is a boolean
+# if not specified, GPU is not used
+# use_gpu = True
+# use_gpu = False
+use_gpu = True
+
+# number of folds to use for cross-validation
+# default is 5
+num_folds = 5
+
+# random seed for reproducibility
+# default is 42
+seed = 42
+
+# number of optuna trials to run
+# default is 1000
+# num_trials = 1000
+num_trials = 100
+
+# time_limit for optuna trials in seconds
+# if not specified, timeout is not set and all trials are run
+# time_limit = None
+time_limit = 360
+
+# if fast is set to True, the hyperparameter tuning will use only one fold
+# however, the model will be trained on all folds in the end
+# to generate OOF predictions and test predictions
+# default is False
+# fast = False
+fast = False
+```
+
+# Python API
+
+To train a new model, you can run:
+
+```python
+from autoxgb import AutoXGB
+
+
+# required parameters:
+train_filename = "data_samples/binary_classification.csv"
+output = "output"
+
+# optional parameters
+test_filename = None
+task = None
+idx = None
+targets = ["income"]
+features = None
+categorical_features = None
+use_gpu = True
+num_folds = 5
+seed = 42
+num_trials = 100
+time_limit = 360
+fast = False
+
+# Now it's time to train the model!
+axgb = AutoXGB(
+    train_filename=train_filename,
+    output=output,
+    test_filename=test_filename,
+    task=task,
+    idx=idx,
+    targets=targets,
+    features=features,
+    categorical_features=categorical_features,
+    use_gpu=use_gpu,
+    num_folds=num_folds,
+    seed=seed,
+    num_trials=num_trials,
+    time_limit=time_limit,
+    fast=fast,
+)
+axgb.train()
+```
+
+# CLI
+
+Train the model using the `autoxgb train` command. The parameters are the same as above.
+
+```
+autoxgb train \
+ --train_filename datasets/30train.csv \
+ --output outputs/30days \
+ --test_filename datasets/30test.csv \
+ --use_gpu
+```
+
+You can also serve the trained model using the `autoxgb serve` command.
+
+```bash
+autoxgb serve --model_path outputs/mll --host 0.0.0.0 --debug
+```
+
+To know more about a command, run:
+
+    `autoxgb <command> --help`
+
+```
+autoxgb train --help
+
+
+usage: autoxgb <command> [<args>] train [-h] --train_filename TRAIN_FILENAME [--test_filename TEST_FILENAME] --output
+                                        OUTPUT [--task {classification,regression}] [--idx IDX] [--targets TARGETS]
+                                        [--num_folds NUM_FOLDS] [--features FEATURES] [--use_gpu] [--fast]
+                                        [--seed SEED] [--time_limit TIME_LIMIT]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --train_filename TRAIN_FILENAME
+                        Path to training file
+  --test_filename TEST_FILENAME
+                        Path to test file
+  --output OUTPUT       Path to output directory
+  --task {classification,regression}
+                        User defined task type
+  --idx IDX             ID column
+  --targets TARGETS     Target column(s). If there are multiple targets, separate by ';'
+  --num_folds NUM_FOLDS
+                        Number of folds to use
+  --features FEATURES   Features to use, separated by ';'
+  --use_gpu             Whether to use GPU for training
+  --fast                Whether to use fast mode for tuning params. Only one fold will be used if fast mode is set
+  --seed SEED           Random seed
+  --time_limit TIME_LIMIT
+                        Time limit for optimization
+```
+
+
+
+%prep
+%autosetup -n autoxgb-0.2.2
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-autoxgb -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Fri May 05 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.2-1
+- Package Spec generated
diff --git a/sources b/sources
@@ -0,0 +1 @@
+01132f3f5cce1377b8698ad6ffcd248d autoxgb-0.2.2.tar.gz