| | | |
|---|---|---|
| author | CoprDistGit <infra@openeuler.org> | 2023-05-10 09:20:50 +0000 |
| committer | CoprDistGit <infra@openeuler.org> | 2023-05-10 09:20:50 +0000 |
| commit | 4b4dcc75c88e93a16544aa5cb0441e91ec5ef2e7 (patch) | |
| tree | 546fec6a9c4f82f3dbc77a8e0515618972efa1b5 | |
| parent | b6eb36d606d9bda1901db527c0ff62b1dedf1887 (diff) | |
automatic import of python-skt
| | | |
|---|---|---|
| -rw-r--r-- | .gitignore | 1 |
| -rw-r--r-- | python-skt.spec | 1062 |
| -rw-r--r-- | sources | 1 |

3 files changed, 1064 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/skt-0.2.88.tar.gz
diff --git a/python-skt.spec b/python-skt.spec
new file mode 100644
index 0000000..e0f1e48
--- /dev/null
+++ b/python-skt.spec
@@ -0,0 +1,1062 @@
%global _empty_manifest_terminate_build 0
Name:		python-skt
Version:	0.2.88
Release:	1
Summary:	SKT package
License:	MIT License
URL:		https://github.com/sktaiflow/skt
Source0:	https://mirrors.nju.edu.cn/pypi/web/packages/2d/2f/556e72ec755bbb04509ea3e788e5021dab64258b063b1d626809e0f82642/skt-0.2.88.tar.gz
BuildArch:	noarch

Requires:	python3-streamz
Requires:	python3-confluent-kafka
Requires:	python3-thrift-sasl
Requires:	python3-hvac
Requires:	python3-pyhive[hive]
Requires:	python3-pyarrow
Requires:	python3-pandas
Requires:	python3-slackclient
Requires:	python3-httplib2
Requires:	python3-click
Requires:	python3-PyGithub
Requires:	python3-pycryptodome
Requires:	python3-tabulate
Requires:	python3-pandas-gbq
Requires:	python3-google-cloud-bigquery-storage
Requires:	python3-grpcio
Requires:	python3-sqlalchemy
Requires:	python3-packaging
Requires:	python3-tqdm
Requires:	python3-ipywidgets
Requires:	python3-hmsclient-hive-3
Requires:	python3-google-cloud-monitoring
Requires:	python3-redis
Requires:	python3-pyathena
Requires:	python3-opensearch-py
Requires:	python3-requests-aws4auth
Requires:	python3-bayesian-optimization
Requires:	python3-catboost
Requires:	python3-plotnine
Requires:	python3-shap
Requires:	python3-gensim
Requires:	python3-seaborn
Requires:	python3-scikit-learn
Requires:	python3-scipy
Requires:	python3-lifelines
Requires:	python3-xgboost
Requires:	python3-lightgbm
Requires:	python3-implicit
Requires:	python3-matplotlib
Requires:	python3-mushroom-rl
Requires:	python3-pytorch-widedeep
Requires:	python3-RL-for-reco
Requires:	python3-LightGBMwithBayesOpt
Requires:	python3-tensorboardX
Requires:	python3-torchsummary
Requires:	python3-pycaret
Requires:	python3-openpyxl
Requires:	python3-netcal
Requires:	python3-haversine
Requires:	python3-pyfarmhash
Requires:	python3-mabalgs

%description
# SKT Package


[](https://github.com/sktaiflow/skt/actions)

This is a highly site-dependent package.
Resources are abstracted into the package structure.


## Usage


Hive metastore
```python
from skt.ye import get_hms

c = get_hms()
c.get_partition_names("db", "table")
c.close()
```


Hash and unhash
```python
from skt.lake import hash_s
from skt.lake import unhash_s

unhashed_list = ['0000000000']
hashed_list = hash_s(unhashed_list)
unhash_s(hashed_list)
```


Execute a Hive query without fetching the result
```python
from skt.ye import hive_execute
hive_execute(ddl_or_ctas_query)
```


Fetch a result set from a Hive query
```python
from skt.ye import hive_get_result
result_set = hive_get_result(select_query)
```


Get a pandas dataframe from a Hive query result set
```python
from skt.ye import hive_to_pandas
pandas_df = hive_to_pandas(hive_query)
```


Get a pandas dataframe from a Parquet file in HDFS
```python
from skt.ye import parquet_to_pandas
pandas_df = parquet_to_pandas(hdfs_path)
```


Save a pandas dataframe as Parquet in HDFS
```python
from skt.ye import get_spark
from skt.ye import pandas_to_parquet
spark = get_spark()
pandas_to_parquet(pandas_df, hdfs_path, spark)  # we need Spark for this operation
spark.stop()
```


Work with Spark
```python
from skt.ye import get_spark
spark = get_spark()
# do work with the Spark session
spark.stop()
```
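
These helpers compose into a simple round trip. Below is a minimal sketch that pulls a query result into pandas and writes it back to HDFS as Parquet; the query and target path are placeholders, not real resources:
```python
from skt.ye import get_spark, hive_to_pandas, pandas_to_parquet

# Placeholder query and HDFS path, for illustration only
pandas_df = hive_to_pandas("SELECT * FROM db.table WHERE dt = '2020-01-01'")

spark = get_spark()
try:
    pandas_to_parquet(pandas_df, "/user/me/table_snapshot", spark)
finally:
    spark.stop()  # always release the Spark session
```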


Work with the spark-bigquery-connector
```python
# SELECT
from skt.gcp import bq_table_to_pandas
pandas_df = bq_table_to_pandas("dataset", "table_name", ["col_1", "col_2"], "2020-01-01", "svc_mgmt_num is not null")
# INSERT
from skt.gcp import pandas_to_bq_table
pandas_to_bq_table(pandas_df, "dataset", "table_name", "2020-03-01")
```


Send a Slack message
```python
import pandas as pd

from skt.ye import slack_send

text = 'Hello'
username = 'airflow'
channel = '#leavemealone'
slack_send(text=text, username=username, channel=channel)
# Send a dataframe as text
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
slack_send(text=df, username=username, channel=channel, dataframe=True)
```


Get a BigQuery client
```python
from skt.gcp import get_bigquery_client
bq = get_bigquery_client()
bq.query(query)
```
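
Assuming get_bigquery_client() returns a standard google-cloud-bigquery Client (the README does not say so explicitly), results can be iterated like this; the query and table name are made up for illustration:
```python
from skt.gcp import get_bigquery_client

bq = get_bigquery_client()
# QueryJob.result() blocks until the job finishes and yields rows
job = bq.query("SELECT c_1 FROM mnoai.user_logs LIMIT 10")
for row in job.result():
    print(row)
```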

IPython BigQuery magic
```python
from skt.gcp import import_bigquery_ipython_magic

import_bigquery_ipython_magic()

query_params = {
    "p_1": "v_1",
    "dataset": "mnoai",
}
```
```python
%%bq --params $query_params

SELECT c_1
FROM {dataset}.user_logs
WHERE c_1 = @p_1
```

Access MLS
```python
from skt.mls import set_model_name
from skt.mls import get_recent_model_path
from skt.ye import get_pkl_from_hdfs

set_model_name(COMM_DB, params)
path = get_recent_model_path(COMM_DB, model_key)
model = get_pkl_from_hdfs(f'{path}')
```

MLS Model Registry (upload the model binary (model.tar.gz) and model meta (model.json) to AWS S3 from YE)
```python
from skt.mls import save_model

# model object generated by LightGBM or XGBoost
model

# model name
model_name = 'sample_model'
# model version
model_version = 'v1'
# AWS environment, one of 'stg' / 'prd' / 'dev' (default is 'stg')
aws_env = 'stg'
# list of the features used in the ML model, as strings (only for the XGBoost model_type)
feature_list = ['feature_1', 'feature_2', 'feature_3']
# force overwriting of the model files on S3 if they already exist (default is False)
force = False

save_model(model, model_name, model_version, aws_env, force)
```
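
As a concrete end-to-end sketch, the snippet below trains a small LightGBM model on toy data and registers it with save_model as documented above; the dataset, model name, and version are made up:
```python
import lightgbm as lgb
from sklearn.datasets import make_classification

from skt.mls import save_model

# Toy training data, for illustration only
X, y = make_classification(n_samples=1000, n_features=3, n_informative=2,
                           n_redundant=0, random_state=0)
model = lgb.LGBMClassifier(n_estimators=50).fit(X, y)

# Register the model under a made-up name/version in the staging environment
save_model(model, 'sample_model', 'v1', 'stg', False)
```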


MLS *meta_table* & *meta_table_item* related methods
```python
from skt.mls import get_meta_table
from skt.mls import create_meta_table_item
from skt.mls import update_meta_table_item
from skt.mls import get_meta_table_item
from skt.mls import meta_table_to_pandas
from skt.mls import pandas_to_meta_table

# Get meta_table info
get_meta_table(meta_table_name, aws_env, edd)
# Create a meta_item
create_meta_table_item(meta_table_name, item_name, item_dict, aws_env, edd)
# Update a meta_item
update_meta_table_item(meta_table_name, item_name, item_dict, aws_env, edd)
# Get a meta_item
get_meta_table_item(meta_table_name, item_name, aws_env, edd)
# Get a meta_table as a pandas dataframe
meta_table_to_pandas(meta_table_name, aws_env, edd)
# Update a meta_table from a pandas dataframe
pandas_to_meta_table(method, meta_table_name, dataframe, key, values, aws_env, edd)


# For details, use ?{method} (e.g. ?get_meta_table)
# EDD users must set edd=True
```

MLS *model_meta* related methods
(*user* must be set for the ml_model)
```python
from skt.mls import get_ml_model
from skt.mls import get_ml_model_meta
from skt.mls import update_ml_model_meta

# Get an ml_model
get_ml_model(user, model_name, model_version, aws_env, edd)
# Get the model_meta of an ml_model
get_ml_model_meta(user, model_name, model_version, aws_env, edd)
# Update or create meta_item(s)
update_ml_model_meta(user, model_name, model_version, model_meta_dict, aws_env, edd)

# For details, use ?{method} (e.g. ?get_ml_model)
# EDD users must set edd=True
```



Use the NES CLI
```bash
nes input_notebook_url -p k1 v1 -p k2 v2 -p k3 v3
```


Use the GitHub util
```python
from skt.ye import get_github_util
g = get_github_util()
# query GraphQL
res = g.query_gql(graph_ql)
# get a file from a GitHub repository
byte_object = g.download_from_git(github_url_path)
```


## Installation

```sh
$ pip install skt --upgrade
```

If you would like to install the submodules for AIR:

```sh
$ pip install skt[air] --upgrade
```

## Develop

Create an issue first and follow GitHub flow:
https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/github-flow


# AIPS EDA tools

## OVERVIEW

- Common modules of functionality that can be used during **modeling EDA**
- **Modules**
  - 1) EDA (numeric / categorical variables)
<br>
<br>

## 1) EDA
#### 1. Numeric variable EDA
- **def** *numeric_eda_plot*

```
    EDA plot function for numeric features

    Args:
        - df : pandas DataFrame holding the data to explore
        - feature_list : features to explore (columns of df)
        - label_col : label (or hue) column
        - cols : number of grid columns in a multi-plot (the number of rows is derived from feature_list)
        - n_samples : number of samples to draw per label (default = -1, i.e. run the EDA on the full data)
        - plot_type : density or box (default = 'density')
        - stat_yn : whether to print basic statistics (mean / min / max / 1q / 3q) (default = False)
        - figsize : (default = (7, 4))

    Returns:
        matplotlib.pyplot object

    Example:
        fig = numeric_eda_plot(df, ['age'], 'answer', cols=1, n_samples=10000, plot_type='density', stat_yn=True, figsize=(7, 4))
        fig

        To save the EDA image:
        fig.savefig('filename')
```


#### 2. Categorical variable EDA
- **def** *categorical_eda_plot*

```
    EDA plot function for categorical features

    Args:
        - df : pandas DataFrame holding the data to explore
        - feature_list : features to explore (columns of df)
        - label_col : label (or hue) column
        - cols : number of grid columns in a multi-plot (the number of rows is derived from feature_list)
        - n_samples : number of samples to draw per label (default = -1, i.e. run the EDA on the full data)
        - figsize : (default = (7, 4))

    Returns:
        matplotlib.pyplot object

    Example:
        fig = categorical_eda_plot(df, ['sex_cd'], 'answer', cols=1, n_samples=10000, figsize=(7, 4))
        fig

        To save the EDA image:
        fig.savefig('filename')
```
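
As a usage sketch for the plot functions above: the import path below is hypothetical (the README does not show where the EDA helpers live), and the data is a toy frame:
```python
import pandas as pd

# Hypothetical import path; adjust to wherever the EDA module actually lives
from skt.eda import numeric_eda_plot

# Toy data: one numeric feature plus a binary label column
df = pd.DataFrame({
    'age': [23, 45, 31, 52, 38, 27],
    'answer': [0, 1, 0, 1, 1, 0],
})

fig = numeric_eda_plot(df, ['age'], 'answer', cols=1, n_samples=-1,
                       plot_type='density', stat_yn=True, figsize=(7, 4))
fig.savefig('age_eda.png')
```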


%package -n python3-skt
Summary:	SKT package
Provides:	python-skt
BuildRequires:	python3-devel
BuildRequires:	python3-setuptools
BuildRequires:	python3-pip
%description -n python3-skt
# SKT Package


[](https://github.com/sktaiflow/skt/actions)

This is a highly site-dependent package.
Resources are abstracted into the package structure.


## Usage


Hive metastore
```python
from skt.ye import get_hms

c = get_hms()
c.get_partition_names("db", "table")
c.close()
```


Hash and unhash
```python
from skt.lake import hash_s
from skt.lake import unhash_s

unhashed_list = ['0000000000']
hashed_list = hash_s(unhashed_list)
unhash_s(hashed_list)
```


Execute a Hive query without fetching the result
```python
from skt.ye import hive_execute
hive_execute(ddl_or_ctas_query)
```


Fetch a result set from a Hive query
```python
from skt.ye import hive_get_result
result_set = hive_get_result(select_query)
```


Get a pandas dataframe from a Hive query result set
```python
from skt.ye import hive_to_pandas
pandas_df = hive_to_pandas(hive_query)
```


Get a pandas dataframe from a Parquet file in HDFS
```python
from skt.ye import parquet_to_pandas
pandas_df = parquet_to_pandas(hdfs_path)
```


Save a pandas dataframe as Parquet in HDFS
```python
from skt.ye import get_spark
from skt.ye import pandas_to_parquet
spark = get_spark()
pandas_to_parquet(pandas_df, hdfs_path, spark)  # we need Spark for this operation
spark.stop()
```


Work with Spark
```python
from skt.ye import get_spark
spark = get_spark()
# do work with the Spark session
spark.stop()
```


Work with the spark-bigquery-connector
```python
# SELECT
from skt.gcp import bq_table_to_pandas
pandas_df = bq_table_to_pandas("dataset", "table_name", ["col_1", "col_2"], "2020-01-01", "svc_mgmt_num is not null")
# INSERT
from skt.gcp import pandas_to_bq_table
pandas_to_bq_table(pandas_df, "dataset", "table_name", "2020-03-01")
```


Send a Slack message
```python
import pandas as pd

from skt.ye import slack_send

text = 'Hello'
username = 'airflow'
channel = '#leavemealone'
slack_send(text=text, username=username, channel=channel)
# Send a dataframe as text
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
slack_send(text=df, username=username, channel=channel, dataframe=True)
```


Get a BigQuery client
```python
from skt.gcp import get_bigquery_client
bq = get_bigquery_client()
bq.query(query)
```

IPython BigQuery magic
```python
from skt.gcp import import_bigquery_ipython_magic

import_bigquery_ipython_magic()

query_params = {
    "p_1": "v_1",
    "dataset": "mnoai",
}
```
```python
%%bq --params $query_params

SELECT c_1
FROM {dataset}.user_logs
WHERE c_1 = @p_1
```

Access MLS
```python
from skt.mls import set_model_name
from skt.mls import get_recent_model_path
from skt.ye import get_pkl_from_hdfs

set_model_name(COMM_DB, params)
path = get_recent_model_path(COMM_DB, model_key)
model = get_pkl_from_hdfs(f'{path}')
```

MLS Model Registry (upload the model binary (model.tar.gz) and model meta (model.json) to AWS S3 from YE)
```python
from skt.mls import save_model

# model object generated by LightGBM or XGBoost
model

# model name
model_name = 'sample_model'
# model version
model_version = 'v1'
# AWS environment, one of 'stg' / 'prd' / 'dev' (default is 'stg')
aws_env = 'stg'
# list of the features used in the ML model, as strings (only for the XGBoost model_type)
feature_list = ['feature_1', 'feature_2', 'feature_3']
# force overwriting of the model files on S3 if they already exist (default is False)
force = False

save_model(model, model_name, model_version, aws_env, force)
```


MLS *meta_table* & *meta_table_item* related methods
```python
from skt.mls import get_meta_table
from skt.mls import create_meta_table_item
from skt.mls import update_meta_table_item
from skt.mls import get_meta_table_item
from skt.mls import meta_table_to_pandas
from skt.mls import pandas_to_meta_table

# Get meta_table info
get_meta_table(meta_table_name, aws_env, edd)
# Create a meta_item
create_meta_table_item(meta_table_name, item_name, item_dict, aws_env, edd)
# Update a meta_item
update_meta_table_item(meta_table_name, item_name, item_dict, aws_env, edd)
# Get a meta_item
get_meta_table_item(meta_table_name, item_name, aws_env, edd)
# Get a meta_table as a pandas dataframe
meta_table_to_pandas(meta_table_name, aws_env, edd)
# Update a meta_table from a pandas dataframe
pandas_to_meta_table(method, meta_table_name, dataframe, key, values, aws_env, edd)


# For details, use ?{method} (e.g. ?get_meta_table)
# EDD users must set edd=True
```

MLS *model_meta* related methods
(*user* must be set for the ml_model)
```python
from skt.mls import get_ml_model
from skt.mls import get_ml_model_meta
from skt.mls import update_ml_model_meta

# Get an ml_model
get_ml_model(user, model_name, model_version, aws_env, edd)
# Get the model_meta of an ml_model
get_ml_model_meta(user, model_name, model_version, aws_env, edd)
# Update or create meta_item(s)
update_ml_model_meta(user, model_name, model_version, model_meta_dict, aws_env, edd)

# For details, use ?{method} (e.g. ?get_ml_model)
# EDD users must set edd=True
```



Use the NES CLI
```bash
nes input_notebook_url -p k1 v1 -p k2 v2 -p k3 v3
```


Use the GitHub util
```python
from skt.ye import get_github_util
g = get_github_util()
# query GraphQL
res = g.query_gql(graph_ql)
# get a file from a GitHub repository
byte_object = g.download_from_git(github_url_path)
```


## Installation

```sh
$ pip install skt --upgrade
```

If you would like to install the submodules for AIR:

```sh
$ pip install skt[air] --upgrade
```

## Develop

Create an issue first and follow GitHub flow:
https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/github-flow


# AIPS EDA tools

## OVERVIEW

- Common modules of functionality that can be used during **modeling EDA**
- **Modules**
  - 1) EDA (numeric / categorical variables)
<br>
<br>

## 1) EDA
#### 1. Numeric variable EDA
- **def** *numeric_eda_plot*

```
    EDA plot function for numeric features

    Args:
        - df : pandas DataFrame holding the data to explore
        - feature_list : features to explore (columns of df)
        - label_col : label (or hue) column
        - cols : number of grid columns in a multi-plot (the number of rows is derived from feature_list)
        - n_samples : number of samples to draw per label (default = -1, i.e. run the EDA on the full data)
        - plot_type : density or box (default = 'density')
        - stat_yn : whether to print basic statistics (mean / min / max / 1q / 3q) (default = False)
        - figsize : (default = (7, 4))

    Returns:
        matplotlib.pyplot object

    Example:
        fig = numeric_eda_plot(df, ['age'], 'answer', cols=1, n_samples=10000, plot_type='density', stat_yn=True, figsize=(7, 4))
        fig

        To save the EDA image:
        fig.savefig('filename')
```


#### 2. Categorical variable EDA
- **def** *categorical_eda_plot*

```
    EDA plot function for categorical features

    Args:
        - df : pandas DataFrame holding the data to explore
        - feature_list : features to explore (columns of df)
        - label_col : label (or hue) column
        - cols : number of grid columns in a multi-plot (the number of rows is derived from feature_list)
        - n_samples : number of samples to draw per label (default = -1, i.e. run the EDA on the full data)
        - figsize : (default = (7, 4))

    Returns:
        matplotlib.pyplot object

    Example:
        fig = categorical_eda_plot(df, ['sex_cd'], 'answer', cols=1, n_samples=10000, figsize=(7, 4))
        fig

        To save the EDA image:
        fig.savefig('filename')
```


%package help
Summary:	Development documents and examples for skt
Provides:	python3-skt-doc
%description help
# SKT Package


[](https://github.com/sktaiflow/skt/actions)

This is a highly site-dependent package.
Resources are abstracted into the package structure.


## Usage


Hive metastore
```python
from skt.ye import get_hms

c = get_hms()
c.get_partition_names("db", "table")
c.close()
```


Hash and unhash
```python
from skt.lake import hash_s
from skt.lake import unhash_s

unhashed_list = ['0000000000']
hashed_list = hash_s(unhashed_list)
unhash_s(hashed_list)
```


Execute a Hive query without fetching the result
```python
from skt.ye import hive_execute
hive_execute(ddl_or_ctas_query)
```


Fetch a result set from a Hive query
```python
from skt.ye import hive_get_result
result_set = hive_get_result(select_query)
```


Get a pandas dataframe from a Hive query result set
```python
from skt.ye import hive_to_pandas
pandas_df = hive_to_pandas(hive_query)
```


Get a pandas dataframe from a Parquet file in HDFS
```python
from skt.ye import parquet_to_pandas
pandas_df = parquet_to_pandas(hdfs_path)
```


Save a pandas dataframe as Parquet in HDFS
```python
from skt.ye import get_spark
from skt.ye import pandas_to_parquet
spark = get_spark()
pandas_to_parquet(pandas_df, hdfs_path, spark)  # we need Spark for this operation
spark.stop()
```


Work with Spark
```python
from skt.ye import get_spark
spark = get_spark()
# do work with the Spark session
spark.stop()
```


Work with the spark-bigquery-connector
```python
# SELECT
from skt.gcp import bq_table_to_pandas
pandas_df = bq_table_to_pandas("dataset", "table_name", ["col_1", "col_2"], "2020-01-01", "svc_mgmt_num is not null")
# INSERT
from skt.gcp import pandas_to_bq_table
pandas_to_bq_table(pandas_df, "dataset", "table_name", "2020-03-01")
```


Send a Slack message
```python
import pandas as pd

from skt.ye import slack_send

text = 'Hello'
username = 'airflow'
channel = '#leavemealone'
slack_send(text=text, username=username, channel=channel)
# Send a dataframe as text
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
slack_send(text=df, username=username, channel=channel, dataframe=True)
```


Get a BigQuery client
```python
from skt.gcp import get_bigquery_client
bq = get_bigquery_client()
bq.query(query)
```

IPython BigQuery magic
```python
from skt.gcp import import_bigquery_ipython_magic

import_bigquery_ipython_magic()

query_params = {
    "p_1": "v_1",
    "dataset": "mnoai",
}
```
```python
%%bq --params $query_params

SELECT c_1
FROM {dataset}.user_logs
WHERE c_1 = @p_1
```

Access MLS
```python
from skt.mls import set_model_name
from skt.mls import get_recent_model_path
from skt.ye import get_pkl_from_hdfs

set_model_name(COMM_DB, params)
path = get_recent_model_path(COMM_DB, model_key)
model = get_pkl_from_hdfs(f'{path}')
```

MLS Model Registry (upload the model binary (model.tar.gz) and model meta (model.json) to AWS S3 from YE)
```python
from skt.mls import save_model

# model object generated by LightGBM or XGBoost
model

# model name
model_name = 'sample_model'
# model version
model_version = 'v1'
# AWS environment, one of 'stg' / 'prd' / 'dev' (default is 'stg')
aws_env = 'stg'
# list of the features used in the ML model, as strings (only for the XGBoost model_type)
feature_list = ['feature_1', 'feature_2', 'feature_3']
# force overwriting of the model files on S3 if they already exist (default is False)
force = False

save_model(model, model_name, model_version, aws_env, force)
```


MLS *meta_table* & *meta_table_item* related methods
```python
from skt.mls import get_meta_table
from skt.mls import create_meta_table_item
from skt.mls import update_meta_table_item
from skt.mls import get_meta_table_item
from skt.mls import meta_table_to_pandas
from skt.mls import pandas_to_meta_table

# Get meta_table info
get_meta_table(meta_table_name, aws_env, edd)
# Create a meta_item
create_meta_table_item(meta_table_name, item_name, item_dict, aws_env, edd)
# Update a meta_item
update_meta_table_item(meta_table_name, item_name, item_dict, aws_env, edd)
# Get a meta_item
get_meta_table_item(meta_table_name, item_name, aws_env, edd)
# Get a meta_table as a pandas dataframe
meta_table_to_pandas(meta_table_name, aws_env, edd)
# Update a meta_table from a pandas dataframe
pandas_to_meta_table(method, meta_table_name, dataframe, key, values, aws_env, edd)


# For details, use ?{method} (e.g. ?get_meta_table)
# EDD users must set edd=True
```

MLS *model_meta* related methods
(*user* must be set for the ml_model)
```python
from skt.mls import get_ml_model
from skt.mls import get_ml_model_meta
from skt.mls import update_ml_model_meta

# Get an ml_model
get_ml_model(user, model_name, model_version, aws_env, edd)
# Get the model_meta of an ml_model
get_ml_model_meta(user, model_name, model_version, aws_env, edd)
# Update or create meta_item(s)
update_ml_model_meta(user, model_name, model_version, model_meta_dict, aws_env, edd)

# For details, use ?{method} (e.g. ?get_ml_model)
# EDD users must set edd=True
```



Use the NES CLI
```bash
nes input_notebook_url -p k1 v1 -p k2 v2 -p k3 v3
```


Use the GitHub util
```python
from skt.ye import get_github_util
g = get_github_util()
# query GraphQL
res = g.query_gql(graph_ql)
# get a file from a GitHub repository
byte_object = g.download_from_git(github_url_path)
```


## Installation

```sh
$ pip install skt --upgrade
```

If you would like to install the submodules for AIR:

```sh
$ pip install skt[air] --upgrade
```

## Develop

Create an issue first and follow GitHub flow:
https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/github-flow


# AIPS EDA tools

## OVERVIEW

- Common modules of functionality that can be used during **modeling EDA**
- **Modules**
  - 1) EDA (numeric / categorical variables)
<br>
<br>

## 1) EDA
#### 1. Numeric variable EDA
- **def** *numeric_eda_plot*

```
    EDA plot function for numeric features

    Args:
        - df : pandas DataFrame holding the data to explore
        - feature_list : features to explore (columns of df)
        - label_col : label (or hue) column
        - cols : number of grid columns in a multi-plot (the number of rows is derived from feature_list)
        - n_samples : number of samples to draw per label (default = -1, i.e. run the EDA on the full data)
        - plot_type : density or box (default = 'density')
        - stat_yn : whether to print basic statistics (mean / min / max / 1q / 3q) (default = False)
        - figsize : (default = (7, 4))

    Returns:
        matplotlib.pyplot object

    Example:
        fig = numeric_eda_plot(df, ['age'], 'answer', cols=1, n_samples=10000, plot_type='density', stat_yn=True, figsize=(7, 4))
        fig

        To save the EDA image:
        fig.savefig('filename')
```


#### 2. Categorical variable EDA
- **def** *categorical_eda_plot*

```
    EDA plot function for categorical features

    Args:
        - df : pandas DataFrame holding the data to explore
        - feature_list : features to explore (columns of df)
        - label_col : label (or hue) column
        - cols : number of grid columns in a multi-plot (the number of rows is derived from feature_list)
        - n_samples : number of samples to draw per label (default = -1, i.e. run the EDA on the full data)
        - figsize : (default = (7, 4))

    Returns:
        matplotlib.pyplot object

    Example:
        fig = categorical_eda_plot(df, ['sex_cd'], 'answer', cols=1, n_samples=10000, figsize=(7, 4))
        fig

        To save the EDA image:
        fig.savefig('filename')
```


%prep
%autosetup -n skt-0.2.88

%build
%py3_build

%install
%py3_install
install -d -m755 %{buildroot}/%{_pkgdocdir}
if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
# Build the file lists consumed by the %files sections below
pushd %{buildroot}
if [ -d usr/lib ]; then
	find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/lib64 ]; then
	find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/bin ]; then
	find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
fi
if [ -d usr/sbin ]; then
	find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
fi
touch doclist.lst
if [ -d usr/share/man ]; then
	find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
fi
popd
mv %{buildroot}/filelist.lst .
mv %{buildroot}/doclist.lst .

%files -n python3-skt -f filelist.lst
%dir %{python3_sitelib}/*

%files help -f doclist.lst
%{_docdir}/*

%changelog
* Wed May 10 2023 Python_Bot <Python_Bot@openeuler.org> - 0.2.88-1
- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+dd2e65bdd461afe28ef8db6c3fb9210d skt-0.2.88.tar.gz