From 1e60bd629a438352f4b5953ca148dc2a649d60d0 Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Wed, 17 May 2023 02:59:38 +0000 Subject: automatic import of python-datawig --- .gitignore | 1 + python-datawig.spec | 399 ++++++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 401 insertions(+) create mode 100644 python-datawig.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..4043d37 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/datawig-0.2.0.tar.gz diff --git a/python-datawig.spec b/python-datawig.spec new file mode 100644 index 0000000..29f567b --- /dev/null +++ b/python-datawig.spec @@ -0,0 +1,399 @@ +%global _empty_manifest_terminate_build 0 +Name: python-datawig +Version: 0.2.0 +Release: 1 +Summary: Imputation for tables with missing values +License: Apache License 2.0 +URL: https://github.com/awslabs/datawig +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/ad/a9/855c39f27e07c5727fede305f2dc4ce04137f974b5b07c7da8f7b13a72dc/datawig-0.2.0.tar.gz +BuildArch: noarch + + +%description +[![PyPI version](https://badge.fury.io/py/datawig.svg)](https://badge.fury.io/py/datawig.svg) +[![GitHub license](https://img.shields.io/github/license/awslabs/datawig.svg)](https://github.com/awslabs/datawig/blob/master/LICENSE) +[![GitHub issues](https://img.shields.io/github/issues/awslabs/datawig.svg)](https://github.com/awslabs/datawig/issues) +[![Build Status](https://travis-ci.org/awslabs/datawig.svg?branch=master)](https://travis-ci.org/awslabs/datawig) +DataWig learns Machine Learning models to impute missing values in tables. +See our user-guide and extended documentation [here](https://datawig.readthedocs.io/en/latest). +## Installation +### CPU +```bash +pip3 install datawig +``` +### GPU +If you want to run DataWig on a GPU you need to make sure your version of Apache MXNet Incubating contains the GPU bindings. +Depending on your version of CUDA, you can do this by running the following: +```bash +wget https://raw.githubusercontent.com/awslabs/datawig/master/requirements/requirements.gpu-cu${CUDA_VERSION}.txt +pip install datawig --no-deps -r requirements.gpu-cu${CUDA_VERSION}.txt +rm requirements.gpu-cu${CUDA_VERSION}.txt +``` +where `${CUDA_VERSION}` can be `75` (7.5), `80` (8.0), `90` (9.0), or `91` (9.1). +## Running DataWig +The DataWig API expects your data as a [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html). Here is an example of how the dataframe might look: +|Product Type | Description | Size | Color | +|-------------|-----------------------|------|-------| +| Shoe | Ideal for Running | 12UK | Black | +| SDCards | Best SDCard ever ... | 8GB | Blue | +| Dress | This **yellow** dress | M | **?** | +### Quickstart Example +For most use cases, the `SimpleImputer` class is the best starting point. For convenience there is the function [SimpleImputer.complete](https://datawig.readthedocs.io/en/latest/source/API.html#datawig.simple_imputer.SimpleImputer.complete) that takes a DataFrame and fits an imputation model for each column with missing values, with all other columns as inputs: +```python +import datawig, numpy +# generate some data with simple nonlinear dependency +df = datawig.utils.generate_df_numeric() +# mask 10% of the values +df_with_missing = df.mask(numpy.random.rand(*df.shape) > .9) +# impute missing values +df_with_missing_imputed = datawig.SimpleImputer.complete(df_with_missing) +``` +You can also impute values in specific columns only (called `output_column` below) using values in other columns (called `input_columns` below). DataWig currently supports imputation of categorical columns and numeric columns. +### Imputation of categorical columns +```python +import datawig +df = datawig.utils.generate_df_string( num_samples=200, + data_column_name='sentences', + label_column_name='label') +df_train, df_test = datawig.utils.random_split(df) +#Initialize a SimpleImputer model +imputer = datawig.SimpleImputer( + input_columns=['sentences'], # column(s) containing information about the column we want to impute + output_column='label', # the column we'd like to impute values for + output_path = 'imputer_model' # stores model data and metrics + ) +#Fit an imputer model on the train data +imputer.fit(train_df=df_train) +#Impute missing values and return original dataframe with predictions +imputed = imputer.predict(df_test) +``` +### Imputation of numerical columns +```python +import datawig +df = datawig.utils.generate_df_numeric( num_samples=200, + data_column_name='x', + label_column_name='y') +df_train, df_test = datawig.utils.random_split(df) +#Initialize a SimpleImputer model +imputer = datawig.SimpleImputer( + input_columns=['x'], # column(s) containing information about the column we want to impute + output_column='y', # the column we'd like to impute values for + output_path = 'imputer_model' # stores model data and metrics + ) +#Fit an imputer model on the train data +imputer.fit(train_df=df_train, num_epochs=50) +#Impute missing values and return original dataframe with predictions +imputed = imputer.predict(df_test) +``` +In order to have more control over the types of models and preprocessings, the `Imputer` class allows directly specifying all relevant model features and parameters. +For details on usage, refer to the provided [examples](./examples). +### Acknowledgments +Thanks to [David Greenberg](https://github.com/dgreenberg) for the package name. +### Building documentation +```bash +git clone git@github.com:awslabs/datawig.git +cd datawig/docs +make html +open _build/html/index.html +``` +### Executing Tests +Clone the repository from git and set up virtualenv in the root dir of the package: +``` +python3 -m venv venv +``` +Install the package from local sources: +``` +./venv/bin/pip install -e . +``` +Run tests: +``` +./venv/bin/pip install -r requirements/requirements.dev.txt +./venv/bin/python -m pytest +``` +### Updating PyPi distribution +Before updating, increment the version in setup.py. +``` +git clone git@github.com:awslabs/datawig.git +cd datawig +# build local distribution for current version +python setup.py sdist +# upload to PyPi +twine upload --skip-existing dist/* +``` + +%package -n python3-datawig +Summary: Imputation for tables with missing values +Provides: python-datawig +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-datawig +[![PyPI version](https://badge.fury.io/py/datawig.svg)](https://badge.fury.io/py/datawig.svg) +[![GitHub license](https://img.shields.io/github/license/awslabs/datawig.svg)](https://github.com/awslabs/datawig/blob/master/LICENSE) +[![GitHub issues](https://img.shields.io/github/issues/awslabs/datawig.svg)](https://github.com/awslabs/datawig/issues) +[![Build Status](https://travis-ci.org/awslabs/datawig.svg?branch=master)](https://travis-ci.org/awslabs/datawig) +DataWig learns Machine Learning models to impute missing values in tables. +See our user-guide and extended documentation [here](https://datawig.readthedocs.io/en/latest). +## Installation +### CPU +```bash +pip3 install datawig +``` +### GPU +If you want to run DataWig on a GPU you need to make sure your version of Apache MXNet Incubating contains the GPU bindings. +Depending on your version of CUDA, you can do this by running the following: +```bash +wget https://raw.githubusercontent.com/awslabs/datawig/master/requirements/requirements.gpu-cu${CUDA_VERSION}.txt +pip install datawig --no-deps -r requirements.gpu-cu${CUDA_VERSION}.txt +rm requirements.gpu-cu${CUDA_VERSION}.txt +``` +where `${CUDA_VERSION}` can be `75` (7.5), `80` (8.0), `90` (9.0), or `91` (9.1). +## Running DataWig +The DataWig API expects your data as a [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html). Here is an example of how the dataframe might look: +|Product Type | Description | Size | Color | +|-------------|-----------------------|------|-------| +| Shoe | Ideal for Running | 12UK | Black | +| SDCards | Best SDCard ever ... | 8GB | Blue | +| Dress | This **yellow** dress | M | **?** | +### Quickstart Example +For most use cases, the `SimpleImputer` class is the best starting point. For convenience there is the function [SimpleImputer.complete](https://datawig.readthedocs.io/en/latest/source/API.html#datawig.simple_imputer.SimpleImputer.complete) that takes a DataFrame and fits an imputation model for each column with missing values, with all other columns as inputs: +```python +import datawig, numpy +# generate some data with simple nonlinear dependency +df = datawig.utils.generate_df_numeric() +# mask 10% of the values +df_with_missing = df.mask(numpy.random.rand(*df.shape) > .9) +# impute missing values +df_with_missing_imputed = datawig.SimpleImputer.complete(df_with_missing) +``` +You can also impute values in specific columns only (called `output_column` below) using values in other columns (called `input_columns` below). DataWig currently supports imputation of categorical columns and numeric columns. +### Imputation of categorical columns +```python +import datawig +df = datawig.utils.generate_df_string( num_samples=200, + data_column_name='sentences', + label_column_name='label') +df_train, df_test = datawig.utils.random_split(df) +#Initialize a SimpleImputer model +imputer = datawig.SimpleImputer( + input_columns=['sentences'], # column(s) containing information about the column we want to impute + output_column='label', # the column we'd like to impute values for + output_path = 'imputer_model' # stores model data and metrics + ) +#Fit an imputer model on the train data +imputer.fit(train_df=df_train) +#Impute missing values and return original dataframe with predictions +imputed = imputer.predict(df_test) +``` +### Imputation of numerical columns +```python +import datawig +df = datawig.utils.generate_df_numeric( num_samples=200, + data_column_name='x', + label_column_name='y') +df_train, df_test = datawig.utils.random_split(df) +#Initialize a SimpleImputer model +imputer = datawig.SimpleImputer( + input_columns=['x'], # column(s) containing information about the column we want to impute + output_column='y', # the column we'd like to impute values for + output_path = 'imputer_model' # stores model data and metrics + ) +#Fit an imputer model on the train data +imputer.fit(train_df=df_train, num_epochs=50) +#Impute missing values and return original dataframe with predictions +imputed = imputer.predict(df_test) +``` +In order to have more control over the types of models and preprocessings, the `Imputer` class allows directly specifying all relevant model features and parameters. +For details on usage, refer to the provided [examples](./examples). +### Acknowledgments +Thanks to [David Greenberg](https://github.com/dgreenberg) for the package name. +### Building documentation +```bash +git clone git@github.com:awslabs/datawig.git +cd datawig/docs +make html +open _build/html/index.html +``` +### Executing Tests +Clone the repository from git and set up virtualenv in the root dir of the package: +``` +python3 -m venv venv +``` +Install the package from local sources: +``` +./venv/bin/pip install -e . +``` +Run tests: +``` +./venv/bin/pip install -r requirements/requirements.dev.txt +./venv/bin/python -m pytest +``` +### Updating PyPi distribution +Before updating, increment the version in setup.py. +``` +git clone git@github.com:awslabs/datawig.git +cd datawig +# build local distribution for current version +python setup.py sdist +# upload to PyPi +twine upload --skip-existing dist/* +``` + +%package help +Summary: Development documents and examples for datawig +Provides: python3-datawig-doc +%description help +[![PyPI version](https://badge.fury.io/py/datawig.svg)](https://badge.fury.io/py/datawig.svg) +[![GitHub license](https://img.shields.io/github/license/awslabs/datawig.svg)](https://github.com/awslabs/datawig/blob/master/LICENSE) +[![GitHub issues](https://img.shields.io/github/issues/awslabs/datawig.svg)](https://github.com/awslabs/datawig/issues) +[![Build Status](https://travis-ci.org/awslabs/datawig.svg?branch=master)](https://travis-ci.org/awslabs/datawig) +DataWig learns Machine Learning models to impute missing values in tables. +See our user-guide and extended documentation [here](https://datawig.readthedocs.io/en/latest). +## Installation +### CPU +```bash +pip3 install datawig +``` +### GPU +If you want to run DataWig on a GPU you need to make sure your version of Apache MXNet Incubating contains the GPU bindings. +Depending on your version of CUDA, you can do this by running the following: +```bash +wget https://raw.githubusercontent.com/awslabs/datawig/master/requirements/requirements.gpu-cu${CUDA_VERSION}.txt +pip install datawig --no-deps -r requirements.gpu-cu${CUDA_VERSION}.txt +rm requirements.gpu-cu${CUDA_VERSION}.txt +``` +where `${CUDA_VERSION}` can be `75` (7.5), `80` (8.0), `90` (9.0), or `91` (9.1). +## Running DataWig +The DataWig API expects your data as a [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html). Here is an example of how the dataframe might look: +|Product Type | Description | Size | Color | +|-------------|-----------------------|------|-------| +| Shoe | Ideal for Running | 12UK | Black | +| SDCards | Best SDCard ever ... | 8GB | Blue | +| Dress | This **yellow** dress | M | **?** | +### Quickstart Example +For most use cases, the `SimpleImputer` class is the best starting point. For convenience there is the function [SimpleImputer.complete](https://datawig.readthedocs.io/en/latest/source/API.html#datawig.simple_imputer.SimpleImputer.complete) that takes a DataFrame and fits an imputation model for each column with missing values, with all other columns as inputs: +```python +import datawig, numpy +# generate some data with simple nonlinear dependency +df = datawig.utils.generate_df_numeric() +# mask 10% of the values +df_with_missing = df.mask(numpy.random.rand(*df.shape) > .9) +# impute missing values +df_with_missing_imputed = datawig.SimpleImputer.complete(df_with_missing) +``` +You can also impute values in specific columns only (called `output_column` below) using values in other columns (called `input_columns` below). DataWig currently supports imputation of categorical columns and numeric columns. +### Imputation of categorical columns +```python +import datawig +df = datawig.utils.generate_df_string( num_samples=200, + data_column_name='sentences', + label_column_name='label') +df_train, df_test = datawig.utils.random_split(df) +#Initialize a SimpleImputer model +imputer = datawig.SimpleImputer( + input_columns=['sentences'], # column(s) containing information about the column we want to impute + output_column='label', # the column we'd like to impute values for + output_path = 'imputer_model' # stores model data and metrics + ) +#Fit an imputer model on the train data +imputer.fit(train_df=df_train) +#Impute missing values and return original dataframe with predictions +imputed = imputer.predict(df_test) +``` +### Imputation of numerical columns +```python +import datawig +df = datawig.utils.generate_df_numeric( num_samples=200, + data_column_name='x', + label_column_name='y') +df_train, df_test = datawig.utils.random_split(df) +#Initialize a SimpleImputer model +imputer = datawig.SimpleImputer( + input_columns=['x'], # column(s) containing information about the column we want to impute + output_column='y', # the column we'd like to impute values for + output_path = 'imputer_model' # stores model data and metrics + ) +#Fit an imputer model on the train data +imputer.fit(train_df=df_train, num_epochs=50) +#Impute missing values and return original dataframe with predictions +imputed = imputer.predict(df_test) +``` +In order to have more control over the types of models and preprocessings, the `Imputer` class allows directly specifying all relevant model features and parameters. +For details on usage, refer to the provided [examples](./examples). +### Acknowledgments +Thanks to [David Greenberg](https://github.com/dgreenberg) for the package name. +### Building documentation +```bash +git clone git@github.com:awslabs/datawig.git +cd datawig/docs +make html +open _build/html/index.html +``` +### Executing Tests +Clone the repository from git and set up virtualenv in the root dir of the package: +``` +python3 -m venv venv +``` +Install the package from local sources: +``` +./venv/bin/pip install -e . +``` +Run tests: +``` +./venv/bin/pip install -r requirements/requirements.dev.txt +./venv/bin/python -m pytest +``` +### Updating PyPi distribution +Before updating, increment the version in setup.py. +``` +git clone git@github.com:awslabs/datawig.git +cd datawig +# build local distribution for current version +python setup.py sdist +# upload to PyPi +twine upload --skip-existing dist/* +``` + +%prep +%autosetup -n datawig-0.2.0 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-datawig -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Wed May 17 2023 Python_Bot - 0.2.0-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..2100b59 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +b5090f77997202852e976499d25d8a63 datawig-0.2.0.tar.gz -- cgit v1.2.3