# Let the build continue when a generated file manifest (see the -f lists used
# by the files sections) turns out to be empty.
%global _empty_manifest_terminate_build 0
Name:		python-numpickle
Version:	0.1.3.post6
Release:	1
# Summary is kept under 80 characters and without a trailing period, as rpmlint expects.
Summary:	Fast save/load of pandas data frames via numpy arrays and pickled metadata
License:	MIT
URL:		https://github.com/gwangjinkim/numpickle
Source0:	https://mirrors.aliyun.com/pypi/web/packages/26/89/64ffb1b50a9df29efce48b41894d99f7627ebfe715d329d6ed2e2a147f93/numpickle-0.1.3.post6.tar.gz
# Pure-Python module: no architecture-specific payload.
BuildArch:	noarch

Requires:	python3-numpy
Requires:	python3-pandas

%description

# numpickle

Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).

The bigger the data frame, the greater the speed gain when loading it compared to reading a text file.
(E.g., a matrix occupying several GB of RAM took minutes to read in with plain `pd.read_csv()`, but only seconds to load with `numpickle.load_numpickle()`.)
`all_numeric=True` makes loading about 7 times faster.

Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529).

## Install

```pip install numpickle```

## Usage

```
import pandas as pd
import numpickle as npl


# create example data frame with non-numeric and numeric columns
df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']])
df.columns = ["A", "B", "C"]
df.index = ["row1", "row2"]

df
#       A  B  C
# row1  1  2  a
# row2  3  4  b

df.dtypes
# A     int64
# B     int64
# C    object
# dtype: object


# save data frame as numpy array and pickle row and column names
# into helper pickle file "/home/user/test.npy.pckl"
npl.save_numpickle(df, "/home/user/test.npy")

# load the saved data
df_ = npl.load_numpickle("/home/user/test.npy")

df_
#       A  B  C
# row1  1  2  a
# row2  3  4  b


df_.dtypes
# A     int64
# B     int64
# C    object
# dtype: object

all(df == df_)
# True


####################################
# data frames with numeric-only values
###################################

# If your data frame contains only numeric values, pass all_numeric=True.
# Then dtypes is set to None and loading will be slightly faster.
df = pd.DataFrame([[1, 2], [3, 4]])
df.columns = ["A", "B"]
df.index = ["row1", "row2"]

df
#       A  B
# row1  1  2
# row2  3  4

df.dtypes
# A     int64
# B     int64
# dtype: object

# save numeric-only data frame
npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True)
# load numeric-only data frame (it is recognized automatically as numeric-only
# because dtypes is None or absent from the pickle file)
df_ = npl.load_numpickle("/home/user/test.npy")


###################################
# save a csv or tab file as numpickle file(s) and delete original files
###################################
npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True)
# The data are read by pd.read_csv(); additional arguments for reading can be appended
# to the argument list and are forwarded to pd.read_csv() via *args, **kwargs.
# For the output file names, the `ending` is replaced by ".npy" and ".npy.pckl",
# so choose the separator and ending accordingly for a csv file (sep=",", ending=".csv").
```


%package -n python3-numpickle
# Summary is kept under 80 characters and without a trailing period, as rpmlint expects.
Summary:	Fast save/load of pandas data frames via numpy arrays and pickled metadata
# Versioned so that dependants can put version constraints on the virtual name.
Provides:	python-numpickle = %{version}-%{release}
# Runtime dependencies have to be declared on the subpackage that ships the
# module: subpackages do not inherit Requires from the main preamble, and this
# spec has no files section for the main package, so only this binary package
# (and the help package) is produced.
Requires:	python3-numpy
Requires:	python3-pandas
BuildRequires:	python3-devel
BuildRequires:	python3-setuptools
BuildRequires:	python3-pip
%description -n python3-numpickle

# numpickle

Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).

The bigger the data frame, the greater the speed gain when loading it compared to reading a text file.
(E.g., a matrix occupying several GB of RAM took minutes to read in with plain `pd.read_csv()`, but only seconds to load with `numpickle.load_numpickle()`.)
`all_numeric=True` makes loading about 7 times faster.

Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529).

## Install

```pip install numpickle```

## Usage

```
import pandas as pd
import numpickle as npl


# create example data frame with non-numeric and numeric columns
df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']])
df.columns = ["A", "B", "C"]
df.index = ["row1", "row2"]

df
#       A  B  C
# row1  1  2  a
# row2  3  4  b

df.dtypes
# A     int64
# B     int64
# C    object
# dtype: object


# save data frame as numpy array and pickle row and column names
# into helper pickle file "/home/user/test.npy.pckl"
npl.save_numpickle(df, "/home/user/test.npy")

# load the saved data
df_ = npl.load_numpickle("/home/user/test.npy")

df_
#       A  B  C
# row1  1  2  a
# row2  3  4  b


df_.dtypes
# A     int64
# B     int64
# C    object
# dtype: object

all(df == df_)
# True


####################################
# data frames with numeric-only values
###################################

# If your data frame contains only numeric values, pass all_numeric=True.
# Then dtypes is set to None and loading will be slightly faster.
df = pd.DataFrame([[1, 2], [3, 4]])
df.columns = ["A", "B"]
df.index = ["row1", "row2"]

df
#       A  B
# row1  1  2
# row2  3  4

df.dtypes
# A     int64
# B     int64
# dtype: object

# save numeric-only data frame
npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True)
# load numeric-only data frame (it is recognized automatically as numeric-only
# because dtypes is None or absent from the pickle file)
df_ = npl.load_numpickle("/home/user/test.npy")


###################################
# save a csv or tab file as numpickle file(s) and delete original files
###################################
npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True)
# The data are read by pd.read_csv(); additional arguments for reading can be appended
# to the argument list and are forwarded to pd.read_csv() via *args, **kwargs.
# For the output file names, the `ending` is replaced by ".npy" and ".npy.pckl",
# so choose the separator and ending accordingly for a csv file (sep=",", ending=".csv").
```


%package help
Summary:	Development documents and examples for numpickle
# Versioned so that dependants can put version constraints on the virtual name.
Provides:	python3-numpickle-doc = %{version}-%{release}
%description help

# numpickle

Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).

The bigger the data frame, the greater the speed gain when loading it compared to reading a text file.
(E.g., a matrix occupying several GB of RAM took minutes to read in with plain `pd.read_csv()`, but only seconds to load with `numpickle.load_numpickle()`.)
`all_numeric=True` makes loading about 7 times faster.

Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529).

## Install

```pip install numpickle```

## Usage

```
import pandas as pd
import numpickle as npl


# create example data frame with non-numeric and numeric columns
df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']])
df.columns = ["A", "B", "C"]
df.index = ["row1", "row2"]

df
#       A  B  C
# row1  1  2  a
# row2  3  4  b

df.dtypes
# A     int64
# B     int64
# C    object
# dtype: object


# save data frame as numpy array and pickle row and column names
# into helper pickle file "/home/user/test.npy.pckl"
npl.save_numpickle(df, "/home/user/test.npy")

# load the saved data
df_ = npl.load_numpickle("/home/user/test.npy")

df_
#       A  B  C
# row1  1  2  a
# row2  3  4  b


df_.dtypes
# A     int64
# B     int64
# C    object
# dtype: object

all(df == df_)
# True


####################################
# data frames with numeric-only values
###################################

# If your data frame contains only numeric values, pass all_numeric=True.
# Then dtypes is set to None and loading will be slightly faster.
df = pd.DataFrame([[1, 2], [3, 4]])
df.columns = ["A", "B"]
df.index = ["row1", "row2"]

df
#       A  B
# row1  1  2
# row2  3  4

df.dtypes
# A     int64
# B     int64
# dtype: object

# save numeric-only data frame
npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True)
# load numeric-only data frame (it is recognized automatically as numeric-only
# because dtypes is None or absent from the pickle file)
df_ = npl.load_numpickle("/home/user/test.npy")


###################################
# save a csv or tab file as numpickle file(s) and delete original files
###################################
npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True)
# The data are read by pd.read_csv(); additional arguments for reading can be appended
# to the argument list and are forwarded to pd.read_csv() via *args, **kwargs.
# For the output file names, the `ending` is replaced by ".npy" and ".npy.pckl",
# so choose the separator and ending accordingly for a csv file (sep=",", ending=".csv").
```


%prep
# Unpack Source0 and enter its top-level directory. The directory name is
# derived from the Version tag, so a version bump needs no edit here.
%autosetup -n numpickle-%{version}

%build
# Build the module with the distribution's standard Python 3 build macro.
%py3_build

%install
%py3_install
# Ship whichever upstream documentation/example directories exist in the
# package documentation directory.
install -d -m755 %{buildroot}%{_pkgdocdir}
for docdir in doc docs example examples; do
	if [ -d "$docdir" ]; then cp -arf "$docdir" %{buildroot}%{_pkgdocdir}; fi
done
# Generate the manifests consumed by the files sections (via their -f option).
pushd %{buildroot}
# Start from empty lists: both files then exist even when nothing matches
# (otherwise the mv below would fail), and stale entries are never appended to.
: > filelist.lst
: > doclist.lst
# Every regular file installed below these directories belongs to the main
# binary package; paths are written absolute and double-quoted.
for payload in usr/lib usr/lib64 usr/bin usr/sbin; do
	if [ -d "$payload" ]; then
		find "$payload" -type f -printf "\"/%h/%f\"\n" >> filelist.lst
	fi
done
if [ -d usr/share/man ]; then
	# Man pages are listed with a .gz suffix, presumably because they get
	# compressed after this script runs; TODO confirm against the build policy.
	find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
fi
popd
# The files sections look the manifests up relative to the build directory.
mv %{buildroot}/filelist.lst .
mv %{buildroot}/doclist.lst .

%files -n python3-numpickle -f filelist.lst
# Regular files come from filelist.lst, which the install section generates.
# The entry below additionally lists the top-level entries of the Python 3
# site-packages directory as owned directories.
%dir %{python3_sitelib}/*

%files help -f doclist.lst
# Man pages (if any) come from doclist.lst, which the install section
# generates; everything under the system documentation directory is
# included as well.
%{_docdir}/*

%changelog
* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.3.post6-1
- Package Spec generated