summaryrefslogtreecommitdiff
path: root/python-numpickle.spec
diff options
context:
space:
mode:
Diffstat (limited to 'python-numpickle.spec')
-rw-r--r--python-numpickle.spec392
1 files changed, 392 insertions, 0 deletions
diff --git a/python-numpickle.spec b/python-numpickle.spec
new file mode 100644
index 0000000..8c7bccb
--- /dev/null
+++ b/python-numpickle.spec
@@ -0,0 +1,392 @@
+%global _empty_manifest_terminate_build 0
+Name: python-numpickle
+Version: 0.1.3.post6
+Release: 1
+Summary: Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).
+License: MIT
+URL: https://github.com/gwangjinkim/numpickle
+Source0: https://mirrors.aliyun.com/pypi/web/packages/26/89/64ffb1b50a9df29efce48b41894d99f7627ebfe715d329d6ed2e2a147f93/numpickle-0.1.3.post6.tar.gz
+BuildArch: noarch
+
+Requires: python3-numpy
+Requires: python3-pandas
+
+%description
+
+# numpickle
+
+Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).
+
+The bigger the data frame, the bigger the speed gain when reading it compared to loading a text file.
+(E.g., a matrix occupying several GB of RAM took minutes to read in with plain `pd.read_csv()`, but only seconds to load with `numpickle.load_numpickle()`.)
+`all_numeric=True` accelerates loading by ~ 7 times.
+
+Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529).
+
+## Install
+
+```pip install numpickle```
+
+## Usage
+
+```
+import pandas as pd
+import numpickle as npl
+
+
+# create example data frame with non-numeric and numeric columns
+df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']])
+df.columns = ["A", "B", "C"]
+df.index = ["row1", "row2"]
+
+df
+# A B C
+# row1 1 2 a
+# row2 3 4 b
+
+df.dtypes
+# A int64
+# B int64
+# C object
+# dtype: object
+
+
+
+
+# save data frame as numpy array and pickle row and column names
+# into helper pickle file "/home/user/test.npy.pckl"
+npl.save_numpickle(df, "/home/user/test.npy")
+
+# load the saved data
+df_ = npl.load_numpickle("/home/user/test.npy")
+
+df_
+# A B C
+# row1 1 2 a
+# row2 3 4 b
+
+
+df_.dtypes
+# A int64
+# B int64
+# C object
+# dtype: object
+
+all(df == df_)
+# True
+
+
+
+
+
+
+####################################
+# data frames with numeric-only values
+###################################
+
+# If you have a data frame with only numeric values, put all_numeric=True .
+# Then dtypes is set to None and the loading will be slightly faster.
+df = pd.DataFrame([[1, 2], [3, 4]])
+df.columns = ["A", "B"]
+df.index = ["row1", "row2"]
+
+df
+# A B
+# row1 1 2
+# row2 3 4
+
+df.dtypes
+# A int64
+# B int64
+# dtype: object
+
+# save numeric-only data frame
+npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True)
+# load numeric-only data frame (it recognizes automatically that the data are numeric-only
+# because dtypes is None or missing in the pickle file)
+df_ = npl.load_numpickle("/home/user/test.npy")
+
+
+###################################
+# save a csv or tab file as numpickle file(s) and delete original files
+###################################
+npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True)
+# the data are read by pd.read_csv(), additional arguments for the reading process can be given
+# into the argument list, they will be forwarded to pd.read_csv() by *args, **kwargs
+# for the output file name, the `ending` is replaced by ".npy" and ".npy.pckl".
+# So choose the separator and ending accordingly when file is a csv file (sep=",", ending=".csv").
+```
+
+
+
+
+%package -n python3-numpickle
+Summary: Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).
+Provides: python-numpickle
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-numpickle
+
+# numpickle
+
+Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).
+
+The bigger the data frame, the bigger the speed gain when reading it compared to loading a text file.
+(E.g., a matrix occupying several GB of RAM took minutes to read in with plain `pd.read_csv()`, but only seconds to load with `numpickle.load_numpickle()`.)
+`all_numeric=True` accelerates loading by ~ 7 times.
+
+Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529).
+
+## Install
+
+```pip install numpickle```
+
+## Usage
+
+```
+import pandas as pd
+import numpickle as npl
+
+
+# create example data frame with non-numeric and numeric columns
+df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']])
+df.columns = ["A", "B", "C"]
+df.index = ["row1", "row2"]
+
+df
+# A B C
+# row1 1 2 a
+# row2 3 4 b
+
+df.dtypes
+# A int64
+# B int64
+# C object
+# dtype: object
+
+
+
+
+# save data frame as numpy array and pickle row and column names
+# into helper pickle file "/home/user/test.npy.pckl"
+npl.save_numpickle(df, "/home/user/test.npy")
+
+# load the saved data
+df_ = npl.load_numpickle("/home/user/test.npy")
+
+df_
+# A B C
+# row1 1 2 a
+# row2 3 4 b
+
+
+df_.dtypes
+# A int64
+# B int64
+# C object
+# dtype: object
+
+all(df == df_)
+# True
+
+
+
+
+
+
+####################################
+# data frames with numeric-only values
+###################################
+
+# If you have a data frame with only numeric values, put all_numeric=True .
+# Then dtypes is set to None and the loading will be slightly faster.
+df = pd.DataFrame([[1, 2], [3, 4]])
+df.columns = ["A", "B"]
+df.index = ["row1", "row2"]
+
+df
+# A B
+# row1 1 2
+# row2 3 4
+
+df.dtypes
+# A int64
+# B int64
+# dtype: object
+
+# save numeric-only data frame
+npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True)
+# load numeric-only data frame (it recognizes automatically that the data are numeric-only
+# because dtypes is None or missing in the pickle file)
+df_ = npl.load_numpickle("/home/user/test.npy")
+
+
+###################################
+# save a csv or tab file as numpickle file(s) and delete original files
+###################################
+npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True)
+# the data are read by pd.read_csv(), additional arguments for the reading process can be given
+# into the argument list, they will be forwarded to pd.read_csv() by *args, **kwargs
+# for the output file name, the `ending` is replaced by ".npy" and ".npy.pckl".
+# So choose the separator and ending accordingly when file is a csv file (sep=",", ending=".csv").
+```
+
+
+
+
+%package help
+Summary: Development documents and examples for numpickle
+Provides: python3-numpickle-doc
+%description help
+
+# numpickle
+
+Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info).
+
+The bigger the data frame, the bigger the speed gain when reading it compared to loading a text file.
+(E.g., a matrix occupying several GB of RAM took minutes to read in with plain `pd.read_csv()`, but only seconds to load with `numpickle.load_numpickle()`.)
+`all_numeric=True` accelerates loading by ~ 7 times.
+
+Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529).
+
+## Install
+
+```pip install numpickle```
+
+## Usage
+
+```
+import pandas as pd
+import numpickle as npl
+
+
+# create example data frame with non-numeric and numeric columns
+df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']])
+df.columns = ["A", "B", "C"]
+df.index = ["row1", "row2"]
+
+df
+# A B C
+# row1 1 2 a
+# row2 3 4 b
+
+df.dtypes
+# A int64
+# B int64
+# C object
+# dtype: object
+
+
+
+
+# save data frame as numpy array and pickle row and column names
+# into helper pickle file "/home/user/test.npy.pckl"
+npl.save_numpickle(df, "/home/user/test.npy")
+
+# load the saved data
+df_ = npl.load_numpickle("/home/user/test.npy")
+
+df_
+# A B C
+# row1 1 2 a
+# row2 3 4 b
+
+
+df_.dtypes
+# A int64
+# B int64
+# C object
+# dtype: object
+
+all(df == df_)
+# True
+
+
+
+
+
+
+####################################
+# data frames with numeric-only values
+###################################
+
+# If you have a data frame with only numeric values, put all_numeric=True .
+# Then dtypes is set to None and the loading will be slightly faster.
+df = pd.DataFrame([[1, 2], [3, 4]])
+df.columns = ["A", "B"]
+df.index = ["row1", "row2"]
+
+df
+# A B
+# row1 1 2
+# row2 3 4
+
+df.dtypes
+# A int64
+# B int64
+# dtype: object
+
+# save numeric-only data frame
+npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True)
+# load numeric-only data frame (it recognizes automatically that the data are numeric-only
+# because dtypes is None or missing in the pickle file)
+df_ = npl.load_numpickle("/home/user/test.npy")
+
+
+###################################
+# save a csv or tab file as numpickle file(s) and delete original files
+###################################
+npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True)
+# the data are read by pd.read_csv(), additional arguments for the reading process can be given
+# into the argument list, they will be forwarded to pd.read_csv() by *args, **kwargs
+# for the output file name, the `ending` is replaced by ".npy" and ".npy.pckl".
+# So choose the separator and ending accordingly when file is a csv file (sep=",", ending=".csv").
+```
+
+
+
+
+%prep
+%autosetup -n numpickle-0.1.3.post6
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}  # create the package doc dir so the optional copies below have a target
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi  # each of these four source dirs is optional
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}  # run find relative to the buildroot; the printf format re-adds the leading slash
+if [ -d usr/lib ]; then  # each find appends one double-quoted absolute path per regular file
+ find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+ find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+ find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+ find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch filelist.lst doclist.lst  # guarantee both lists exist even if no directory above matched, so the mv calls below cannot fail
+if [ -d usr/share/man ]; then  # NOTE(review): the .gz suffix assumes man pages are gzip-compressed after this section runs; confirm
+ find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .  # move the lists out of the buildroot into the build dir, where the files sections read them
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-numpickle -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.3.post6-1
+- Package Spec generated