diff options
author | CoprDistGit <infra@openeuler.org> | 2023-06-20 09:42:59 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-06-20 09:42:59 +0000 |
commit | 5776106e0e92cc7e6c7ce1cb513eedd606132665 (patch) | |
tree | a2015382ed8577a31914ed4cf06ef1d72467dbcf | |
parent | 3b961ec6e52516b801bd3391315baaebdaa923e7 (diff) |
automatic import of python-numpickle (openeuler20.03)
-rw-r--r-- | .gitignore | 1 |
-rw-r--r-- | python-numpickle.spec | 392 |
-rw-r--r-- | sources | 1 |
3 files changed, 394 insertions, 0 deletions
@@ -0,0 +1 @@ +/numpickle-0.1.3.post6.tar.gz diff --git a/python-numpickle.spec b/python-numpickle.spec new file mode 100644 index 0000000..8c7bccb --- /dev/null +++ b/python-numpickle.spec @@ -0,0 +1,392 @@ +%global _empty_manifest_terminate_build 0 +Name: python-numpickle +Version: 0.1.3.post6 +Release: 1 +Summary: Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info). +License: MIT +URL: https://github.com/gwangjinkim/numpickle +Source0: https://mirrors.aliyun.com/pypi/web/packages/26/89/64ffb1b50a9df29efce48b41894d99f7627ebfe715d329d6ed2e2a147f93/numpickle-0.1.3.post6.tar.gz +BuildArch: noarch + +Requires: python3-numpy +Requires: python3-pandas + +%description + +# numpickle + +Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info). + +The bigger the data frame, the bigger the gain of speed when reading compared to loading a text file. +(E.g., a several GB RAM-consuming matrix took minutes to read-in using normal `pd.read_csv()`, but took only seconds to load when using `numpickle.load_numpickle()`). +`all_numeric=True` accelerates loading by ~ 7 times. + +Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529). 
+ +## Install + +```pip install numpickle``` + +## Usage + +``` +import pandas as pd +import numpickle as npl + + +# create example data frame with non-numeric and numeric columns +df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']]) +df.columns = ["A", "B", "C"] +df.index = ["row1", "row2"] + +df +# A B C +# row1 1 2 a +# row2 3 4 b + +df.dtypes +# A int64 +# B int64 +# C object +# dtype: object + + + + +# save data frame as numpy array and pickle row and column names +# into helper pickle file "/home/user/test.npy.pckl" +npl.save_numpickle(df, "/home/user/test.npy") + +# load the saved data +df_ = npl.load_numpickle("/home/user/test.npy") + +df_ +# A B C +# row1 1 2 a +# row2 3 4 b + + +df_.dtypes +# A int64 +# B int64 +# C object +# dtype: object + +all(df == df_) +# True + + + + + + +#################################### +# data frames with numeric-only values +################################### + +# If you have a data frame with only numeric values, put all_numeric=True . +# Then dtypes is set to None and the loading will be slightly faster. 
+df = pd.DataFrame([[1, 2], [3, 4]]) +df.columns = ["A", "B"] +df.index = ["row1", "row2"] + +df +# A B +# row1 1 2 +# row2 3 4 + +df.dtypes +# A int64 +# B int64 +# dtype: object + +# save numeric-only data frame +npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True) +# load numeric-only data frame (it recognizes automatically that it is numeric only +# because dtypes=None or not existent in pickle file +df_ = npl.load_numpickle("/home/user/test.npy") + + +################################### +# save a csv or tab file as numpickle file(s) and delete original files +################################### +npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True) +# the data are read by pd.read_csv(), additional arguments for the reading process can be given +# into the argument list, they will be forwarded to pd.read_csv() by *args, **kwargs +# for the output file name, the `ending` is replaced by ".npy" and ".npy.pckl". +# So choose the separator and ending accordingly when file is a csv file (sep=",", ending=".csv"). +``` + + + + +%package -n python3-numpickle +Summary: Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info). +Provides: python-numpickle +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-numpickle + +# numpickle + +Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info). + +The bigger the data frame, the bigger the gain of speed when reading compared to loading a text file. +(E.g., a several GB RAM-consuming matrix took minutes to read-in using normal `pd.read_csv()`, but took only seconds to load when using `numpickle.load_numpickle()`). +`all_numeric=True` accelerates loading by ~ 7 times. 
+ +Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529). + +## Install + +```pip install numpickle``` + +## Usage + +``` +import pandas as pd +import numpickle as npl + + +# create example data frame with non-numeric and numeric columns +df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']]) +df.columns = ["A", "B", "C"] +df.index = ["row1", "row2"] + +df +# A B C +# row1 1 2 a +# row2 3 4 b + +df.dtypes +# A int64 +# B int64 +# C object +# dtype: object + + + + +# save data frame as numpy array and pickle row and column names +# into helper pickle file "/home/user/test.npy.pckl" +npl.save_numpickle(df, "/home/user/test.npy") + +# load the saved data +df_ = npl.load_numpickle("/home/user/test.npy") + +df_ +# A B C +# row1 1 2 a +# row2 3 4 b + + +df_.dtypes +# A int64 +# B int64 +# C object +# dtype: object + +all(df == df_) +# True + + + + + + +#################################### +# data frames with numeric-only values +################################### + +# If you have a data frame with only numeric values, put all_numeric=True . +# Then dtypes is set to None and the loading will be slightly faster. 
+df = pd.DataFrame([[1, 2], [3, 4]]) +df.columns = ["A", "B"] +df.index = ["row1", "row2"] + +df +# A B +# row1 1 2 +# row2 3 4 + +df.dtypes +# A int64 +# B int64 +# dtype: object + +# save numeric-only data frame +npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True) +# load numeric-only data frame (it recognizes automatically that it is numeric only +# because dtypes=None or not existent in pickle file +df_ = npl.load_numpickle("/home/user/test.npy") + + +################################### +# save a csv or tab file as numpickle file(s) and delete original files +################################### +npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True) +# the data are read by pd.read_csv(), additional arguments for the reading process can be given +# into the argument list, they will be forwarded to pd.read_csv() by *args, **kwargs +# for the output file name, the `ending` is replaced by ".npy" and ".npy.pckl". +# So choose the separator and ending accordingly when file is a csv file (sep=",", ending=".csv"). +``` + + + + +%package help +Summary: Development documents and examples for numpickle +Provides: python3-numpickle-doc +%description help + +# numpickle + +Faster loading of pandas data frames by saving them as numpy arrays and pickling their meta info (row+column names, column dtype info). + +The bigger the data frame, the bigger the gain of speed when reading compared to loading a text file. +(E.g., a several GB RAM-consuming matrix took minutes to read-in using normal `pd.read_csv()`, but took only seconds to load when using `numpickle.load_numpickle()`). +`all_numeric=True` accelerates loading by ~ 7 times. + +Also mentioned in my [medium article](https://gwang-jin-kim.medium.com/faster-loading-and-saving-of-pandas-data-frames-using-numpickle-numpy-and-pickle-d15870519529). 
+ +## Install + +```pip install numpickle``` + +## Usage + +``` +import pandas as pd +import numpickle as npl + + +# create example data frame with non-numeric and numeric columns +df = pd.DataFrame([[1, 2,'a'], [3, 4, 'b']]) +df.columns = ["A", "B", "C"] +df.index = ["row1", "row2"] + +df +# A B C +# row1 1 2 a +# row2 3 4 b + +df.dtypes +# A int64 +# B int64 +# C object +# dtype: object + + + + +# save data frame as numpy array and pickle row and column names +# into helper pickle file "/home/user/test.npy.pckl" +npl.save_numpickle(df, "/home/user/test.npy") + +# load the saved data +df_ = npl.load_numpickle("/home/user/test.npy") + +df_ +# A B C +# row1 1 2 a +# row2 3 4 b + + +df_.dtypes +# A int64 +# B int64 +# C object +# dtype: object + +all(df == df_) +# True + + + + + + +#################################### +# data frames with numeric-only values +################################### + +# If you have a data frame with only numeric values, put all_numeric=True . +# Then dtypes is set to None and the loading will be slightly faster. 
+df = pd.DataFrame([[1, 2], [3, 4]]) +df.columns = ["A", "B"] +df.index = ["row1", "row2"] + +df +# A B +# row1 1 2 +# row2 3 4 + +df.dtypes +# A int64 +# B int64 +# dtype: object + +# save numeric-only data frame +npl.save_numpickle(df, "/home/user/test.npy", all_numeric=True) +# load numeric-only data frame (it recognizes automatically that it is numeric only +# because dtypes=None or not existent in pickle file +df_ = npl.load_numpickle("/home/user/test.npy") + + +################################### +# save a csv or tab file as numpickle file(s) and delete original files +################################### +npl.save_file_as_numpickle(fpath, sep="\t", ending=".tab", all_numeric=True, deletep=True) +# the data are read by pd.read_csv(), additional arguments for the reading process can be given +# into the argument list, they will be forwarded to pd.read_csv() by *args, **kwargs +# for the output file name, the `ending` is replaced by ".npy" and ".npy.pckl". +# So choose the separator and ending accordingly when file is a csv file (sep=",", ending=".csv"). 
+``` + + + + +%prep +%autosetup -n numpickle-0.1.3.post6 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-numpickle -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.3.post6-1 +- Package Spec generated @@ -0,0 +1 @@ +3c3eb4c53402963c22e87d32fb4aa8fd numpickle-0.1.3.post6.tar.gz |