From dc2039986a7a4a20da68bfeb6b2eeb60a0a1a88b Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Fri, 5 May 2023 06:08:10 +0000 Subject: automatic import of python-glum --- .gitignore | 1 + python-glum.spec | 348 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 350 insertions(+) create mode 100644 python-glum.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..27b6788 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/glum-2.5.0.tar.gz diff --git a/python-glum.spec b/python-glum.spec new file mode 100644 index 0000000..ff8e68f --- /dev/null +++ b/python-glum.spec @@ -0,0 +1,348 @@ +%global _empty_manifest_terminate_build 0 +Name: python-glum +Version: 2.5.0 +Release: 1 +Summary: High performance Python GLMs with all the features! +License: BSD +URL: https://github.com/Quantco/glum +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/61/82/e05201187cc571d496d08c628fb159abbdfa52ab2b7241855e27cf6c387b/glum-2.5.0.tar.gz + +Requires: python3-joblib +Requires: python3-numexpr +Requires: python3-numpy +Requires: python3-pandas +Requires: python3-scikit-learn +Requires: python3-scipy +Requires: python3-tabmat + +%description +# glum + +[![CI](https://github.com/Quantco/glm_benchmarks/workflows/CI/badge.svg)](https://github.com/Quantco/glum/actions) +[![Docs](https://readthedocs.org/projects/pip/badge/?version=latest&style=flat)](https://glum.readthedocs.io/) +[![Conda-forge](https://img.shields.io/conda/vn/conda-forge/glum?logoColor=white&logo=conda-forge)](https://anaconda.org/conda-forge/glum) +[![PypiVersion](https://img.shields.io/pypi/v/glum.svg?logo=pypi&logoColor=white)](https://pypi.org/project/glum) +[![PythonVersion](https://img.shields.io/pypi/pyversions/glum?logoColor=white&logo=python)](https://pypi.org/project/glum) + + +[Documentation](https://glum.readthedocs.io/en/latest/) + +Generalized linear models (GLM) are a core statistical tool that include many common methods like 
least-squares regression, Poisson regression and logistic regression as special cases. At QuantCo, we have used GLMs in e-commerce pricing, insurance claims prediction and more. We have developed `glum`, a fast Python-first GLM library. The development was based on [a fork of scikit-learn](https://github.com/scikit-learn/scikit-learn/pull/9405), so it has a scikit-learn-like API. We are thankful for the starting point provided by Christian Lorentzen in that PR! + +The goal of `glum` is to be at least as feature-complete as existing GLM libraries like `glmnet` or `h2o`. It supports + +* Built-in cross validation for optimal regularization, efficiently exploiting a “regularization path” +* L1 regularization, which produces sparse and easily interpretable solutions +* L2 regularization, including variable matrix-valued (Tikhonov) penalties, which are useful in modeling correlated effects +* Elastic net regularization +* Normal, Poisson, logistic, gamma, and Tweedie distributions, plus varied and customizable link functions +* Box constraints, linear inequality constraints, sample weights, offsets + +This repo also includes tools for benchmarking GLM implementations in the `glum_benchmarks` module. For details on the benchmarking, [see here](src/glum_benchmarks/README.md). Although the performance of `glum` relative to `glmnet` and `h2o` depends on the specific problem, we find that when N >> K (there are more observations than predictors), it is consistently much faster for a wide range of problems. + +![](docs/_static/headline_benchmark.png) + +For more information on `glum`, including tutorials and API reference, please see [the documentation](https://glum.readthedocs.io/en/latest/). + +Why did we choose the name `glum`? We wanted a name that had the letters GLM and wasn't easily confused with any existing implementation. And we thought glum sounded like a funny name (and not glum at all!). 
If you need a more professional sounding name, feel free to pronounce it as G-L-um. Or maybe it stands for "Generalized linear... ummm... modeling?" + +# A classic example predicting housing prices + +```python +>>> from sklearn.datasets import fetch_openml +>>> from glum import GeneralizedLinearRegressor +>>> +>>> # This dataset contains house sale prices for King County, which includes +>>> # Seattle. It includes homes sold between May 2014 and May 2015. +>>> house_data = fetch_openml(name="house_sales", version=3, as_frame=True) +>>> +>>> # Use only select features +>>> X = house_data.data[ +... [ +... "bedrooms", +... "bathrooms", +... "sqft_living", +... "floors", +... "waterfront", +... "view", +... "condition", +... "grade", +... "yr_built", +... "yr_renovated", +... ] +... ].copy() +>>> +>>> +>>> # Model whether a house had an above or below median price via a Binomial +>>> # distribution. We'll be doing L1-regularized logistic regression. +>>> price = house_data.target +>>> y = (price < price.median()).values.astype(int) +>>> model = GeneralizedLinearRegressor( +... family='binomial', +... l1_ratio=1.0, +... alpha=0.001 +... ) +>>> +>>> _ = model.fit(X=X, y=y) +>>> +>>> # .get_formatted_diagnostics returns details about the steps taken by the iterative solver +>>> diags = model.get_formatted_diagnostics(full_report=True) +>>> diags[['objective_fct']] + objective_fct +n_iter +0 0.693091 +1 0.489500 +2 0.449585 +3 0.443681 +4 0.443498 +5 0.443497 + +``` + +# Installation + +Please install the package through conda-forge: +```bash +conda install glum -c conda-forge +``` + + +%package -n python3-glum +Summary: High performance Python GLMs with all the features! 
+Provides: python-glum +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +BuildRequires: python3-cffi +BuildRequires: gcc +BuildRequires: gdb +%description -n python3-glum +# glum + +[![CI](https://github.com/Quantco/glm_benchmarks/workflows/CI/badge.svg)](https://github.com/Quantco/glum/actions) +[![Docs](https://readthedocs.org/projects/pip/badge/?version=latest&style=flat)](https://glum.readthedocs.io/) +[![Conda-forge](https://img.shields.io/conda/vn/conda-forge/glum?logoColor=white&logo=conda-forge)](https://anaconda.org/conda-forge/glum) +[![PypiVersion](https://img.shields.io/pypi/v/glum.svg?logo=pypi&logoColor=white)](https://pypi.org/project/glum) +[![PythonVersion](https://img.shields.io/pypi/pyversions/glum?logoColor=white&logo=python)](https://pypi.org/project/glum) + + +[Documentation](https://glum.readthedocs.io/en/latest/) + +Generalized linear models (GLM) are a core statistical tool that include many common methods like least-squares regression, Poisson regression and logistic regression as special cases. At QuantCo, we have used GLMs in e-commerce pricing, insurance claims prediction and more. We have developed `glum`, a fast Python-first GLM library. The development was based on [a fork of scikit-learn](https://github.com/scikit-learn/scikit-learn/pull/9405), so it has a scikit-learn-like API. We are thankful for the starting point provided by Christian Lorentzen in that PR! + +The goal of `glum` is to be at least as feature-complete as existing GLM libraries like `glmnet` or `h2o`. 
It supports + +* Built-in cross validation for optimal regularization, efficiently exploiting a “regularization path” +* L1 regularization, which produces sparse and easily interpretable solutions +* L2 regularization, including variable matrix-valued (Tikhonov) penalties, which are useful in modeling correlated effects +* Elastic net regularization +* Normal, Poisson, logistic, gamma, and Tweedie distributions, plus varied and customizable link functions +* Box constraints, linear inequality constraints, sample weights, offsets + +This repo also includes tools for benchmarking GLM implementations in the `glum_benchmarks` module. For details on the benchmarking, [see here](src/glum_benchmarks/README.md). Although the performance of `glum` relative to `glmnet` and `h2o` depends on the specific problem, we find that when N >> K (there are more observations than predictors), it is consistently much faster for a wide range of problems. + +![](docs/_static/headline_benchmark.png) + +For more information on `glum`, including tutorials and API reference, please see [the documentation](https://glum.readthedocs.io/en/latest/). + +Why did we choose the name `glum`? We wanted a name that had the letters GLM and wasn't easily confused with any existing implementation. And we thought glum sounded like a funny name (and not glum at all!). If you need a more professional sounding name, feel free to pronounce it as G-L-um. Or maybe it stands for "Generalized linear... ummm... modeling?" + +# A classic example predicting housing prices + +```python +>>> from sklearn.datasets import fetch_openml +>>> from glum import GeneralizedLinearRegressor +>>> +>>> # This dataset contains house sale prices for King County, which includes +>>> # Seattle. It includes homes sold between May 2014 and May 2015. +>>> house_data = fetch_openml(name="house_sales", version=3, as_frame=True) +>>> +>>> # Use only select features +>>> X = house_data.data[ +... [ +... "bedrooms", +... "bathrooms", +... 
"sqft_living", +... "floors", +... "waterfront", +... "view", +... "condition", +... "grade", +... "yr_built", +... "yr_renovated", +... ] +... ].copy() +>>> +>>> +>>> # Model whether a house had an above or below median price via a Binomial +>>> # distribution. We'll be doing L1-regularized logistic regression. +>>> price = house_data.target +>>> y = (price < price.median()).values.astype(int) +>>> model = GeneralizedLinearRegressor( +... family='binomial', +... l1_ratio=1.0, +... alpha=0.001 +... ) +>>> +>>> _ = model.fit(X=X, y=y) +>>> +>>> # .get_formatted_diagnostics returns details about the steps taken by the iterative solver +>>> diags = model.get_formatted_diagnostics(full_report=True) +>>> diags[['objective_fct']] + objective_fct +n_iter +0 0.693091 +1 0.489500 +2 0.449585 +3 0.443681 +4 0.443498 +5 0.443497 + +``` + +# Installation + +Please install the package through conda-forge: +```bash +conda install glum -c conda-forge +``` + + +%package help +Summary: Development documents and examples for glum +Provides: python3-glum-doc +%description help +# glum + +[![CI](https://github.com/Quantco/glm_benchmarks/workflows/CI/badge.svg)](https://github.com/Quantco/glum/actions) +[![Docs](https://readthedocs.org/projects/pip/badge/?version=latest&style=flat)](https://glum.readthedocs.io/) +[![Conda-forge](https://img.shields.io/conda/vn/conda-forge/glum?logoColor=white&logo=conda-forge)](https://anaconda.org/conda-forge/glum) +[![PypiVersion](https://img.shields.io/pypi/v/glum.svg?logo=pypi&logoColor=white)](https://pypi.org/project/glum) +[![PythonVersion](https://img.shields.io/pypi/pyversions/glum?logoColor=white&logo=python)](https://pypi.org/project/glum) + + +[Documentation](https://glum.readthedocs.io/en/latest/) + +Generalized linear models (GLM) are a core statistical tool that include many common methods like least-squares regression, Poisson regression and logistic regression as special cases. 
At QuantCo, we have used GLMs in e-commerce pricing, insurance claims prediction and more. We have developed `glum`, a fast Python-first GLM library. The development was based on [a fork of scikit-learn](https://github.com/scikit-learn/scikit-learn/pull/9405), so it has a scikit-learn-like API. We are thankful for the starting point provided by Christian Lorentzen in that PR! + +The goal of `glum` is to be at least as feature-complete as existing GLM libraries like `glmnet` or `h2o`. It supports + +* Built-in cross validation for optimal regularization, efficiently exploiting a “regularization path” +* L1 regularization, which produces sparse and easily interpretable solutions +* L2 regularization, including variable matrix-valued (Tikhonov) penalties, which are useful in modeling correlated effects +* Elastic net regularization +* Normal, Poisson, logistic, gamma, and Tweedie distributions, plus varied and customizable link functions +* Box constraints, linear inequality constraints, sample weights, offsets + +This repo also includes tools for benchmarking GLM implementations in the `glum_benchmarks` module. For details on the benchmarking, [see here](src/glum_benchmarks/README.md). Although the performance of `glum` relative to `glmnet` and `h2o` depends on the specific problem, we find that when N >> K (there are more observations than predictors), it is consistently much faster for a wide range of problems. + +![](docs/_static/headline_benchmark.png) + +For more information on `glum`, including tutorials and API reference, please see [the documentation](https://glum.readthedocs.io/en/latest/). + +Why did we choose the name `glum`? We wanted a name that had the letters GLM and wasn't easily confused with any existing implementation. And we thought glum sounded like a funny name (and not glum at all!). If you need a more professional sounding name, feel free to pronounce it as G-L-um. Or maybe it stands for "Generalized linear... ummm... modeling?" 
+ +# A classic example predicting housing prices + +```python +>>> from sklearn.datasets import fetch_openml +>>> from glum import GeneralizedLinearRegressor +>>> +>>> # This dataset contains house sale prices for King County, which includes +>>> # Seattle. It includes homes sold between May 2014 and May 2015. +>>> house_data = fetch_openml(name="house_sales", version=3, as_frame=True) +>>> +>>> # Use only select features +>>> X = house_data.data[ +... [ +... "bedrooms", +... "bathrooms", +... "sqft_living", +... "floors", +... "waterfront", +... "view", +... "condition", +... "grade", +... "yr_built", +... "yr_renovated", +... ] +... ].copy() +>>> +>>> +>>> # Model whether a house had an above or below median price via a Binomial +>>> # distribution. We'll be doing L1-regularized logistic regression. +>>> price = house_data.target +>>> y = (price < price.median()).values.astype(int) +>>> model = GeneralizedLinearRegressor( +... family='binomial', +... l1_ratio=1.0, +... alpha=0.001 +... 
) +>>> +>>> _ = model.fit(X=X, y=y) +>>> +>>> # .get_formatted_diagnostics returns details about the steps taken by the iterative solver +>>> diags = model.get_formatted_diagnostics(full_report=True) +>>> diags[['objective_fct']] + objective_fct +n_iter +0 0.693091 +1 0.489500 +2 0.449585 +3 0.443681 +4 0.443498 +5 0.443497 + +``` + +# Installation + +Please install the package through conda-forge: +```bash +conda install glum -c conda-forge +``` + + +%prep +%autosetup -n glum-2.5.0 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-glum -f filelist.lst +%dir %{python3_sitearch}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Fri May 05 2023 Python_Bot - 2.5.0-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..87e3187 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +7298a8fab752719cd55bdc40cd4d2ae4 glum-2.5.0.tar.gz -- cgit v1.2.3