diff options
author | CoprDistGit <infra@openeuler.org> | 2023-05-05 10:20:13 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-05-05 10:20:13 +0000 |
commit | ac8c8b56ccb06faebedb9e5ff22ce03872a2f712 (patch) | |
tree | d49e77a9e368fe26eb121a1b7d66bc2c3f947f7d | |
parent | c81ec37bc3acd858c2a865037516b0b868b5b747 (diff) |
automatic import of python-databricks-utils openeuler20.03
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | python-databricks-utils.spec | 318 | ||||
-rw-r--r-- | sources | 1 |
3 files changed, 320 insertions, 0 deletions
@@ -0,0 +1 @@ +/databricks-utils-0.0.7.tar.gz diff --git a/python-databricks-utils.spec b/python-databricks-utils.spec new file mode 100644 index 0000000..8037ff4 --- /dev/null +++ b/python-databricks-utils.spec @@ -0,0 +1,318 @@ +%global _empty_manifest_terminate_build 0 +Name: python-databricks-utils +Version: 0.0.7 +Release: 1 +Summary: Ease-of-use utility tools for databricks notebooks. +License: Apache License 2.0 +URL: https://github.com/e2fyi/databricks-utils +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/89/05/4e40e0546bd2415b3fb38eab0d7fd48bead8877cf6121b5e64dc5401c69b/databricks-utils-0.0.7.tar.gz +BuildArch: noarch + + +%description +# databricks-utils +[](https://shields.io/) +[](https://shields.io/) +[](https://travis-ci.org/e2fyi/databricks-utils) + +`databricks-utils` is a python package that provides several utility classes/functions +that improve ease-of-use in databricks notebooks. + +### Installation +```bash +pip install databricks-utils +``` + +### Features +- `S3Bucket` class to easily interact with an [S3 bucket](https://aws.amazon.com/s3/) via [`dbfs`](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) and databricks spark. + +- `vega_embed` to render charts from [Vega](https://vega.github.io/vega/) and [Vega-Lite](https://vega.github.io/vega-lite/) specifications. + +### Documentation +API documentation can be found at [https://e2fyi.github.io/databricks-utils/](https://e2fyi.github.io/databricks-utils/). 
+ + +### Quick start +**S3Bucket** +```python +import json +from databricks_utils.aws import S3Bucket + +# need to attach notebook's dbutils +# before S3Bucket can be used +S3Bucket.attach_dbutils(dbutils) + +# create an instance of the s3 bucket +bucket = (S3Bucket("somebucketname", "SOMEACCESSKEY", "SOMESECRETKEY") + .allow_spark(sc) # local spark context + .mount("somebucketname")) # mount location name (resolves as `/mnt/somebucketname`) + +# show list of files/folders in the bucket "resource" folder +bucket.ls("resource/") + +# read in a json file from the bucket +data = json.load(open(bucket.local("resource/somefile.json", "r"))) + +# read from parquet via spark +dataframe = spark.read.parquet(bucket.s3("resource/somedf.parquet")) + +# umount +bucket.umount() +``` + +**Vega** +[Vega](https://vega.github.io/vega/) and [Vega-Lite](https://vega.github.io/vega-lite/) +are high-level grammars of interactive graphics. They provide concise JSON +syntax for rapidly generating visualizations to support analysis. + +```python +from databricks_utils.vega import vega_embed + +# vega-lite spec for a bar chart +spec = { + "data": { + "values": [ + {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, + {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, + {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} + ] + }, + "mark": "bar", + "encoding": { + "x": {"field": "a", "type": "ordinal"}, + "y": {"field": "b", "type": "quantitative"} + } +} + +# plot out the vega chart in databricks notebook +displayHTML(vega_embed(spec=spec)) +``` + +### Developer +```bash +# add a version to git tag and publish to pypi +. add_tag.sh <VERSION> +``` + +%package -n python3-databricks-utils +Summary: Ease-of-use utility tools for databricks notebooks. 
+Provides: python-databricks-utils +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-databricks-utils +# databricks-utils +[](https://shields.io/) +[](https://shields.io/) +[](https://travis-ci.org/e2fyi/databricks-utils) + +`databricks-utils` is a python package that provides several utility classes/functions +that improve ease-of-use in databricks notebooks. + +### Installation +```bash +pip install databricks-utils +``` + +### Features +- `S3Bucket` class to easily interact with an [S3 bucket](https://aws.amazon.com/s3/) via [`dbfs`](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) and databricks spark. + +- `vega_embed` to render charts from [Vega](https://vega.github.io/vega/) and [Vega-Lite](https://vega.github.io/vega-lite/) specifications. + +### Documentation +API documentation can be found at [https://e2fyi.github.io/databricks-utils/](https://e2fyi.github.io/databricks-utils/). + + +### Quick start +**S3Bucket** +```python +import json +from databricks_utils.aws import S3Bucket + +# need to attach notebook's dbutils +# before S3Bucket can be used +S3Bucket.attach_dbutils(dbutils) + +# create an instance of the s3 bucket +bucket = (S3Bucket("somebucketname", "SOMEACCESSKEY", "SOMESECRETKEY") + .allow_spark(sc) # local spark context + .mount("somebucketname")) # mount location name (resolves as `/mnt/somebucketname`) + +# show list of files/folders in the bucket "resource" folder +bucket.ls("resource/") + +# read in a json file from the bucket +data = json.load(open(bucket.local("resource/somefile.json", "r"))) + +# read from parquet via spark +dataframe = spark.read.parquet(bucket.s3("resource/somedf.parquet")) + +# umount +bucket.umount() +``` + +**Vega** +[Vega](https://vega.github.io/vega/) and [Vega-Lite](https://vega.github.io/vega-lite/) +are high-level grammars of interactive graphics. 
They provide concise JSON +syntax for rapidly generating visualizations to support analysis. + +```python +from databricks_utils.vega import vega_embed + +# vega-lite spec for a bar chart +spec = { + "data": { + "values": [ + {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, + {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, + {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} + ] + }, + "mark": "bar", + "encoding": { + "x": {"field": "a", "type": "ordinal"}, + "y": {"field": "b", "type": "quantitative"} + } +} + +# plot out the vega chart in databricks notebook +displayHTML(vega_embed(spec=spec)) +``` + +### Developer +```bash +# add a version to git tag and publish to pypi +. add_tag.sh <VERSION> +``` + +%package help +Summary: Development documents and examples for databricks-utils +Provides: python3-databricks-utils-doc +%description help +# databricks-utils +[](https://shields.io/) +[](https://shields.io/) +[](https://travis-ci.org/e2fyi/databricks-utils) + +`databricks-utils` is a python package that provides several utility classes/functions +that improve ease-of-use in databricks notebooks. + +### Installation +```bash +pip install databricks-utils +``` + +### Features +- `S3Bucket` class to easily interact with an [S3 bucket](https://aws.amazon.com/s3/) via [`dbfs`](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) and databricks spark. + +- `vega_embed` to render charts from [Vega](https://vega.github.io/vega/) and [Vega-Lite](https://vega.github.io/vega-lite/) specifications. + +### Documentation +API documentation can be found at [https://e2fyi.github.io/databricks-utils/](https://e2fyi.github.io/databricks-utils/). 
+ + +### Quick start +**S3Bucket** +```python +import json +from databricks_utils.aws import S3Bucket + +# need to attach notebook's dbutils +# before S3Bucket can be used +S3Bucket.attach_dbutils(dbutils) + +# create an instance of the s3 bucket +bucket = (S3Bucket("somebucketname", "SOMEACCESSKEY", "SOMESECRETKEY") + .allow_spark(sc) # local spark context + .mount("somebucketname")) # mount location name (resolves as `/mnt/somebucketname`) + +# show list of files/folders in the bucket "resource" folder +bucket.ls("resource/") + +# read in a json file from the bucket +data = json.load(open(bucket.local("resource/somefile.json", "r"))) + +# read from parquet via spark +dataframe = spark.read.parquet(bucket.s3("resource/somedf.parquet")) + +# umount +bucket.umount() +``` + +**Vega** +[Vega](https://vega.github.io/vega/) and [Vega-Lite](https://vega.github.io/vega-lite/) +are high-level grammars of interactive graphics. They provide concise JSON +syntax for rapidly generating visualizations to support analysis. + +```python +from databricks_utils.vega import vega_embed + +# vega-lite spec for a bar chart +spec = { + "data": { + "values": [ + {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, + {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, + {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} + ] + }, + "mark": "bar", + "encoding": { + "x": {"field": "a", "type": "ordinal"}, + "y": {"field": "b", "type": "quantitative"} + } +} + +# plot out the vega chart in databricks notebook +displayHTML(vega_embed(spec=spec)) +``` + +### Developer +```bash +# add a version to git tag and publish to pypi +. 
add_tag.sh <VERSION> +``` + +%prep +%autosetup -n databricks-utils-0.0.7 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-databricks-utils -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Fri May 05 2023 Python_Bot <Python_Bot@openeuler.org> - 0.0.7-1 +- Package Spec generated @@ -0,0 +1 @@ +fe61aea95875a9ae324e75ecf832c792 databricks-utils-0.0.7.tar.gz |