diff options
Diffstat (limited to 'python-csvw.spec')
-rw-r--r-- | python-csvw.spec | 836 |
1 files changed, 836 insertions, 0 deletions
diff --git a/python-csvw.spec b/python-csvw.spec
new file mode 100644
index 0000000..2833765
--- /dev/null
+++ b/python-csvw.spec
@@ -0,0 +1,836 @@
+# Keep the build going even if a generated files manifest ends up empty.
+%global _empty_manifest_terminate_build 0
+Name: python-csvw
+Version: 3.1.3
+Release: 1
+Summary: Python library to work with CSVW described tabular data
+# SPDX short identifier of the Apache License, version 2.0.
+License: Apache-2.0
+URL: https://github.com/cldf/csvw
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/c1/b2/bbd8d72578de1778f39d7ed43f899b7f9ff261fe41738a02acb97660bf4d/csvw-3.1.3.tar.gz
+BuildArch: noarch
+
+# Dependencies declared for the source-level package.
+# NOTE(review): the entries from python3-build onwards (build, flake8, twine,
+# wheel, sphinx*, frictionless, pytest*, requests-mock) look like upstream
+# dev/docs/test extras rather than runtime needs - confirm against the
+# upstream packaging metadata before keeping them as hard requirements.
+Requires: python3-attrs
+Requires: python3-babel
+Requires: python3-colorama
+Requires: python3-isodate
+Requires: python3-jsonschema
+Requires: python3-language-tags
+Requires: python3-dateutil
+Requires: python3-rdflib
+Requires: python3-requests
+Requires: python3-rfc3986
+Requires: python3-uritemplate
+Requires: python3-build
+Requires: python3-flake8
+Requires: python3-twine
+Requires: python3-wheel
+Requires: python3-sphinx
+Requires: python3-sphinx-autodoc-typehints
+Requires: python3-sphinx-rtd-theme
+Requires: python3-frictionless
+Requires: python3-pytest-cov
+Requires: python3-pytest-mock
+Requires: python3-pytest
+Requires: python3-requests-mock
+
+%description
+# csvw
+
+[](https://github.com/cldf/csvw/actions?query=workflow%3Atests)
+[](https://pypi.org/project/csvw)
+[](https://csvw.readthedocs.io/en/latest/?badge=latest)
+
+
+This package provides
+- a Python API to read and write relational, tabular data according to the [CSV on the Web](https://csvw.org/) specification and
+- commandline tools for reading and validating CSVW data.
+ + +## Links + +- GitHub: https://github.com/cldf/csvw +- PyPI: https://pypi.org/project/csvw +- Issue Tracker: https://github.com/cldf/csvw/issues + + +## Installation + +This package runs under Python >=3.7, use pip to install: + +```bash +$ pip install csvw +``` + + +## CLI + +### `csvw2json` + +Converting CSVW data [to JSON](https://www.w3.org/TR/csv2json/) + +```shell +$ csvw2json tests/fixtures/zipped-metadata.json +{ + "tables": [ + { + "url": "tests/fixtures/zipped.csv", + "row": [ + { + "url": "tests/fixtures/zipped.csv#row=2", + "rownum": 1, + "describes": [ + { + "ID": "abc", + "Value": "the value" + } + ] + }, + { + "url": "tests/fixtures/zipped.csv#row=3", + "rownum": 2, + "describes": [ + { + "ID": "cde", + "Value": "another one" + } + ] + } + ] + } + ] +} +``` + +### `csvwvalidate` + +Validating CSVW data + +```shell +$ csvwvalidate tests/fixtures/zipped-metadata.json +OK +``` + +### `csvwdescribe` + +Describing tabular-data files with CSVW metadata + +```shell +$ csvwdescribe --delimiter "|" tests/fixtures/frictionless-data.csv +{ + "@context": "http://www.w3.org/ns/csvw", + "dc:conformsTo": "data-package", + "tables": [ + { + "dialect": { + "delimiter": "|" + }, + "tableSchema": { + "columns": [ + { + "datatype": "string", + "name": "FK" + }, + { + "datatype": "integer", + "name": "Year" + }, + { + "datatype": "string", + "name": "Location name" + }, + { + "datatype": "string", + "name": "Value" + }, + { + "datatype": "string", + "name": "binary" + }, + { + "datatype": "string", + "name": "anyURI" + }, + { + "datatype": "string", + "name": "email" + }, + { + "datatype": "string", + "name": "boolean" + }, + { + "datatype": { + "dc:format": "application/json", + "base": "json" + }, + "name": "array" + }, + { + "datatype": { + "dc:format": "application/json", + "base": "json" + }, + "name": "geojson" + } + ] + }, + "url": "tests/fixtures/frictionless-data.csv" + } + ] +} +``` + + +## Python API + +Find the Python API documentation at 
[csvw.readthedocs.io](https://csvw.readthedocs.io/en/latest/). + +A quick example for using `csvw` from Python code: + +```python +import json +from csvw import CSVW +data = CSVW('https://raw.githubusercontent.com/cldf/csvw/master/tests/fixtures/test.tsv') +print(json.dumps(data.to_json(minimal=True), indent=4)) +[ + { + "province": "Hello", + "territory": "world", + "precinct": "1" + } +] +``` + + +## Known limitations + +- We read **all** data which is specified as UTF-8 encoded using the + [`utf-8-sig` codecs](https://docs.python.org/3/library/codecs.html#module-encodings.utf_8_sig). + Thus, if such data starts with `U+FEFF` this will be interpreted as [BOM](https://en.wikipedia.org/wiki/Byte_order_mark) + and skipped. +- Low level CSV parsing is delegated to the `csv` module in Python's standard library. Thus, if a `commentPrefix` + is specified in a `Dialect` instance, this will lead to skipping rows where the first value starts + with `commentPrefix`, **even if the value was quoted**. +- Also, cell content containing `escapechar` may not be round-tripped as expected (when specifying + `escapechar` or a `csvw.Dialect` with `quoteChar` but `doubleQuote==False`), + when minimal quoting is specified. This is due to inconsistent `csv` behaviour + across Python versions (see https://bugs.python.org/issue44861). + + +## CSVW conformance + +While we use the CSVW specification as guideline, this package does not (and +probably never will) implement the full extent of this spec. + +- When CSV files with a header are read, columns are not matched in order with + column descriptions in the `tableSchema`, but instead are matched based on the + CSV column header and the column descriptions' `name` and `titles` attributes. + This allows for more flexibility, because columns in the CSV file may be + re-ordered without invalidating the metadata. A stricter matching can be forced + by specifying `"header": false` and `"skipRows": 1` in the table's dialect + description. 
+ +However, `csvw.CSVW` works correctly for +- 269 out of 270 [JSON tests](https://w3c.github.io/csvw/tests/#manifest-json), +- 280 out of 282 [validation tests](https://w3c.github.io/csvw/tests/#manifest-validation), +- 10 out of 18 [non-normative tests](https://w3c.github.io/csvw/tests/#manifest-nonnorm) + +from the [CSVW Test suites](https://w3c.github.io/csvw/tests/). + + +## Compatibility with [Frictionless Data Specs](https://specs.frictionlessdata.io/) + +A CSVW-described dataset is basically equivalent to a Frictionless DataPackage where all +[Data Resources](https://specs.frictionlessdata.io/data-resource/) are [Tabular Data](https://specs.frictionlessdata.io/tabular-data-resource/). +Thus, the `csvw` package provides some conversion functionality. To +"read CSVW data from a Data Package", there's the `csvw.TableGroup.from_frictionless_datapackage` method: +```python +from csvw import TableGroup +tg = TableGroup.from_frictionless_datapackage('PATH/TO/datapackage.json') +``` +To convert the metadata, the `TableGroup` can then be serialized: +```python +tg.to_file('csvw-metadata.json') +``` + +Note that the CSVW metadata file must be written to the Data Package's directory +to make sure relative paths to data resources work. + +This functionality - together with the schema inference capabilities +of [`frictionless describe`](https://framework.frictionlessdata.io/docs/guides/describing-data/) - provides +a convenient way to bootstrap CSVW metadata for a set of "raw" CSV +files, implemented in the [`csvwdescribe` command described above](#csvwdescribe). 
+
+
+## See also
+
+- https://www.w3.org/2013/csvw/wiki/Main_Page
+- https://csvw.org
+- https://github.com/CLARIAH/COW
+- https://github.com/CLARIAH/ruminator
+- https://github.com/bloomberg/pycsvw
+- https://specs.frictionlessdata.io/table-schema/
+- https://github.com/theodi/csvlint.rb
+- https://github.com/ruby-rdf/rdf-tabular
+- https://github.com/rdf-ext/rdf-parser-csvw
+- https://github.com/Robsteranium/csvwr
+
+
+## License
+
+This package is distributed under the [Apache 2.0 license](https://opensource.org/licenses/Apache-2.0).
+
+
+
+
+%package -n python3-csvw
+# Python 3 sub-package; it also advertises the unversioned name python-csvw
+# through its Provides tag. The BuildRequires below are build-time only.
+Summary: Python library to work with CSVW described tabular data
+Provides: python-csvw
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-csvw
+# csvw
+
+[](https://github.com/cldf/csvw/actions?query=workflow%3Atests)
+[](https://pypi.org/project/csvw)
+[](https://csvw.readthedocs.io/en/latest/?badge=latest)
+
+
+This package provides
+- a Python API to read and write relational, tabular data according to the [CSV on the Web](https://csvw.org/) specification and
+- commandline tools for reading and validating CSVW data.
+ + +## Links + +- GitHub: https://github.com/cldf/csvw +- PyPI: https://pypi.org/project/csvw +- Issue Tracker: https://github.com/cldf/csvw/issues + + +## Installation + +This package runs under Python >=3.7, use pip to install: + +```bash +$ pip install csvw +``` + + +## CLI + +### `csvw2json` + +Converting CSVW data [to JSON](https://www.w3.org/TR/csv2json/) + +```shell +$ csvw2json tests/fixtures/zipped-metadata.json +{ + "tables": [ + { + "url": "tests/fixtures/zipped.csv", + "row": [ + { + "url": "tests/fixtures/zipped.csv#row=2", + "rownum": 1, + "describes": [ + { + "ID": "abc", + "Value": "the value" + } + ] + }, + { + "url": "tests/fixtures/zipped.csv#row=3", + "rownum": 2, + "describes": [ + { + "ID": "cde", + "Value": "another one" + } + ] + } + ] + } + ] +} +``` + +### `csvwvalidate` + +Validating CSVW data + +```shell +$ csvwvalidate tests/fixtures/zipped-metadata.json +OK +``` + +### `csvwdescribe` + +Describing tabular-data files with CSVW metadata + +```shell +$ csvwdescribe --delimiter "|" tests/fixtures/frictionless-data.csv +{ + "@context": "http://www.w3.org/ns/csvw", + "dc:conformsTo": "data-package", + "tables": [ + { + "dialect": { + "delimiter": "|" + }, + "tableSchema": { + "columns": [ + { + "datatype": "string", + "name": "FK" + }, + { + "datatype": "integer", + "name": "Year" + }, + { + "datatype": "string", + "name": "Location name" + }, + { + "datatype": "string", + "name": "Value" + }, + { + "datatype": "string", + "name": "binary" + }, + { + "datatype": "string", + "name": "anyURI" + }, + { + "datatype": "string", + "name": "email" + }, + { + "datatype": "string", + "name": "boolean" + }, + { + "datatype": { + "dc:format": "application/json", + "base": "json" + }, + "name": "array" + }, + { + "datatype": { + "dc:format": "application/json", + "base": "json" + }, + "name": "geojson" + } + ] + }, + "url": "tests/fixtures/frictionless-data.csv" + } + ] +} +``` + + +## Python API + +Find the Python API documentation at 
[csvw.readthedocs.io](https://csvw.readthedocs.io/en/latest/). + +A quick example for using `csvw` from Python code: + +```python +import json +from csvw import CSVW +data = CSVW('https://raw.githubusercontent.com/cldf/csvw/master/tests/fixtures/test.tsv') +print(json.dumps(data.to_json(minimal=True), indent=4)) +[ + { + "province": "Hello", + "territory": "world", + "precinct": "1" + } +] +``` + + +## Known limitations + +- We read **all** data which is specified as UTF-8 encoded using the + [`utf-8-sig` codecs](https://docs.python.org/3/library/codecs.html#module-encodings.utf_8_sig). + Thus, if such data starts with `U+FEFF` this will be interpreted as [BOM](https://en.wikipedia.org/wiki/Byte_order_mark) + and skipped. +- Low level CSV parsing is delegated to the `csv` module in Python's standard library. Thus, if a `commentPrefix` + is specified in a `Dialect` instance, this will lead to skipping rows where the first value starts + with `commentPrefix`, **even if the value was quoted**. +- Also, cell content containing `escapechar` may not be round-tripped as expected (when specifying + `escapechar` or a `csvw.Dialect` with `quoteChar` but `doubleQuote==False`), + when minimal quoting is specified. This is due to inconsistent `csv` behaviour + across Python versions (see https://bugs.python.org/issue44861). + + +## CSVW conformance + +While we use the CSVW specification as guideline, this package does not (and +probably never will) implement the full extent of this spec. + +- When CSV files with a header are read, columns are not matched in order with + column descriptions in the `tableSchema`, but instead are matched based on the + CSV column header and the column descriptions' `name` and `titles` attributes. + This allows for more flexibility, because columns in the CSV file may be + re-ordered without invalidating the metadata. A stricter matching can be forced + by specifying `"header": false` and `"skipRows": 1` in the table's dialect + description. 
+ +However, `csvw.CSVW` works correctly for +- 269 out of 270 [JSON tests](https://w3c.github.io/csvw/tests/#manifest-json), +- 280 out of 282 [validation tests](https://w3c.github.io/csvw/tests/#manifest-validation), +- 10 out of 18 [non-normative tests](https://w3c.github.io/csvw/tests/#manifest-nonnorm) + +from the [CSVW Test suites](https://w3c.github.io/csvw/tests/). + + +## Compatibility with [Frictionless Data Specs](https://specs.frictionlessdata.io/) + +A CSVW-described dataset is basically equivalent to a Frictionless DataPackage where all +[Data Resources](https://specs.frictionlessdata.io/data-resource/) are [Tabular Data](https://specs.frictionlessdata.io/tabular-data-resource/). +Thus, the `csvw` package provides some conversion functionality. To +"read CSVW data from a Data Package", there's the `csvw.TableGroup.from_frictionless_datapackage` method: +```python +from csvw import TableGroup +tg = TableGroup.from_frictionless_datapackage('PATH/TO/datapackage.json') +``` +To convert the metadata, the `TableGroup` can then be serialized: +```python +tg.to_file('csvw-metadata.json') +``` + +Note that the CSVW metadata file must be written to the Data Package's directory +to make sure relative paths to data resources work. + +This functionality - together with the schema inference capabilities +of [`frictionless describe`](https://framework.frictionlessdata.io/docs/guides/describing-data/) - provides +a convenient way to bootstrap CSVW metadata for a set of "raw" CSV +files, implemented in the [`csvwdescribe` command described above](#csvwdescribe). 
+
+
+## See also
+
+- https://www.w3.org/2013/csvw/wiki/Main_Page
+- https://csvw.org
+- https://github.com/CLARIAH/COW
+- https://github.com/CLARIAH/ruminator
+- https://github.com/bloomberg/pycsvw
+- https://specs.frictionlessdata.io/table-schema/
+- https://github.com/theodi/csvlint.rb
+- https://github.com/ruby-rdf/rdf-tabular
+- https://github.com/rdf-ext/rdf-parser-csvw
+- https://github.com/Robsteranium/csvwr
+
+
+## License
+
+This package is distributed under the [Apache 2.0 license](https://opensource.org/licenses/Apache-2.0).
+
+
+
+
+%package help
+# Documentation sub-package; it also satisfies requests for the name
+# python3-csvw-doc through its Provides tag.
+Summary: Development documents and examples for csvw
+Provides: python3-csvw-doc
+%description help
+# csvw
+
+[](https://github.com/cldf/csvw/actions?query=workflow%3Atests)
+[](https://pypi.org/project/csvw)
+[](https://csvw.readthedocs.io/en/latest/?badge=latest)
+
+
+This package provides
+- a Python API to read and write relational, tabular data according to the [CSV on the Web](https://csvw.org/) specification and
+- commandline tools for reading and validating CSVW data.
+ + +## Links + +- GitHub: https://github.com/cldf/csvw +- PyPI: https://pypi.org/project/csvw +- Issue Tracker: https://github.com/cldf/csvw/issues + + +## Installation + +This package runs under Python >=3.7, use pip to install: + +```bash +$ pip install csvw +``` + + +## CLI + +### `csvw2json` + +Converting CSVW data [to JSON](https://www.w3.org/TR/csv2json/) + +```shell +$ csvw2json tests/fixtures/zipped-metadata.json +{ + "tables": [ + { + "url": "tests/fixtures/zipped.csv", + "row": [ + { + "url": "tests/fixtures/zipped.csv#row=2", + "rownum": 1, + "describes": [ + { + "ID": "abc", + "Value": "the value" + } + ] + }, + { + "url": "tests/fixtures/zipped.csv#row=3", + "rownum": 2, + "describes": [ + { + "ID": "cde", + "Value": "another one" + } + ] + } + ] + } + ] +} +``` + +### `csvwvalidate` + +Validating CSVW data + +```shell +$ csvwvalidate tests/fixtures/zipped-metadata.json +OK +``` + +### `csvwdescribe` + +Describing tabular-data files with CSVW metadata + +```shell +$ csvwdescribe --delimiter "|" tests/fixtures/frictionless-data.csv +{ + "@context": "http://www.w3.org/ns/csvw", + "dc:conformsTo": "data-package", + "tables": [ + { + "dialect": { + "delimiter": "|" + }, + "tableSchema": { + "columns": [ + { + "datatype": "string", + "name": "FK" + }, + { + "datatype": "integer", + "name": "Year" + }, + { + "datatype": "string", + "name": "Location name" + }, + { + "datatype": "string", + "name": "Value" + }, + { + "datatype": "string", + "name": "binary" + }, + { + "datatype": "string", + "name": "anyURI" + }, + { + "datatype": "string", + "name": "email" + }, + { + "datatype": "string", + "name": "boolean" + }, + { + "datatype": { + "dc:format": "application/json", + "base": "json" + }, + "name": "array" + }, + { + "datatype": { + "dc:format": "application/json", + "base": "json" + }, + "name": "geojson" + } + ] + }, + "url": "tests/fixtures/frictionless-data.csv" + } + ] +} +``` + + +## Python API + +Find the Python API documentation at 
[csvw.readthedocs.io](https://csvw.readthedocs.io/en/latest/). + +A quick example for using `csvw` from Python code: + +```python +import json +from csvw import CSVW +data = CSVW('https://raw.githubusercontent.com/cldf/csvw/master/tests/fixtures/test.tsv') +print(json.dumps(data.to_json(minimal=True), indent=4)) +[ + { + "province": "Hello", + "territory": "world", + "precinct": "1" + } +] +``` + + +## Known limitations + +- We read **all** data which is specified as UTF-8 encoded using the + [`utf-8-sig` codecs](https://docs.python.org/3/library/codecs.html#module-encodings.utf_8_sig). + Thus, if such data starts with `U+FEFF` this will be interpreted as [BOM](https://en.wikipedia.org/wiki/Byte_order_mark) + and skipped. +- Low level CSV parsing is delegated to the `csv` module in Python's standard library. Thus, if a `commentPrefix` + is specified in a `Dialect` instance, this will lead to skipping rows where the first value starts + with `commentPrefix`, **even if the value was quoted**. +- Also, cell content containing `escapechar` may not be round-tripped as expected (when specifying + `escapechar` or a `csvw.Dialect` with `quoteChar` but `doubleQuote==False`), + when minimal quoting is specified. This is due to inconsistent `csv` behaviour + across Python versions (see https://bugs.python.org/issue44861). + + +## CSVW conformance + +While we use the CSVW specification as guideline, this package does not (and +probably never will) implement the full extent of this spec. + +- When CSV files with a header are read, columns are not matched in order with + column descriptions in the `tableSchema`, but instead are matched based on the + CSV column header and the column descriptions' `name` and `titles` attributes. + This allows for more flexibility, because columns in the CSV file may be + re-ordered without invalidating the metadata. A stricter matching can be forced + by specifying `"header": false` and `"skipRows": 1` in the table's dialect + description. 
+ +However, `csvw.CSVW` works correctly for +- 269 out of 270 [JSON tests](https://w3c.github.io/csvw/tests/#manifest-json), +- 280 out of 282 [validation tests](https://w3c.github.io/csvw/tests/#manifest-validation), +- 10 out of 18 [non-normative tests](https://w3c.github.io/csvw/tests/#manifest-nonnorm) + +from the [CSVW Test suites](https://w3c.github.io/csvw/tests/). + + +## Compatibility with [Frictionless Data Specs](https://specs.frictionlessdata.io/) + +A CSVW-described dataset is basically equivalent to a Frictionless DataPackage where all +[Data Resources](https://specs.frictionlessdata.io/data-resource/) are [Tabular Data](https://specs.frictionlessdata.io/tabular-data-resource/). +Thus, the `csvw` package provides some conversion functionality. To +"read CSVW data from a Data Package", there's the `csvw.TableGroup.from_frictionless_datapackage` method: +```python +from csvw import TableGroup +tg = TableGroup.from_frictionless_datapackage('PATH/TO/datapackage.json') +``` +To convert the metadata, the `TableGroup` can then be serialized: +```python +tg.to_file('csvw-metadata.json') +``` + +Note that the CSVW metadata file must be written to the Data Package's directory +to make sure relative paths to data resources work. + +This functionality - together with the schema inference capabilities +of [`frictionless describe`](https://framework.frictionlessdata.io/docs/guides/describing-data/) - provides +a convenient way to bootstrap CSVW metadata for a set of "raw" CSV +files, implemented in the [`csvwdescribe` command described above](#csvwdescribe). 
+
+
+## See also
+
+- https://www.w3.org/2013/csvw/wiki/Main_Page
+- https://csvw.org
+- https://github.com/CLARIAH/COW
+- https://github.com/CLARIAH/ruminator
+- https://github.com/bloomberg/pycsvw
+- https://specs.frictionlessdata.io/table-schema/
+- https://github.com/theodi/csvlint.rb
+- https://github.com/ruby-rdf/rdf-tabular
+- https://github.com/rdf-ext/rdf-parser-csvw
+- https://github.com/Robsteranium/csvwr
+
+
+## License
+
+This package is distributed under the [Apache 2.0 license](https://opensource.org/licenses/Apache-2.0).
+
+
+
+
+%prep
+# Unpack the source archive; the directory name follows the Version tag so a
+# version bump does not need a second edit here.
+%autosetup -n csvw-%{version}
+
+%build
+%py3_build
+
+%install
+%py3_install
+# Copy whichever upstream documentation/example directories exist into the
+# package documentation directory.
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+for docdir in doc docs example examples; do
+    if [ -d "$docdir" ]; then cp -arf "$docdir" %{buildroot}/%{_pkgdocdir}; fi
+done
+# Generate the manifests consumed by the two files sections below.
+pushd %{buildroot}
+# Create both manifests up front so the mv calls below cannot fail when no
+# directory matches.
+touch filelist.lst doclist.lst
+for dir in usr/lib usr/lib64 usr/bin usr/sbin; do
+    if [ -d "$dir" ]; then
+        find "$dir" -type f -printf "/%h/%f\n" >> filelist.lst
+    fi
+done
+# Man pages are recorded with a .gz suffix - presumably because a later build
+# step compresses them; confirm against the distribution build policy.
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst
+fi
+popd
+# The manifests must not stay inside the build root; move them next to the
+# sources where the files sections look them up.
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-csvw -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Wed Apr 12 2023 Python_Bot <Python_Bot@openeuler.org> - 3.1.3-1
+- Package Spec generated |