From eb3f3a5e5bccef8c0e1b10e13e7c97f4b243b2fa Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Wed, 12 Apr 2023 06:22:35 +0000 Subject: automatic import of python-waybackpy --- .gitignore | 1 + python-waybackpy.spec | 527 ++++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 529 insertions(+) create mode 100644 python-waybackpy.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..76d31af 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/waybackpy-3.0.6.tar.gz diff --git a/python-waybackpy.spec b/python-waybackpy.spec new file mode 100644 index 0000000..d6cf0b8 --- /dev/null +++ b/python-waybackpy.spec @@ -0,0 +1,527 @@ +%global _empty_manifest_terminate_build 0 +Name: python-waybackpy +Version: 3.0.6 +Release: 1 +Summary: Python package that interfaces with the Internet Archive's Wayback Machine APIs. Archive pages and retrieve archived pages easily. +License: MIT +URL: https://akamhy.github.io/waybackpy/ +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/34/ab/90085feb81e7fad7d00c736f98e74ec315159ebef2180a77c85a06b2f0aa/waybackpy-3.0.6.tar.gz +BuildArch: noarch + +Requires: python3-click +Requires: python3-requests +Requires: python3-urllib3 +Requires: python3-black +Requires: python3-codecov +Requires: python3-flake8 +Requires: python3-mypy +Requires: python3-pytest +Requires: python3-pytest-cov +Requires: python3-setuptools +Requires: python3-types-requests + +%description +# Introduction +Waybackpy is a Python package and a CLI tool that interfaces with the Wayback Machine APIs. +Wayback Machine has 3 client side APIs. +- SavePageNow or Save API +- CDX Server API +- Availability API +These three APIs can be accessed via the waybackpy either by importing it from a python file/module or from the command-line interface. 
+## Installation +**Using [pip](https://en.wikipedia.org/wiki/Pip_(package_manager)), from [PyPI](https://pypi.org/) (recommended)**: +```bash +pip install waybackpy +``` +**Using [conda](https://en.wikipedia.org/wiki/Conda_(package_manager)), from [conda-forge](https://anaconda.org/conda-forge/waybackpy) (recommended)**: +See also [waybackpy feedstock](https://github.com/conda-forge/waybackpy-feedstock), maintainers are [@rafaelrdealmeida](https://github.com/rafaelrdealmeida/), + [@labriunesp](https://github.com/labriunesp/) + and [@akamhy](https://github.com/akamhy/). +```bash +conda install -c conda-forge waybackpy +``` +**Install directly from [this git repository](https://github.com/akamhy/waybackpy) (NOT recommended)**: +```bash +pip install git+https://github.com/akamhy/waybackpy.git +``` +## Docker Image +Docker Hub: [hub.docker.com/r/secsi/waybackpy](https://hub.docker.com/r/secsi/waybackpy) +Docker image is automatically updated on every release by [Regularly and Automatically Updated Docker Images](https://github.com/cybersecsi/RAUDI) (RAUDI). +RAUDI is a tool by [SecSI](https://secsi.io), an Italian cybersecurity startup. 
+## Usage +### As a Python package +#### Save API aka SavePageNow +```python +>>> from waybackpy import WaybackMachineSaveAPI +>>> url = "https://github.com" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> +>>> save_api = WaybackMachineSaveAPI(url, user_agent) +>>> save_api.save() +https://web.archive.org/web/20220118125249/https://github.com/ +>>> save_api.cached_save +False +>>> save_api.timestamp() +datetime.datetime(2022, 1, 18, 12, 52, 49) +``` +#### CDX API aka CDXServerAPI +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://google.com" +>>> user_agent = "my new app's user agent" +>>> cdx_api = WaybackMachineCDXServerAPI(url, user_agent) +``` +##### oldest +```python +>>> cdx_api.oldest() +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest = cdx_api.oldest() +>>> oldest +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest.archive_url +'https://web.archive.org/web/19981111184551/http://google.com:80/' +>>> oldest.original +'http://google.com:80/' +>>> oldest.urlkey +'com,google)/' +>>> oldest.timestamp +'19981111184551' +>>> oldest.datetime_timestamp +datetime.datetime(1998, 11, 11, 18, 45, 51) +>>> oldest.statuscode +'200' +>>> oldest.mimetype +'text/html' +``` +##### newest +```python +>>> newest = cdx_api.newest() +>>> newest +com,google)/ 20220217234427 http://@google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 563 +>>> newest.archive_url +'https://web.archive.org/web/20220217234427/http://@google.com/' +>>> newest.timestamp +'20220217234427' +``` +##### near +```python +>>> near = cdx_api.near(year=2010, month=10, day=10, hour=10, minute=10) +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> 
near.timestamp +'20101010101435' +>>> near.timestamp +'20101010101435' +>>> near = cdx_api.near(wayback_machine_timestamp=2008080808) +>>> near.archive_url +'https://web.archive.org/web/20080808051143/http://google.com/' +>>> near = cdx_api.near(unix_timestamp=1286705410) +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> +``` +##### snapshots +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://pypi.org" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> cdx = WaybackMachineCDXServerAPI(url, user_agent, start_timestamp=2016, end_timestamp=2017) +>>> for item in cdx.snapshots(): +https://web.archive.org/web/20160110011047/http://pypi.org/ +https://web.archive.org/web/20160305104847/http://pypi.org/ +. +. # URLS REDACTED FOR READABILITY +. +https://web.archive.org/web/20171127171549/https://pypi.org/ +https://web.archive.org/web/20171206002737/http://pypi.org:80/ +``` +#### Availability API +It is recommended to not use the availability API due to performance issues. All the methods of availability API interface class, `WaybackMachineAvailabilityAPI`, are also implemented in the CDX server API interface class, `WaybackMachineCDXServerAPI`. Also note +that the `newest()` method of `WaybackMachineAvailabilityAPI` can be more recent than `WaybackMachineCDXServerAPI`'s same method. 
+```python +>>> from waybackpy import WaybackMachineAvailabilityAPI +>>> +>>> url = "https://google.com" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> +>>> availability_api = WaybackMachineAvailabilityAPI(url, user_agent) +``` +##### oldest +```python +>>> availability_api.oldest() +https://web.archive.org/web/19981111184551/http://google.com:80/ +``` +##### newest +```python +>>> availability_api.newest() +https://web.archive.org/web/20220118150444/https://www.google.com/ +``` +##### near +```python +>>> availability_api.near(year=2010, month=10, day=10, hour=10) +https://web.archive.org/web/20101010101708/http://www.google.com/ +``` +> Documentation is at <https://github.com/akamhy/waybackpy/wiki/Python-package-docs>. +### As a CLI tool +Demo video on [asciinema.org](https://asciinema.org/a/469890), you can copy the text from video: +[![asciicast](https://asciinema.org/a/469890.svg)](https://asciinema.org/a/469890) +> CLI documentation is at <https://github.com/akamhy/waybackpy/wiki/CLI-docs>. + +%package -n python3-waybackpy +Summary: Python package that interfaces with the Internet Archive's Wayback Machine APIs. Archive pages and retrieve archived pages easily. +Provides: python-waybackpy +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-waybackpy +# Introduction +Waybackpy is a Python package and a CLI tool that interfaces with the Wayback Machine APIs. +Wayback Machine has 3 client side APIs. +- SavePageNow or Save API +- CDX Server API +- Availability API +These three APIs can be accessed via the waybackpy either by importing it from a python file/module or from the command-line interface. 
+## Installation +**Using [pip](https://en.wikipedia.org/wiki/Pip_(package_manager)), from [PyPI](https://pypi.org/) (recommended)**: +```bash +pip install waybackpy +``` +**Using [conda](https://en.wikipedia.org/wiki/Conda_(package_manager)), from [conda-forge](https://anaconda.org/conda-forge/waybackpy) (recommended)**: +See also [waybackpy feedstock](https://github.com/conda-forge/waybackpy-feedstock), maintainers are [@rafaelrdealmeida](https://github.com/rafaelrdealmeida/), + [@labriunesp](https://github.com/labriunesp/) + and [@akamhy](https://github.com/akamhy/). +```bash +conda install -c conda-forge waybackpy +``` +**Install directly from [this git repository](https://github.com/akamhy/waybackpy) (NOT recommended)**: +```bash +pip install git+https://github.com/akamhy/waybackpy.git +``` +## Docker Image +Docker Hub: [hub.docker.com/r/secsi/waybackpy](https://hub.docker.com/r/secsi/waybackpy) +Docker image is automatically updated on every release by [Regularly and Automatically Updated Docker Images](https://github.com/cybersecsi/RAUDI) (RAUDI). +RAUDI is a tool by [SecSI](https://secsi.io), an Italian cybersecurity startup. 
+## Usage +### As a Python package +#### Save API aka SavePageNow +```python +>>> from waybackpy import WaybackMachineSaveAPI +>>> url = "https://github.com" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> +>>> save_api = WaybackMachineSaveAPI(url, user_agent) +>>> save_api.save() +https://web.archive.org/web/20220118125249/https://github.com/ +>>> save_api.cached_save +False +>>> save_api.timestamp() +datetime.datetime(2022, 1, 18, 12, 52, 49) +``` +#### CDX API aka CDXServerAPI +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://google.com" +>>> user_agent = "my new app's user agent" +>>> cdx_api = WaybackMachineCDXServerAPI(url, user_agent) +``` +##### oldest +```python +>>> cdx_api.oldest() +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest = cdx_api.oldest() +>>> oldest +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest.archive_url +'https://web.archive.org/web/19981111184551/http://google.com:80/' +>>> oldest.original +'http://google.com:80/' +>>> oldest.urlkey +'com,google)/' +>>> oldest.timestamp +'19981111184551' +>>> oldest.datetime_timestamp +datetime.datetime(1998, 11, 11, 18, 45, 51) +>>> oldest.statuscode +'200' +>>> oldest.mimetype +'text/html' +``` +##### newest +```python +>>> newest = cdx_api.newest() +>>> newest +com,google)/ 20220217234427 http://@google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 563 +>>> newest.archive_url +'https://web.archive.org/web/20220217234427/http://@google.com/' +>>> newest.timestamp +'20220217234427' +``` +##### near +```python +>>> near = cdx_api.near(year=2010, month=10, day=10, hour=10, minute=10) +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> 
near.timestamp +'20101010101435' +>>> near.timestamp +'20101010101435' +>>> near = cdx_api.near(wayback_machine_timestamp=2008080808) +>>> near.archive_url +'https://web.archive.org/web/20080808051143/http://google.com/' +>>> near = cdx_api.near(unix_timestamp=1286705410) +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> +``` +##### snapshots +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://pypi.org" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> cdx = WaybackMachineCDXServerAPI(url, user_agent, start_timestamp=2016, end_timestamp=2017) +>>> for item in cdx.snapshots(): +https://web.archive.org/web/20160110011047/http://pypi.org/ +https://web.archive.org/web/20160305104847/http://pypi.org/ +. +. # URLS REDACTED FOR READABILITY +. +https://web.archive.org/web/20171127171549/https://pypi.org/ +https://web.archive.org/web/20171206002737/http://pypi.org:80/ +``` +#### Availability API +It is recommended to not use the availability API due to performance issues. All the methods of availability API interface class, `WaybackMachineAvailabilityAPI`, are also implemented in the CDX server API interface class, `WaybackMachineCDXServerAPI`. Also note +that the `newest()` method of `WaybackMachineAvailabilityAPI` can be more recent than `WaybackMachineCDXServerAPI`'s same method. 
+```python +>>> from waybackpy import WaybackMachineAvailabilityAPI +>>> +>>> url = "https://google.com" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> +>>> availability_api = WaybackMachineAvailabilityAPI(url, user_agent) +``` +##### oldest +```python +>>> availability_api.oldest() +https://web.archive.org/web/19981111184551/http://google.com:80/ +``` +##### newest +```python +>>> availability_api.newest() +https://web.archive.org/web/20220118150444/https://www.google.com/ +``` +##### near +```python +>>> availability_api.near(year=2010, month=10, day=10, hour=10) +https://web.archive.org/web/20101010101708/http://www.google.com/ +``` +> Documentation is at <https://github.com/akamhy/waybackpy/wiki/Python-package-docs>. +### As a CLI tool +Demo video on [asciinema.org](https://asciinema.org/a/469890), you can copy the text from video: +[![asciicast](https://asciinema.org/a/469890.svg)](https://asciinema.org/a/469890) +> CLI documentation is at <https://github.com/akamhy/waybackpy/wiki/CLI-docs>. + +%package help +Summary: Development documents and examples for waybackpy +Provides: python3-waybackpy-doc +%description help +# Introduction +Waybackpy is a Python package and a CLI tool that interfaces with the Wayback Machine APIs. +Wayback Machine has 3 client side APIs. +- SavePageNow or Save API +- CDX Server API +- Availability API +These three APIs can be accessed via the waybackpy either by importing it from a python file/module or from the command-line interface. +## Installation +**Using [pip](https://en.wikipedia.org/wiki/Pip_(package_manager)), from [PyPI](https://pypi.org/) (recommended)**: +```bash +pip install waybackpy +``` +**Using [conda](https://en.wikipedia.org/wiki/Conda_(package_manager)), from [conda-forge](https://anaconda.org/conda-forge/waybackpy) (recommended)**: +See also [waybackpy feedstock](https://github.com/conda-forge/waybackpy-feedstock), maintainers are [@rafaelrdealmeida](https://github.com/rafaelrdealmeida/), + [@labriunesp](https://github.com/labriunesp/) + and [@akamhy](https://github.com/akamhy/). 
+```bash +conda install -c conda-forge waybackpy +``` +**Install directly from [this git repository](https://github.com/akamhy/waybackpy) (NOT recommended)**: +```bash +pip install git+https://github.com/akamhy/waybackpy.git +``` +## Docker Image +Docker Hub: [hub.docker.com/r/secsi/waybackpy](https://hub.docker.com/r/secsi/waybackpy) +Docker image is automatically updated on every release by [Regularly and Automatically Updated Docker Images](https://github.com/cybersecsi/RAUDI) (RAUDI). +RAUDI is a tool by [SecSI](https://secsi.io), an Italian cybersecurity startup. +## Usage +### As a Python package +#### Save API aka SavePageNow +```python +>>> from waybackpy import WaybackMachineSaveAPI +>>> url = "https://github.com" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> +>>> save_api = WaybackMachineSaveAPI(url, user_agent) +>>> save_api.save() +https://web.archive.org/web/20220118125249/https://github.com/ +>>> save_api.cached_save +False +>>> save_api.timestamp() +datetime.datetime(2022, 1, 18, 12, 52, 49) +``` +#### CDX API aka CDXServerAPI +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://google.com" +>>> user_agent = "my new app's user agent" +>>> cdx_api = WaybackMachineCDXServerAPI(url, user_agent) +``` +##### oldest +```python +>>> cdx_api.oldest() +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest = cdx_api.oldest() +>>> oldest +com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381 +>>> oldest.archive_url +'https://web.archive.org/web/19981111184551/http://google.com:80/' +>>> oldest.original +'http://google.com:80/' +>>> oldest.urlkey +'com,google)/' +>>> oldest.timestamp +'19981111184551' +>>> oldest.datetime_timestamp +datetime.datetime(1998, 11, 11, 18, 45, 51) +>>> oldest.statuscode +'200' +>>> oldest.mimetype +'text/html' +``` +##### newest +```python +>>> newest = 
cdx_api.newest() +>>> newest +com,google)/ 20220217234427 http://@google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 563 +>>> newest.archive_url +'https://web.archive.org/web/20220217234427/http://@google.com/' +>>> newest.timestamp +'20220217234427' +``` +##### near +```python +>>> near = cdx_api.near(year=2010, month=10, day=10, hour=10, minute=10) +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> near.timestamp +'20101010101435' +>>> near.timestamp +'20101010101435' +>>> near = cdx_api.near(wayback_machine_timestamp=2008080808) +>>> near.archive_url +'https://web.archive.org/web/20080808051143/http://google.com/' +>>> near = cdx_api.near(unix_timestamp=1286705410) +>>> near +com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391 +>>> near.archive_url +'https://web.archive.org/web/20101010101435/http://google.com/' +>>> +``` +##### snapshots +```python +>>> from waybackpy import WaybackMachineCDXServerAPI +>>> url = "https://pypi.org" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> cdx = WaybackMachineCDXServerAPI(url, user_agent, start_timestamp=2016, end_timestamp=2017) +>>> for item in cdx.snapshots(): +https://web.archive.org/web/20160110011047/http://pypi.org/ +https://web.archive.org/web/20160305104847/http://pypi.org/ +. +. # URLS REDACTED FOR READABILITY +. +https://web.archive.org/web/20171127171549/https://pypi.org/ +https://web.archive.org/web/20171206002737/http://pypi.org:80/ +``` +#### Availability API +It is recommended to not use the availability API due to performance issues. All the methods of availability API interface class, `WaybackMachineAvailabilityAPI`, are also implemented in the CDX server API interface class, `WaybackMachineCDXServerAPI`. 
Also note +that the `newest()` method of `WaybackMachineAvailabilityAPI` can be more recent than `WaybackMachineCDXServerAPI`'s same method. +```python +>>> from waybackpy import WaybackMachineAvailabilityAPI +>>> +>>> url = "https://google.com" +>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0" +>>> +>>> availability_api = WaybackMachineAvailabilityAPI(url, user_agent) +``` +##### oldest +```python +>>> availability_api.oldest() +https://web.archive.org/web/19981111184551/http://google.com:80/ +``` +##### newest +```python +>>> availability_api.newest() +https://web.archive.org/web/20220118150444/https://www.google.com/ +``` +##### near +```python +>>> availability_api.near(year=2010, month=10, day=10, hour=10) +https://web.archive.org/web/20101010101708/http://www.google.com/ +``` +> Documentation is at <https://github.com/akamhy/waybackpy/wiki/Python-package-docs>. +### As a CLI tool +Demo video on [asciinema.org](https://asciinema.org/a/469890), you can copy the text from video: +[![asciicast](https://asciinema.org/a/469890.svg)](https://asciinema.org/a/469890) +> CLI documentation is at <https://github.com/akamhy/waybackpy/wiki/CLI-docs>. 
+ +%prep +%autosetup -n waybackpy-3.0.6 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-waybackpy -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Wed Apr 12 2023 Python_Bot - 3.0.6-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..3874d9f --- /dev/null +++ b/sources @@ -0,0 +1 @@ +a724cf6e2c5b20fde24173301d63aaab waybackpy-3.0.6.tar.gz -- cgit v1.2.3