diff options
author | CoprDistGit <infra@openeuler.org> | 2023-04-10 13:02:05 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-04-10 13:02:05 +0000 |
commit | 1b0168e692b9dfd138d7ca0622554704dcf24051 (patch) | |
tree | f2b2ea121a42f02de011f686333ce637e34ea6a5 | |
parent | 8952ef877978ae3e07594599348927cd51799963 (diff) |
automatic import of python-search-engine-parser
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | python-search-engine-parser.spec | 920 | ||||
-rw-r--r-- | sources | 1 |
3 files changed, 922 insertions, 0 deletions
@@ -0,0 +1 @@ +/search-engine-parser-0.6.8.tar.gz diff --git a/python-search-engine-parser.spec b/python-search-engine-parser.spec new file mode 100644 index 0000000..ac59b85 --- /dev/null +++ b/python-search-engine-parser.spec @@ -0,0 +1,920 @@ +%global _empty_manifest_terminate_build 0 +Name: python-search-engine-parser +Version: 0.6.8 +Release: 1 +Summary: scrapes search engine pages for query titles, descriptions and links +License: MIT +URL: https://github.com/bisoncorps/search-engine-parser +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/cf/83/510ce907753919812bec1a2e2a279443f50aadb1a64ab2adc16fca0b8dea/search-engine-parser-0.6.8.tar.gz +BuildArch: noarch + +Requires: python3-lxml +Requires: python3-aiohttp +Requires: python3-beautifulsoup4 +Requires: python3-fake-useragent +Requires: python3-blessed + +%description +# Search Engine Parser + +<span><i>"If it is a search engine, then it can be parsed"</i> - some random guy</span> + + + +[](https://www.python.org/downloads/) +[](https://pypi.org/project/search-engine-parser/) +[](https://pypi.org/project/search-engine-parser/) +[](https://github.com/bisohns/search-engine-parser/actions/workflows/deploy.yml) +[](https://github.com/bisohns/search-engine-parser/actions/workflows/test.yml) +[](https://search-engine-parser.readthedocs.io/en/latest/?badge=latest) +[](https://opensource.org/licenses/MIT) +[](#contributors) +<hr/> + +search-engine-parser is a package that lets you query popular search engines and scrape for result titles, links, descriptions and more. It aims to scrape the widest range of search engines. 
+View all supported engines [here.](https://github.com/bisoncorps/search-engine-parser/blob/master/docs/supported_engines.md) + +- [Search Engine Parser](#search-engine-parser) + - [Popular Supported Engines](#popular-supported-engines) + - [Installation](#installation) + - [Development](#development) + - [Code Documentation](#code-documentation) + - [Running the tests](#running-the-tests) + - [Usage](#usage) + - [Code](#code) + - [Command line](#command-line) + - [FAQ](docs/faq.md) + - [Code of Conduct](#code-of-conduct) + - [Contribution](#contribution) + - [License (MIT)](#license-mit) + +## Popular Supported Engines +Popular search engines supported include: + +- Google +- DuckDuckGo +- GitHub +- StackOverflow +- Baidu +- YouTube + +View all supported engines [here.](docs/supported_engines.md) + +## Installation +Install from PyPi: + +```bash + # install only package dependencies + pip install search-engine-parser + # Installs `pysearch` cli tool + pip install "search-engine-parser[cli]" +``` + +or from master: +```bash + pip install git+https://github.com/bisoncorps/search-engine-parser +``` + +## Development +Clone the repository: + +```bash + git clone git@github.com:bisoncorps/search-engine-parser.git +``` + +Then create a virtual environment and install the required packages: + +```bash + mkvirtualenv search_engine_parser + pip install -r requirements/dev.txt +``` + + +## Code Documentation +Code docs can be found on [Read the Docs](https://search-engine-parser.readthedocs.io/en/latest). + +## Running the tests +```bash + pytest +``` + +## Usage + +### Code +Query results can be scraped from popular search engines, as shown in the example snippet below. 
+ +```python + import pprint + + from search_engine_parser.core.engines.bing import Search as BingSearch + from search_engine_parser.core.engines.google import Search as GoogleSearch + from search_engine_parser.core.engines.yahoo import Search as YahooSearch + + search_args = ('preaching to the choir', 1) + gsearch = GoogleSearch() + ysearch = YahooSearch() + bsearch = BingSearch() + gresults = gsearch.search(*search_args) + yresults = ysearch.search(*search_args) + bresults = bsearch.search(*search_args) + a = { + "Google": gresults, + "Yahoo": yresults, + "Bing": bresults + } + + # pretty print the result from each engine + for k, v in a.items(): + print(f"-------------{k}------------") + for result in v: + pprint.pprint(result) + + # print first title from google search + print(gresults["titles"][0]) + # print 10th link from yahoo search + print(yresults["links"][9]) + # print 6th description from bing search + print(bresults["descriptions"][5]) + + # print first result containing links, descriptions and title + print(gresults[0]) +``` + +For localization, you can pass the `url` keyword and a localized url. This queries and parses the localized url using the same engine's parser: +```python + # Use google.de instead of google.com + results = gsearch.search(*search_args, url="google.de") +``` + +If you need results in a specific language you can pass the `hl` keyword and the 2-letter ISO 639-1 language code (here's a [handy list](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)): +```python + # Use 'it' to receive Italian results + results = gsearch.search(*search_args, hl="it") +``` + +#### Cache +The results are automatically cached for engine searches. 
You can either bypass the cache by adding `cache=False` to the `search` or `async_search` method or clear the engine's cache: +```python + from search_engine_parser.core.engines.github import Search as GitHub + github = GitHub() + # bypass the cache + github.search("search-engine-parser", cache=False) + + #OR + # clear cache before search + github.clear_cache() + github.search("search-engine-parser") +``` + +#### Proxy +Adding a proxy entails sending details to the search function: +```python + from search_engine_parser.core.engines.github import Search as GitHub + github = GitHub() + github.search("search-engine-parser", + # http proxies supported only + proxy='http://123.12.1.0', + proxy_auth=('username', 'password')) +``` + + +#### Async +search-engine-parser supports `async`: +```python + results = await gsearch.async_search(*search_args) +``` + +#### Results +The `SearchResult` object returned after searching: +```python + >>> results = gsearch.search("preaching to the choir", 1) + >>> results + <search_engine_parser.core.base.SearchResult object at 0x7f907426a280> + # the object supports retrieving individual results by iteration or just by type (links, descriptions, titles) + >>> results[0] # returns the first <SearchItem> + >>> results[0]["description"] # gets the description of the first item + >>> results[0]["link"] # gets the link of the first item + >>> results["descriptions"] # returns a list of all descriptions from all results +``` +It can be iterated like a normal list to return individual `SearchItem`s. + +### Command line + +search-engine-parser comes with a CLI tool known as `pysearch`. You can use it as such: + +```bash +pysearch --engine bing --type descriptions "Preaching to the choir" +``` + +Result: + +```bash +'Preaching to the choir' originated in the USA in the 1970s. It is a variant of the earlier 'preaching to the converted', which dates from England in the late 1800s and has the same meaning. 
Origin - the full story 'Preaching to the choir' (also sometimes spelled quire) is of US origin. +``` + + + +```bash +usage: pysearch [-h] [-V] [-e ENGINE] [--show-summary] [-u URL] [-p PAGE] + [-t TYPE] [-cc] [-r RANK] [--proxy PROXY] + [--proxy-user PROXY_USER] [--proxy-password PROXY_PASSWORD] + query + +SearchEngineParser + +positional arguments: + query Query string to search engine for + +optional arguments: + -h, --help show this help message and exit + -V, --version show program's version number and exit + -e ENGINE, --engine ENGINE + Engine to use for parsing the query e.g google, yahoo, + bing,duckduckgo (default: google) + --show-summary Shows the summary of an engine + -u URL, --url URL A custom link to use as base url for search e.g + google.de + -p PAGE, --page PAGE Page of the result to return details for (default: 1) + -t TYPE, --type TYPE Type of detail to return i.e full, links, desciptions + or titles (default: full) + -cc, --clear-cache Clear cache of engine before searching + -r RANK, --rank RANK ID of Detail to return e.g 5 (default: 0) + --proxy PROXY Proxy address to make use of + --proxy-user PROXY_USER + Proxy user to make use of + --proxy-password PROXY_PASSWORD + Proxy password to make use of +``` + + + +## Code of Conduct +Make sure to adhere to the [code of conduct](CODE_OF_CONDUCT.md) at all times. + +## Contribution +Before making any contributions, please read the [contribution guide](CONTRIBUTING.md). + +## License (MIT) +This project is licensed under the [MIT License](LICENSE), which allows very broad use for both academic and commercial purposes. 
+ +## Contributors β¨ + +Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): + +<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section --> +<!-- prettier-ignore-start --> +<!-- markdownlint-disable --> +<table> + <tr> + <td align="center"><a href="https://github.com/Rexogamer"><img src="https://avatars0.githubusercontent.com/u/42586271?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ed Luff</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Rexogamer" title="Code">π»</a></td> + <td align="center"><a href="http://diretnandomnan.webnode.com"><img src="https://avatars3.githubusercontent.com/u/23453888?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Diretnan Domnan</b></sub></a><br /><a href="#infra-deven96" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=deven96" title="Tests">β οΈ</a> <a href="#tool-deven96" title="Tools">π§</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=deven96" title="Code">π»</a></td> + <td align="center"><a href="http://mensaah.github.io"><img src="https://avatars3.githubusercontent.com/u/24734308?v=4?s=100" width="100px;" alt=""/><br /><sub><b>MeNsaaH</b></sub></a><br /><a href="#infra-MeNsaaH" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=MeNsaaH" title="Tests">β οΈ</a> <a href="#tool-MeNsaaH" title="Tools">π§</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=MeNsaaH" title="Code">π»</a></td> + <td align="center"><a href="https://github.com/PalAditya"><img src="https://avatars2.githubusercontent.com/u/25523604?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Aditya Pal</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Tests">β οΈ</a> <a 
href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Code">π»</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Documentation">π</a></td> + <td align="center"><a href="http://energized.pro"><img src="https://avatars1.githubusercontent.com/u/27774996?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Avinash Reddy</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3AAvinashReddy3108" title="Bug reports">π</a></td> + <td align="center"><a href="https://github.com/Iamdavidonuh"><img src="https://avatars3.githubusercontent.com/u/37768509?v=4?s=100" width="100px;" alt=""/><br /><sub><b>David Onuh</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Iamdavidonuh" title="Code">π»</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Iamdavidonuh" title="Tests">β οΈ</a></td> + <td align="center"><a href="http://simakis.me"><img src="https://avatars2.githubusercontent.com/u/8322266?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Panagiotis Simakis</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=sp1thas" title="Code">π»</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=sp1thas" title="Tests">β οΈ</a></td> + </tr> + <tr> + <td align="center"><a href="https://github.com/reiarthur"><img src="https://avatars2.githubusercontent.com/u/20190646?v=4?s=100" width="100px;" alt=""/><br /><sub><b>reiarthur</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=reiarthur" title="Code">π»</a></td> + <td align="center"><a href="http://ashokkumarta.blogspot.com/"><img src="https://avatars0.githubusercontent.com/u/5450267?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ashokkumar TA</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=ashokkumarta" 
title="Code">💻</a></td> + <td align="center"><a href="https://github.com/ateuber"><img src="https://avatars2.githubusercontent.com/u/44349054?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Andreas Teuber</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=ateuber" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/mi096684"><img src="https://avatars3.githubusercontent.com/u/22032932?v=4?s=100" width="100px;" alt=""/><br /><sub><b>mi096684</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Ami096684" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://github.com/devajithvs"><img src="https://avatars1.githubusercontent.com/u/29475282?v=4?s=100" width="100px;" alt=""/><br /><sub><b>devajithvs</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=devajithvs" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/zakaryan2004"><img src="https://avatars3.githubusercontent.com/u/29994884?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Geg Zakaryan</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=zakaryan2004" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Azakaryan2004" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://www.hakanbogan.com"><img src="https://avatars1.githubusercontent.com/u/24498747?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Hakan Boğan</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Aredrussianarmy" title="Bug reports">🐛</a></td> + </tr> + <tr> + <td align="center"><a href="https://github.com/NicKoehler"><img src="https://avatars3.githubusercontent.com/u/53040044?v=4?s=100" width="100px;" alt=""/><br /><sub><b>NicKoehler</b></sub></a><br /><a 
href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3ANicKoehler" title="Bug reports">🐛</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=NicKoehler" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/chris4540"><img src="https://avatars1.githubusercontent.com/u/12794588?v=4?s=100" width="100px;" alt=""/><br /><sub><b>ChrisLin</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Achris4540" title="Bug reports">🐛</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=chris4540" title="Code">💻</a></td> + <td align="center"><a href="http://pete.world"><img src="https://avatars.githubusercontent.com/u/10454135?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Pietro</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=pgrandinetti" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Apgrandinetti" title="Bug reports">🐛</a></td> + </tr> +</table> + +<!-- markdownlint-restore --> +<!-- prettier-ignore-end --> + +<!-- ALL-CONTRIBUTORS-LIST:END --> + +This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind are welcome! 
+ + +%package -n python3-search-engine-parser +Summary: scrapes search engine pages for query titles, descriptions and links +Provides: python-search-engine-parser +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-search-engine-parser +# Search Engine Parser + +<span><i>"If it is a search engine, then it can be parsed"</i> - some random guy</span> + + + +[](https://www.python.org/downloads/) +[](https://pypi.org/project/search-engine-parser/) +[](https://pypi.org/project/search-engine-parser/) +[](https://github.com/bisohns/search-engine-parser/actions/workflows/deploy.yml) +[](https://github.com/bisohns/search-engine-parser/actions/workflows/test.yml) +[](https://search-engine-parser.readthedocs.io/en/latest/?badge=latest) +[](https://opensource.org/licenses/MIT) +[](#contributors) +<hr/> + +search-engine-parser is a package that lets you query popular search engines and scrape for result titles, links, descriptions and more. It aims to scrape the widest range of search engines. 
+View all supported engines [here.](https://github.com/bisoncorps/search-engine-parser/blob/master/docs/supported_engines.md) + +- [Search Engine Parser](#search-engine-parser) + - [Popular Supported Engines](#popular-supported-engines) + - [Installation](#installation) + - [Development](#development) + - [Code Documentation](#code-documentation) + - [Running the tests](#running-the-tests) + - [Usage](#usage) + - [Code](#code) + - [Command line](#command-line) + - [FAQ](docs/faq.md) + - [Code of Conduct](#code-of-conduct) + - [Contribution](#contribution) + - [License (MIT)](#license-mit) + +## Popular Supported Engines +Popular search engines supported include: + +- Google +- DuckDuckGo +- GitHub +- StackOverflow +- Baidu +- YouTube + +View all supported engines [here.](docs/supported_engines.md) + +## Installation +Install from PyPi: + +```bash + # install only package dependencies + pip install search-engine-parser + # Installs `pysearch` cli tool + pip install "search-engine-parser[cli]" +``` + +or from master: +```bash + pip install git+https://github.com/bisoncorps/search-engine-parser +``` + +## Development +Clone the repository: + +```bash + git clone git@github.com:bisoncorps/search-engine-parser.git +``` + +Then create a virtual environment and install the required packages: + +```bash + mkvirtualenv search_engine_parser + pip install -r requirements/dev.txt +``` + + +## Code Documentation +Code docs can be found on [Read the Docs](https://search-engine-parser.readthedocs.io/en/latest). + +## Running the tests +```bash + pytest +``` + +## Usage + +### Code +Query results can be scraped from popular search engines, as shown in the example snippet below. 
+ +```python + import pprint + + from search_engine_parser.core.engines.bing import Search as BingSearch + from search_engine_parser.core.engines.google import Search as GoogleSearch + from search_engine_parser.core.engines.yahoo import Search as YahooSearch + + search_args = ('preaching to the choir', 1) + gsearch = GoogleSearch() + ysearch = YahooSearch() + bsearch = BingSearch() + gresults = gsearch.search(*search_args) + yresults = ysearch.search(*search_args) + bresults = bsearch.search(*search_args) + a = { + "Google": gresults, + "Yahoo": yresults, + "Bing": bresults + } + + # pretty print the result from each engine + for k, v in a.items(): + print(f"-------------{k}------------") + for result in v: + pprint.pprint(result) + + # print first title from google search + print(gresults["titles"][0]) + # print 10th link from yahoo search + print(yresults["links"][9]) + # print 6th description from bing search + print(bresults["descriptions"][5]) + + # print first result containing links, descriptions and title + print(gresults[0]) +``` + +For localization, you can pass the `url` keyword and a localized url. This queries and parses the localized url using the same engine's parser: +```python + # Use google.de instead of google.com + results = gsearch.search(*search_args, url="google.de") +``` + +If you need results in a specific language you can pass the `hl` keyword and the 2-letter ISO 639-1 language code (here's a [handy list](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)): +```python + # Use 'it' to receive Italian results + results = gsearch.search(*search_args, hl="it") +``` + +#### Cache +The results are automatically cached for engine searches. 
You can either bypass the cache by adding `cache=False` to the `search` or `async_search` method or clear the engine's cache: +```python + from search_engine_parser.core.engines.github import Search as GitHub + github = GitHub() + # bypass the cache + github.search("search-engine-parser", cache=False) + + #OR + # clear cache before search + github.clear_cache() + github.search("search-engine-parser") +``` + +#### Proxy +Adding a proxy entails sending details to the search function: +```python + from search_engine_parser.core.engines.github import Search as GitHub + github = GitHub() + github.search("search-engine-parser", + # http proxies supported only + proxy='http://123.12.1.0', + proxy_auth=('username', 'password')) +``` + + +#### Async +search-engine-parser supports `async`: +```python + results = await gsearch.async_search(*search_args) +``` + +#### Results +The `SearchResult` object returned after searching: +```python + >>> results = gsearch.search("preaching to the choir", 1) + >>> results + <search_engine_parser.core.base.SearchResult object at 0x7f907426a280> + # the object supports retrieving individual results by iteration or just by type (links, descriptions, titles) + >>> results[0] # returns the first <SearchItem> + >>> results[0]["description"] # gets the description of the first item + >>> results[0]["link"] # gets the link of the first item + >>> results["descriptions"] # returns a list of all descriptions from all results +``` +It can be iterated like a normal list to return individual `SearchItem`s. + +### Command line + +search-engine-parser comes with a CLI tool known as `pysearch`. You can use it as such: + +```bash +pysearch --engine bing --type descriptions "Preaching to the choir" +``` + +Result: + +```bash +'Preaching to the choir' originated in the USA in the 1970s. It is a variant of the earlier 'preaching to the converted', which dates from England in the late 1800s and has the same meaning. 
Origin - the full story 'Preaching to the choir' (also sometimes spelled quire) is of US origin. +``` + + + +```bash +usage: pysearch [-h] [-V] [-e ENGINE] [--show-summary] [-u URL] [-p PAGE] + [-t TYPE] [-cc] [-r RANK] [--proxy PROXY] + [--proxy-user PROXY_USER] [--proxy-password PROXY_PASSWORD] + query + +SearchEngineParser + +positional arguments: + query Query string to search engine for + +optional arguments: + -h, --help show this help message and exit + -V, --version show program's version number and exit + -e ENGINE, --engine ENGINE + Engine to use for parsing the query e.g google, yahoo, + bing,duckduckgo (default: google) + --show-summary Shows the summary of an engine + -u URL, --url URL A custom link to use as base url for search e.g + google.de + -p PAGE, --page PAGE Page of the result to return details for (default: 1) + -t TYPE, --type TYPE Type of detail to return i.e full, links, desciptions + or titles (default: full) + -cc, --clear-cache Clear cache of engine before searching + -r RANK, --rank RANK ID of Detail to return e.g 5 (default: 0) + --proxy PROXY Proxy address to make use of + --proxy-user PROXY_USER + Proxy user to make use of + --proxy-password PROXY_PASSWORD + Proxy password to make use of +``` + + + +## Code of Conduct +Make sure to adhere to the [code of conduct](CODE_OF_CONDUCT.md) at all times. + +## Contribution +Before making any contributions, please read the [contribution guide](CONTRIBUTING.md). + +## License (MIT) +This project is licensed under the [MIT License](LICENSE), which allows very broad use for both academic and commercial purposes. 
+ +## Contributors β¨ + +Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): + +<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section --> +<!-- prettier-ignore-start --> +<!-- markdownlint-disable --> +<table> + <tr> + <td align="center"><a href="https://github.com/Rexogamer"><img src="https://avatars0.githubusercontent.com/u/42586271?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ed Luff</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Rexogamer" title="Code">π»</a></td> + <td align="center"><a href="http://diretnandomnan.webnode.com"><img src="https://avatars3.githubusercontent.com/u/23453888?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Diretnan Domnan</b></sub></a><br /><a href="#infra-deven96" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=deven96" title="Tests">β οΈ</a> <a href="#tool-deven96" title="Tools">π§</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=deven96" title="Code">π»</a></td> + <td align="center"><a href="http://mensaah.github.io"><img src="https://avatars3.githubusercontent.com/u/24734308?v=4?s=100" width="100px;" alt=""/><br /><sub><b>MeNsaaH</b></sub></a><br /><a href="#infra-MeNsaaH" title="Infrastructure (Hosting, Build-Tools, etc)">π</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=MeNsaaH" title="Tests">β οΈ</a> <a href="#tool-MeNsaaH" title="Tools">π§</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=MeNsaaH" title="Code">π»</a></td> + <td align="center"><a href="https://github.com/PalAditya"><img src="https://avatars2.githubusercontent.com/u/25523604?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Aditya Pal</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Tests">β οΈ</a> <a 
href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Code">π»</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Documentation">π</a></td> + <td align="center"><a href="http://energized.pro"><img src="https://avatars1.githubusercontent.com/u/27774996?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Avinash Reddy</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3AAvinashReddy3108" title="Bug reports">π</a></td> + <td align="center"><a href="https://github.com/Iamdavidonuh"><img src="https://avatars3.githubusercontent.com/u/37768509?v=4?s=100" width="100px;" alt=""/><br /><sub><b>David Onuh</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Iamdavidonuh" title="Code">π»</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Iamdavidonuh" title="Tests">β οΈ</a></td> + <td align="center"><a href="http://simakis.me"><img src="https://avatars2.githubusercontent.com/u/8322266?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Panagiotis Simakis</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=sp1thas" title="Code">π»</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=sp1thas" title="Tests">β οΈ</a></td> + </tr> + <tr> + <td align="center"><a href="https://github.com/reiarthur"><img src="https://avatars2.githubusercontent.com/u/20190646?v=4?s=100" width="100px;" alt=""/><br /><sub><b>reiarthur</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=reiarthur" title="Code">π»</a></td> + <td align="center"><a href="http://ashokkumarta.blogspot.com/"><img src="https://avatars0.githubusercontent.com/u/5450267?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ashokkumar TA</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=ashokkumarta" 
title="Code">💻</a></td> + <td align="center"><a href="https://github.com/ateuber"><img src="https://avatars2.githubusercontent.com/u/44349054?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Andreas Teuber</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=ateuber" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/mi096684"><img src="https://avatars3.githubusercontent.com/u/22032932?v=4?s=100" width="100px;" alt=""/><br /><sub><b>mi096684</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Ami096684" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://github.com/devajithvs"><img src="https://avatars1.githubusercontent.com/u/29475282?v=4?s=100" width="100px;" alt=""/><br /><sub><b>devajithvs</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=devajithvs" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/zakaryan2004"><img src="https://avatars3.githubusercontent.com/u/29994884?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Geg Zakaryan</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=zakaryan2004" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Azakaryan2004" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://www.hakanbogan.com"><img src="https://avatars1.githubusercontent.com/u/24498747?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Hakan Boğan</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Aredrussianarmy" title="Bug reports">🐛</a></td> + </tr> + <tr> + <td align="center"><a href="https://github.com/NicKoehler"><img src="https://avatars3.githubusercontent.com/u/53040044?v=4?s=100" width="100px;" alt=""/><br /><sub><b>NicKoehler</b></sub></a><br /><a 
href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3ANicKoehler" title="Bug reports">🐛</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=NicKoehler" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/chris4540"><img src="https://avatars1.githubusercontent.com/u/12794588?v=4?s=100" width="100px;" alt=""/><br /><sub><b>ChrisLin</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Achris4540" title="Bug reports">🐛</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=chris4540" title="Code">💻</a></td> + <td align="center"><a href="http://pete.world"><img src="https://avatars.githubusercontent.com/u/10454135?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Pietro</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=pgrandinetti" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Apgrandinetti" title="Bug reports">🐛</a></td> + </tr> +</table> + +<!-- markdownlint-restore --> +<!-- prettier-ignore-end --> + +<!-- ALL-CONTRIBUTORS-LIST:END --> + +This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind are welcome! 
+ + +%package help +Summary: Development documents and examples for search-engine-parser +Provides: python3-search-engine-parser-doc +%description help +# Search Engine Parser + +<span><i>"If it is a search engine, then it can be parsed"</i> - some random guy</span> + + + +[](https://www.python.org/downloads/) +[](https://pypi.org/project/search-engine-parser/) +[](https://pypi.org/project/search-engine-parser/) +[](https://github.com/bisohns/search-engine-parser/actions/workflows/deploy.yml) +[](https://github.com/bisohns/search-engine-parser/actions/workflows/test.yml) +[](https://search-engine-parser.readthedocs.io/en/latest/?badge=latest) +[](https://opensource.org/licenses/MIT) +[](#contributors) +<hr/> + +search-engine-parser is a package that lets you query popular search engines and scrape for result titles, links, descriptions and more. It aims to scrape the widest range of search engines. +View all supported engines [here.](https://github.com/bisoncorps/search-engine-parser/blob/master/docs/supported_engines.md) + +- [Search Engine Parser](#search-engine-parser) + - [Popular Supported Engines](#popular-supported-engines) + - [Installation](#installation) + - [Development](#development) + - [Code Documentation](#code-documentation) + - [Running the tests](#running-the-tests) + - [Usage](#usage) + - [Code](#code) + - [Command line](#command-line) + - [FAQ](docs/faq.md) + - [Code of Conduct](#code-of-conduct) + - [Contribution](#contribution) + - [License (MIT)](#license-mit) + +## Popular Supported Engines +Popular search engines supported include: + +- Google +- DuckDuckGo +- GitHub +- StackOverflow +- Baidu +- YouTube + +View all supported engines [here.](docs/supported_engines.md) + +## Installation +Install from PyPi: + +```bash + # install only package dependencies + pip install search-engine-parser + # Installs `pysearch` cli tool + pip install "search-engine-parser[cli]" +``` + +or from master: +```bash + pip install 
git+https://github.com/bisoncorps/search-engine-parser +``` + +## Development +Clone the repository: + +```bash + git clone git@github.com:bisoncorps/search-engine-parser.git +``` + +Then create a virtual environment and install the required packages: + +```bash + mkvirtualenv search_engine_parser + pip install -r requirements/dev.txt +``` + + +## Code Documentation +Code docs can be found on [Read the Docs](https://search-engine-parser.readthedocs.io/en/latest). + +## Running the tests +```bash + pytest +``` + +## Usage + +### Code +Query results can be scraped from popular search engines, as shown in the example snippet below. + +```python + import pprint + + from search_engine_parser.core.engines.bing import Search as BingSearch + from search_engine_parser.core.engines.google import Search as GoogleSearch + from search_engine_parser.core.engines.yahoo import Search as YahooSearch + + search_args = ('preaching to the choir', 1) + gsearch = GoogleSearch() + ysearch = YahooSearch() + bsearch = BingSearch() + gresults = gsearch.search(*search_args) + yresults = ysearch.search(*search_args) + bresults = bsearch.search(*search_args) + a = { + "Google": gresults, + "Yahoo": yresults, + "Bing": bresults + } + + # pretty print the result from each engine + for k, v in a.items(): + print(f"-------------{k}------------") + for result in v: + pprint.pprint(result) + + # print first title from google search + print(gresults["titles"][0]) + # print 10th link from yahoo search + print(yresults["links"][9]) + # print 6th description from bing search + print(bresults["descriptions"][5]) + + # print first result containing links, descriptions and title + print(gresults[0]) +``` + +For localization, you can pass the `url` keyword and a localized url. 
This queries and parses the localized url using the same engine's parser: +```python + # Use google.de instead of google.com + results = gsearch.search(*search_args, url="google.de") +``` + +If you need results in a specific language you can pass the `hl` keyword and the 2-letter ISO 639-1 language code (here's a [handy list](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)): +```python + # Use 'it' to receive Italian results + results = gsearch.search(*search_args, hl="it") +``` + +#### Cache +The results are automatically cached for engine searches. You can either bypass the cache by adding `cache=False` to the `search` or `async_search` method, or clear the engine's cache: +```python + from search_engine_parser.core.engines.github import Search as GitHub + github = GitHub() + # bypass the cache + github.search("search-engine-parser", cache=False) + + #OR + # clear cache before search + github.clear_cache() + github.search("search-engine-parser") +``` + +#### Proxy +Adding a proxy entails passing the proxy details to the search function: +```python + from search_engine_parser.core.engines.github import Search as GitHub + github = GitHub() + github.search("search-engine-parser", + # only http proxies are supported + proxy='http://123.12.1.0', + proxy_auth=('username', 'password')) +``` + + +#### Async +search-engine-parser supports `async`: +```python + results = await gsearch.async_search(*search_args) +``` + +#### Results +The `SearchResult` object returned by a search: +```python + >>> results = gsearch.search("preaching to the choir", 1) + >>> results + <search_engine_parser.core.base.SearchResult object at 0x7f907426a280> + # the object supports retrieving individual results by iteration or just by type (links, descriptions, titles) + >>> results[0] # returns the first <SearchItem> + >>> results[0]["description"] # gets the description of the first item + >>> results[0]["link"] # gets the link of the first item + >>> results["descriptions"] # returns a list of all descriptions from 
all results +``` +It can be iterated like a normal list to return individual `SearchItem`s. + +### Command line + +search-engine-parser comes with a CLI tool known as `pysearch`. You can use it as such: + +```bash +pysearch --engine bing --type descriptions "Preaching to the choir" +``` + +Result: + +```bash +'Preaching to the choir' originated in the USA in the 1970s. It is a variant of the earlier 'preaching to the converted', which dates from England in the late 1800s and has the same meaning. Origin - the full story 'Preaching to the choir' (also sometimes spelled quire) is of US origin. +``` + + + +```bash +usage: pysearch [-h] [-V] [-e ENGINE] [--show-summary] [-u URL] [-p PAGE] + [-t TYPE] [-cc] [-r RANK] [--proxy PROXY] + [--proxy-user PROXY_USER] [--proxy-password PROXY_PASSWORD] + query + +SearchEngineParser + +positional arguments: + query Query string to search engine for + +optional arguments: + -h, --help show this help message and exit + -V, --version show program's version number and exit + -e ENGINE, --engine ENGINE + Engine to use for parsing the query e.g google, yahoo, + bing,duckduckgo (default: google) + --show-summary Shows the summary of an engine + -u URL, --url URL A custom link to use as base url for search e.g + google.de + -p PAGE, --page PAGE Page of the result to return details for (default: 1) + -t TYPE, --type TYPE Type of detail to return i.e full, links, desciptions + or titles (default: full) + -cc, --clear-cache Clear cache of engine before searching + -r RANK, --rank RANK ID of Detail to return e.g 5 (default: 0) + --proxy PROXY Proxy address to make use of + --proxy-user PROXY_USER + Proxy user to make use of + --proxy-password PROXY_PASSWORD + Proxy password to make use of +``` + + + +## Code of Conduct +Make sure to adhere to the [code of conduct](CODE_OF_CONDUCT.md) at all times. + +## Contribution +Before making any contributions, please read the [contribution guide](CONTRIBUTING.md). 
+ +## License (MIT) +This project is licensed under the [MIT License](LICENSE) which allows very broad use for both academic and commercial purposes. + +## Contributors ✨ + +Thanks go to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): + +<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section --> +<!-- prettier-ignore-start --> +<!-- markdownlint-disable --> +<table> + <tr> + <td align="center"><a href="https://github.com/Rexogamer"><img src="https://avatars0.githubusercontent.com/u/42586271?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ed Luff</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Rexogamer" title="Code">💻</a></td> + <td align="center"><a href="http://diretnandomnan.webnode.com"><img src="https://avatars3.githubusercontent.com/u/23453888?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Diretnan Domnan</b></sub></a><br /><a href="#infra-deven96" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=deven96" title="Tests">⚠️</a> <a href="#tool-deven96" title="Tools">🔧</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=deven96" title="Code">💻</a></td> + <td align="center"><a href="http://mensaah.github.io"><img src="https://avatars3.githubusercontent.com/u/24734308?v=4?s=100" width="100px;" alt=""/><br /><sub><b>MeNsaaH</b></sub></a><br /><a href="#infra-MeNsaaH" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=MeNsaaH" title="Tests">⚠️</a> <a href="#tool-MeNsaaH" title="Tools">🔧</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=MeNsaaH" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/PalAditya"><img src="https://avatars2.githubusercontent.com/u/25523604?v=4?s=100" width="100px;" alt=""/><br 
/><sub><b>Aditya Pal</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Tests">⚠️</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=PalAditya" title="Documentation">📖</a></td> + <td align="center"><a href="http://energized.pro"><img src="https://avatars1.githubusercontent.com/u/27774996?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Avinash Reddy</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3AAvinashReddy3108" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://github.com/Iamdavidonuh"><img src="https://avatars3.githubusercontent.com/u/37768509?v=4?s=100" width="100px;" alt=""/><br /><sub><b>David Onuh</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Iamdavidonuh" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=Iamdavidonuh" title="Tests">⚠️</a></td> + <td align="center"><a href="http://simakis.me"><img src="https://avatars2.githubusercontent.com/u/8322266?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Panagiotis Simakis</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=sp1thas" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=sp1thas" title="Tests">⚠️</a></td> + </tr> + <tr> + <td align="center"><a href="https://github.com/reiarthur"><img src="https://avatars2.githubusercontent.com/u/20190646?v=4?s=100" width="100px;" alt=""/><br /><sub><b>reiarthur</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=reiarthur" title="Code">💻</a></td> + <td align="center"><a href="http://ashokkumarta.blogspot.com/"><img src="https://avatars0.githubusercontent.com/u/5450267?v=4?s=100" 
width="100px;" alt=""/><br /><sub><b>Ashokkumar TA</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=ashokkumarta" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/ateuber"><img src="https://avatars2.githubusercontent.com/u/44349054?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Andreas Teuber</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=ateuber" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/mi096684"><img src="https://avatars3.githubusercontent.com/u/22032932?v=4?s=100" width="100px;" alt=""/><br /><sub><b>mi096684</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Ami096684" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://github.com/devajithvs"><img src="https://avatars1.githubusercontent.com/u/29475282?v=4?s=100" width="100px;" alt=""/><br /><sub><b>devajithvs</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=devajithvs" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/zakaryan2004"><img src="https://avatars3.githubusercontent.com/u/29994884?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Geg Zakaryan</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=zakaryan2004" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Azakaryan2004" title="Bug reports">🐛</a></td> + <td align="center"><a href="https://www.hakanbogan.com"><img src="https://avatars1.githubusercontent.com/u/24498747?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Hakan Boğan</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Aredrussianarmy" title="Bug reports">🐛</a></td> + </tr> + <tr> + <td align="center"><a href="https://github.com/NicKoehler"><img 
src="https://avatars3.githubusercontent.com/u/53040044?v=4?s=100" width="100px;" alt=""/><br /><sub><b>NicKoehler</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3ANicKoehler" title="Bug reports">🐛</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=NicKoehler" title="Code">💻</a></td> + <td align="center"><a href="https://github.com/chris4540"><img src="https://avatars1.githubusercontent.com/u/12794588?v=4?s=100" width="100px;" alt=""/><br /><sub><b>ChrisLin</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Achris4540" title="Bug reports">🐛</a> <a href="https://github.com/bisoncorps/search-engine-parser/commits?author=chris4540" title="Code">💻</a></td> + <td align="center"><a href="http://pete.world"><img src="https://avatars.githubusercontent.com/u/10454135?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Pietro</b></sub></a><br /><a href="https://github.com/bisoncorps/search-engine-parser/commits?author=pgrandinetti" title="Code">💻</a> <a href="https://github.com/bisoncorps/search-engine-parser/issues?q=author%3Apgrandinetti" title="Bug reports">🐛</a></td> + </tr> +</table> + +<!-- markdownlint-restore --> +<!-- prettier-ignore-end --> + +<!-- ALL-CONTRIBUTORS-LIST:END --> + +This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! 
+
+
+%prep
+# Unpack the upstream sdist; its top-level directory is not named after this package.
+%autosetup -n search-engine-parser-0.6.8
+
+%build
+%py3_build
+
+%install
+%py3_install
+# Copy whichever documentation/example directories the sdist ships into the
+# package doc directory; a directory that does not exist is simply skipped.
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+# Generate the manifests consumed by the files sections below. Paths are
+# written relative to the buildroot (with a leading slash) and wrapped in
+# double quotes so that a file name containing whitespace stays one entry.
+pushd %{buildroot}
+# Create both manifests up front so the mv calls after popd cannot fail
+# when none of the scanned directories exists.
+touch filelist.lst doclist.lst
+if [ -d usr/lib ]; then
+  find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+  find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+  find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+  find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+# NOTE(review): man pages are listed with a .gz suffix, presumably because
+# the buildroot policy scripts compress them after this scriptlet finishes;
+# confirm against the target distribution's rpm configuration.
+if [ -d usr/share/man ]; then
+  find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+# The manifests must live in the build directory, not inside the buildroot,
+# otherwise they would be seen as unpackaged files.
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-search-engine-parser -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Mon Apr 10 2023 Python_Bot <Python_Bot@openeuler.org> - 0.6.8-1
+- Package Spec generated
@@ -0,0 +1 @@
+5d209d53424304d4f4504409f80785a5 search-engine-parser-0.6.8.tar.gz |