From 4f9b28030f16b2e688fb30261e553720e28d35a2 Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Mon, 15 May 2023 08:42:57 +0000 Subject: automatic import of python-amazon-textract-caller --- python-amazon-textract-caller.spec | 500 +++++++++++++++++++++++++++++++++++++ 1 file changed, 500 insertions(+) create mode 100644 python-amazon-textract-caller.spec (limited to 'python-amazon-textract-caller.spec') diff --git a/python-amazon-textract-caller.spec b/python-amazon-textract-caller.spec new file mode 100644 index 0000000..807aa2c --- /dev/null +++ b/python-amazon-textract-caller.spec @@ -0,0 +1,500 @@ +%global _empty_manifest_terminate_build 0 +Name: python-amazon-textract-caller +Version: 0.0.29 +Release: 1 +Summary: Amazon Textract Caller tools +License: Apache License Version 2.0 +URL: https://github.com/aws-samples/amazon-textract-textractor/tree/master/caller +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/29/a6/a2404130a56e1ac26ddc57fba4c7609957b63d3ad033ab234cb7eb5775ae/amazon-textract-caller-0.0.29.tar.gz +BuildArch: noarch + +Requires: python3-boto3 +Requires: python3-botocore +Requires: python3-amazon-textract-response-parser +# NOTE(review): pytest looks like a test-only dependency; confirm it is really needed at runtime. +Requires: python3-pytest + +%description +# Textract-Caller + +amazon-textract-caller provides a collection of ready to use functions and sample implementations to speed up the evaluation and development for any project using Amazon Textract. + +Making it easy to call Amazon Textract regardless of file type and location. 
+ +## Install + +```bash +> python -m pip install amazon-textract-caller +``` + +## Functions + +```python +from textractcaller import call_textract +def call_textract(input_document: Union[str, bytes], + features: Optional[List[Textract_Features]] = None, + queries_config: Optional[QueriesConfig] = None, + output_config: Optional[OutputConfig] = None, + kms_key_id: str = "", + job_tag: str = "", + notification_channel: Optional[NotificationChannel] = None, + client_request_token: str = "", + return_job_id: bool = False, + force_async_api: bool = False, + call_mode: Textract_Call_Mode = Textract_Call_Mode.DEFAULT, + boto3_textract_client=None, + job_done_polling_interval=1) -> dict: +``` + +Also useful when receiving the JSON response from an asynchronous job (start_document_text_detection or start_document_analysis) + +```python +from textractcaller import get_full_json +def get_full_json(job_id: str = None, + textract_api: Textract_API = Textract_API.DETECT, + boto3_textract_client=None)->dict: +``` + +And when receiving the JSON from the OutputConfig location, this method is useful as well. 
+ +```python +from textractcaller import get_full_json_from_output_config +def get_full_json_from_output_config(output_config: OutputConfig = None, + job_id: str = None, + s3_client = None)->dict: +``` + +## Samples + +### Calling with file from local filesystem only with detect_text + +```python +textract_json = call_textract(input_document="/folder/local-filesystem-file.png") +``` + +### Calling with file from local filesystem only detect_text and using in Textract Response Parser + +(needs trp dependency through ```python -m pip install amazon-textract-response-parser```) + +```python +import json +from trp import Document +from textractcaller import call_textract + +textract_json = call_textract(input_document="/folder/local-filesystem-file.png") +d = Document(textract_json) +``` + +### Calling with Queries for a multi-page document and extract the Answers + +sample also uses the amazon-textract-response-parser + +``` +python -m pip install amazon-textract-caller amazon-textract-response-parser +``` + +```python +import textractcaller as tc +import trp.trp2 as t2 +import boto3 + +textract = boto3.client('textract', region_name="us-east-2") +q1 = tc.Query(text="What is the employee SSN?", alias="SSN", pages=["1"]) +q2 = tc.Query(text="What is YTD gross pay?", alias="GROSS_PAY", pages=["2"]) +textract_json = tc.call_textract( + input_document="s3://amazon-textract-public-content/blogs/2-pager.pdf", + queries_config=tc.QueriesConfig(queries=[q1, q2]), + features=[tc.Textract_Features.QUERIES], + force_async_api=True, + boto3_textract_client=textract) +t_doc: t2.TDocument = t2.TDocumentSchema().load(textract_json) # type: ignore +for page in t_doc.pages: + query_answers = t_doc.get_query_answers(page=page) + for x in query_answers: + print(f"{x[1]},{x[2]}") +``` + + +### Calling with file from local filesystem with TABLES features + +```python +from textractcaller import call_textract, Textract_Features +features = [Textract_Features.TABLES] +response = 
call_textract( + input_document="/folder/local-filesystem-file.png", features=features) +``` + +### Call with images located on S3 but force asynchronous API + +```python +from textractcaller import call_textract +response = call_textract(input_document="s3://some-bucket/w2-example.png", force_async_api=True) +``` + +### Call with OutputConfig, Customer-Managed-Key + +```python +from textractcaller import call_textract +from textractcaller import OutputConfig, Textract_Features +output_config = OutputConfig(s3_bucket="somebucket-encrypted", s3_prefix="output/") +response = call_textract(input_document="s3://someprefix/somefile.png", + force_async_api=True, + output_config=output_config, + kms_key_id="arn:aws:kms:us-east-1:12345678901:key/some-key-id-ref-erence", + return_job_id=False, + job_tag="sometag", + client_request_token="sometoken") + +``` + +### Call with PDF located on S3 and force return of JobId instead of JSON response + +```python +from textractcaller import call_textract +response = call_textract(input_document="s3://some-bucket/some-document.pdf", return_job_id=True) +job_id = response['JobId'] +``` + + + + +%package -n python3-amazon-textract-caller +# Python 3 subpackage; the build-time dependencies of the spec are declared in this stanza. +Summary: Amazon Textract Caller tools +BuildRequires: python3-devel python3-setuptools python3-pip +# Also answer to the unprefixed python-* package name. +Provides: python-amazon-textract-caller +%description -n python3-amazon-textract-caller +# Textract-Caller + +amazon-textract-caller provides a collection of ready to use functions and sample implementations to speed up the evaluation and development for any project using Amazon Textract. + +Making it easy to call Amazon Textract regardless of file type and location. 
+ +## Install + +```bash +> python -m pip install amazon-textract-caller +``` + +## Functions + +```python +from textractcaller import call_textract +def call_textract(input_document: Union[str, bytes], + features: Optional[List[Textract_Features]] = None, + queries_config: Optional[QueriesConfig] = None, + output_config: Optional[OutputConfig] = None, + kms_key_id: str = "", + job_tag: str = "", + notification_channel: Optional[NotificationChannel] = None, + client_request_token: str = "", + return_job_id: bool = False, + force_async_api: bool = False, + call_mode: Textract_Call_Mode = Textract_Call_Mode.DEFAULT, + boto3_textract_client=None, + job_done_polling_interval=1) -> dict: +``` + +Also useful when receiving the JSON response from an asynchronous job (start_document_text_detection or start_document_analysis) + +```python +from textractcaller import get_full_json +def get_full_json(job_id: str = None, + textract_api: Textract_API = Textract_API.DETECT, + boto3_textract_client=None)->dict: +``` + +And when receiving the JSON from the OutputConfig location, this method is useful as well. 
+ +```python +from textractcaller import get_full_json_from_output_config +def get_full_json_from_output_config(output_config: OutputConfig = None, + job_id: str = None, + s3_client = None)->dict: +``` + +## Samples + +### Calling with file from local filesystem only with detect_text + +```python +textract_json = call_textract(input_document="/folder/local-filesystem-file.png") +``` + +### Calling with file from local filesystem only detect_text and using in Textract Response Parser + +(needs trp dependency through ```python -m pip install amazon-textract-response-parser```) + +```python +import json +from trp import Document +from textractcaller import call_textract + +textract_json = call_textract(input_document="/folder/local-filesystem-file.png") +d = Document(textract_json) +``` + +### Calling with Queries for a multi-page document and extract the Answers + +sample also uses the amazon-textract-response-parser + +``` +python -m pip install amazon-textract-caller amazon-textract-response-parser +``` + +```python +import textractcaller as tc +import trp.trp2 as t2 +import boto3 + +textract = boto3.client('textract', region_name="us-east-2") +q1 = tc.Query(text="What is the employee SSN?", alias="SSN", pages=["1"]) +q2 = tc.Query(text="What is YTD gross pay?", alias="GROSS_PAY", pages=["2"]) +textract_json = tc.call_textract( + input_document="s3://amazon-textract-public-content/blogs/2-pager.pdf", + queries_config=tc.QueriesConfig(queries=[q1, q2]), + features=[tc.Textract_Features.QUERIES], + force_async_api=True, + boto3_textract_client=textract) +t_doc: t2.TDocument = t2.TDocumentSchema().load(textract_json) # type: ignore +for page in t_doc.pages: + query_answers = t_doc.get_query_answers(page=page) + for x in query_answers: + print(f"{x[1]},{x[2]}") +``` + + +### Calling with file from local filesystem with TABLES features + +```python +from textractcaller import call_textract, Textract_Features +features = [Textract_Features.TABLES] +response = 
call_textract( + input_document="/folder/local-filesystem-file.png", features=features) +``` + +### Call with images located on S3 but force asynchronous API + +```python +from textractcaller import call_textract +response = call_textract(input_document="s3://some-bucket/w2-example.png", force_async_api=True) +``` + +### Call with OutputConfig, Customer-Managed-Key + +```python +from textractcaller import call_textract +from textractcaller import OutputConfig, Textract_Features +output_config = OutputConfig(s3_bucket="somebucket-encrypted", s3_prefix="output/") +response = call_textract(input_document="s3://someprefix/somefile.png", + force_async_api=True, + output_config=output_config, + kms_key_id="arn:aws:kms:us-east-1:12345678901:key/some-key-id-ref-erence", + return_job_id=False, + job_tag="sometag", + client_request_token="sometoken") + +``` + +### Call with PDF located on S3 and force return of JobId instead of JSON response + +```python +from textractcaller import call_textract +response = call_textract(input_document="s3://some-bucket/some-document.pdf", return_job_id=True) +job_id = response['JobId'] +``` + + + + +%package help +# Documentation subpackage; with no -n option rpm names it after the main package plus "-help". +Summary: Development documents and examples for amazon-textract-caller +Provides: python3-amazon-textract-caller-doc +%description help +# Textract-Caller + +amazon-textract-caller provides a collection of ready to use functions and sample implementations to speed up the evaluation and development for any project using Amazon Textract. + +Making it easy to call Amazon Textract regardless of file type and location. 
+ +## Install + +```bash +> python -m pip install amazon-textract-caller +``` + +## Functions + +```python +from textractcaller import call_textract +def call_textract(input_document: Union[str, bytes], + features: Optional[List[Textract_Features]] = None, + queries_config: Optional[QueriesConfig] = None, + output_config: Optional[OutputConfig] = None, + kms_key_id: str = "", + job_tag: str = "", + notification_channel: Optional[NotificationChannel] = None, + client_request_token: str = "", + return_job_id: bool = False, + force_async_api: bool = False, + call_mode: Textract_Call_Mode = Textract_Call_Mode.DEFAULT, + boto3_textract_client=None, + job_done_polling_interval=1) -> dict: +``` + +Also useful when receiving the JSON response from an asynchronous job (start_document_text_detection or start_document_analysis) + +```python +from textractcaller import get_full_json +def get_full_json(job_id: str = None, + textract_api: Textract_API = Textract_API.DETECT, + boto3_textract_client=None)->dict: +``` + +And when receiving the JSON from the OutputConfig location, this method is useful as well. 
+ +```python +from textractcaller import get_full_json_from_output_config +def get_full_json_from_output_config(output_config: OutputConfig = None, + job_id: str = None, + s3_client = None)->dict: +``` + +## Samples + +### Calling with file from local filesystem only with detect_text + +```python +textract_json = call_textract(input_document="/folder/local-filesystem-file.png") +``` + +### Calling with file from local filesystem only detect_text and using in Textract Response Parser + +(needs trp dependency through ```python -m pip install amazon-textract-response-parser```) + +```python +import json +from trp import Document +from textractcaller import call_textract + +textract_json = call_textract(input_document="/folder/local-filesystem-file.png") +d = Document(textract_json) +``` + +### Calling with Queries for a multi-page document and extract the Answers + +sample also uses the amazon-textract-response-parser + +``` +python -m pip install amazon-textract-caller amazon-textract-response-parser +``` + +```python +import textractcaller as tc +import trp.trp2 as t2 +import boto3 + +textract = boto3.client('textract', region_name="us-east-2") +q1 = tc.Query(text="What is the employee SSN?", alias="SSN", pages=["1"]) +q2 = tc.Query(text="What is YTD gross pay?", alias="GROSS_PAY", pages=["2"]) +textract_json = tc.call_textract( + input_document="s3://amazon-textract-public-content/blogs/2-pager.pdf", + queries_config=tc.QueriesConfig(queries=[q1, q2]), + features=[tc.Textract_Features.QUERIES], + force_async_api=True, + boto3_textract_client=textract) +t_doc: t2.TDocument = t2.TDocumentSchema().load(textract_json) # type: ignore +for page in t_doc.pages: + query_answers = t_doc.get_query_answers(page=page) + for x in query_answers: + print(f"{x[1]},{x[2]}") +``` + + +### Calling with file from local filesystem with TABLES features + +```python +from textractcaller import call_textract, Textract_Features +features = [Textract_Features.TABLES] +response = 
call_textract( + input_document="/folder/local-filesystem-file.png", features=features) +``` + +### Call with images located on S3 but force asynchronous API + +```python +from textractcaller import call_textract +response = call_textract(input_document="s3://some-bucket/w2-example.png", force_async_api=True) +``` + +### Call with OutputConfig, Customer-Managed-Key + +```python +from textractcaller import call_textract +from textractcaller import OutputConfig, Textract_Features +output_config = OutputConfig(s3_bucket="somebucket-encrypted", s3_prefix="output/") +response = call_textract(input_document="s3://someprefix/somefile.png", + force_async_api=True, + output_config=output_config, + kms_key_id="arn:aws:kms:us-east-1:12345678901:key/some-key-id-ref-erence", + return_job_id=False, + job_tag="sometag", + client_request_token="sometoken") + +``` + +### Call with PDF located on S3 and force return of JobId instead of JSON response + +```python +from textractcaller import call_textract +response = call_textract(input_document="s3://some-bucket/some-document.pdf", return_job_id=True) +job_id = response['JobId'] +``` + + + + +%prep +%autosetup -n amazon-textract-caller-0.0.29 + +%build +%py3_build + +%install +%py3_install +# Ship whichever upstream documentation/example directories exist in the source tree. +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +# Create both manifests up front so the mv calls below succeed even when nothing is found. +touch filelist.lst doclist.lst +# Record every installed file under the library and binary directories. +for dir in usr/lib usr/lib64 usr/bin usr/sbin; do + if [ -d "$dir" ]; then + find "$dir" -type f -printf "/%h/%f\n" >> filelist.lst + fi +done 
+# Man pages are recorded with a .gz suffix (presumably they are compressed after this scriptlet; confirm). +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +# Move the manifests out of the buildroot to where the -f options below read them. +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-amazon-textract-caller -f filelist.lst +# filelist.lst names the individual files; this glob owns the top-level site-packages entries as directories. +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +# Everything installed under the documentation directory, in addition to doclist.lst. +%{_docdir}/* + +%changelog +* Mon May 15 2023 Python_Bot - 0.0.29-1 +- Package Spec generated -- cgit v1.2.3