From 0f8ed973459ad6b8a5ebe82670653fd05ebcecce Mon Sep 17 00:00:00 2001 From: CoprDistGit Date: Thu, 18 May 2023 04:00:32 +0000 Subject: automatic import of python-migbq --- .gitignore | 1 + python-migbq.spec | 774 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ sources | 1 + 3 files changed, 776 insertions(+) create mode 100644 python-migbq.spec create mode 100644 sources diff --git a/.gitignore b/.gitignore index e69de29..6635427 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/migbq-0.0.85.tar.gz diff --git a/python-migbq.spec b/python-migbq.spec new file mode 100644 index 0000000..e1b9007 --- /dev/null +++ b/python-migbq.spec @@ -0,0 +1,774 @@ +%global _empty_manifest_terminate_build 0 +Name: python-migbq +Version: 0.0.85 +Release: 1 +Summary: read microsoft sql server table data and upload to bigquery +License: MIT +URL: https://github.com/jo8937/migbq +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/67/17/843d78730cc91a64ec9c6bb997176d929fff87e07fcadb2ba45c6a5e5c19/migbq-0.0.85.tar.gz +BuildArch: noarch + +Requires: python3-ujson +Requires: python3-peewee +Requires: python3-pymssql +Requires: python3-peewee-mssql +Requires: python3-concurrent-log-handler +Requires: python3-pyyaml +Requires: python3-google-cloud-bigquery +Requires: python3-Jinja2 +Requires: python3-httplib2 + +%description +# migbq + +rdbms-to-bigquery-data-loader + +## Requirement + +* Python + - CPython 2.7.x + +* RDBMS (below, DB) + - Microsoft SQL Server + - Mysql (development) + +* Table Spec + - All table must have Numeric Primary Key Field + +* DB User Grant + - SELECT, INSERT, UPDATE, CREATE + - can access DB's metadata ([INFORMATION_SCHEMA] database) + - some metadata tables create in source RDBMS + - (If you don't want create table in source, you can use sqlite. fork this project and edit source) + +* Google Cloud SDK + - install Google Cloud SDK must be required + - https://cloud.google.com/sdk/downloads + - https://cloud.google.com/sdk/gcloud/reference/auth/login + +* Pymssql freetds + - http://www.pymssql.org/en/stable/ + +## Install + +``` +export PYMSSQL_BUILD_WITH_BUNDLED_FREETDS=1 +pip install migbq +``` + +## Usage + +### write Configuration File + +* like embulk ( http://www.embulk.org ) + +### Example + +#### general congif file +* config.yml + +```yml +in: + type: mssql + host: localhost + user: USER + password: PASSWORD + port: 1433 + database: DATABASE + tables: + - tbl + - tbl2 + - tbl3 + batch_size: 50000 + temp_csv_path: /temp/pymig_csv + temp_csv_path_complete: /temp/pymig_csv_complete +out: + type: bigquery + project: GCP_PROJECT + dataset: BQ_DATASET +``` + +#### jinja2 template + +* config.j2.yml + - variable is enviromant variable only. + - file extension is **.j2.yml** + +```yml +in: + type: mssql +{% include "mssql-connect.yml" %} + tables: + - tbl + - tbl2 + - tbl3 + batch_size: 50000 + temp_csv_path: /temp/pymig_csv + temp_csv_path_complete: /temp/pymig_csv_complete +out: + type: bigquery + project: {{ env.GCP_PROJECT }} + dataset: BQ_DATASET +``` + + +### Run + +#### (1) Execute + +```bash +migbq run config.yml +``` + +#### (2) Check Job Complete + +```bash +migbq check config.yml +``` + + +#### (3) Check table count equals + +```bash +migbq sync config.yml +``` + +* Primary Key base count check. + +### Run Forever + +* you can add crontab +* migbq have exclusive process lock. so you can add crontab every minute. +* you must add both **run** and **check** + + +## Description + +### run command + +**[1]** select RDBMS table metadata + - get table primary key name in RDBMS metadata table. + - get column name and type fields in RDBMS metadata table. + +**[2]** select RDBMS Primary key value range + - get min / max PK of table + +**[3]** select data in primary key range + - select with pk min and min + batch_size + +```sql + select * from tbl where 0 < idx and idx <= 100; +``` + + - create file **pymig-tbl-idx-1-100** + - gzip csv + +**[4]** upload csv file to bigquery + - direct upload to bigquery table. not upload to GCS (quota exceed can occur) + +**[5]** Repeat 1~4 until over the max primary key. + +For example, batch_size : 100, max pk is 321, then rdbms query execute like below. + +```sql + +select * from tbl where 0 < idx and idx <= 100; +select * from tbl where 100 < idx and idx <= 200; +select * from tbl where 200 < idx and idx <= 300; +select * from tbl where 300 < idx and idx <= 400; + +-- end + +``` + +### check command + +* check bigquery jobid end. +* retry fail job. + + +### Log file of program + +* log file create in config file's sub directory [log] + +### Pid file of program + +* pid file provide unique process for unique command. created at below directory. exclusive file lock. + + +``` +/tmp +``` + +### load metadata table + +#### META: migrationmetadata + +* one row insert when each 'select' runs + +| field name | type | description | smaple value | etc | +| ----: |--------|----------------------------------------|-----------------|-------------| +| tableName | STRING | target [tableName] | tbl | Primary Key | +| firstPk | INTEGER | [tableName]'s Min Primary Key value | 1 | | +| lastPk | INTEGER | [tableName]'s Max Primary Key value | 123 | | +| currentPk | STRING | [tableName]'s read complete Primary Key value | 20 | | +| regDate | DATETIME| this row's insert date | 2017-11-29 01:02:03 | | +| modDate | DATETIME| firstPk, lastPk modify date | 2017-11-29 01:02:03 | | +| endDate | DATETIME| currentPk reach lastPk date | 2017-11-29 11:22:33 | | +| pkName | STRING | [tableNames]'s Primary Key Name | idx | | +| rowCnt | INTEGER | [tableNames]'s count(*) | 123 | | +| pageTokenCurrent | STRING | not use now | tbl | | +| pageTokenNext | STRING | not use now | tbl | | + +#### LOG: migrationmetadatalog + +* sequance + - run : insert a row to this table when 'select [tableName]' executed + - run : update a row to this table when bigquery jobId created + - check : update a row to this table's jobComplete and checkComplete when bigquery jobId call ends + +| field name | type | description | smaple value | etc | +| ----: |--------|----------------------------------------|-----------------|-------------| +| idx | BigInt | PK | 1 | Primary Key Auto Increment | +| tableName | STRING | [tableName] | tbl | Primary Key | +| regDate | DATETIME | row insert date | 2017-11-29 01:02:03 | | +| endDate | DATETIME | when jobId is 'DONE' | 2017-11-29 11:22:33 | | +| pkName | STRING | [tableNames]'s Primary Key Name | idx | | +| cnt | INTEGER | bigquery api : statistics.load.outputRows | 123 | | +| pkUpper | INTEGER | each 'select' executed : [PKName] <= [pkUpper] | 100 | | +| pkLower | INTEGER | each 'select' executed : [PKName] > [pkLower] | 0 | | +| pkCurrent | INTEGER | same as pkUpper | 99 | | +| jobId | STRING | bigquery upload job jobId | job-adf132f31rf3f | | +| errorMessage | STRING | when jodId check result is 'ERROR', then write this | ERROR:bigquery quota exceed | | +| checkComplete | INTEGER | check command | 1 | | +| jobComplete | INTEGER | check command jobId check complete. success=1, fail=-1 | 1 | | +| pageToken | STRING | use as etc | | | + + +## loadmap + +* parallel loading not supported. + + + + +%package -n python3-migbq +Summary: read microsoft sql server table data and upload to bigquery +Provides: python-migbq +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-migbq +# migbq + +rdbms-to-bigquery-data-loader + +## Requirement + +* Python + - CPython 2.7.x + +* RDBMS (below, DB) + - Microsoft SQL Server + - Mysql (development) + +* Table Spec + - All table must have Numeric Primary Key Field + +* DB User Grant + - SELECT, INSERT, UPDATE, CREATE + - can access DB's metadata ([INFORMATION_SCHEMA] database) + - some metadata tables create in source RDBMS + - (If you don't want create table in source, you can use sqlite. fork this project and edit source) + +* Google Cloud SDK + - install Google Cloud SDK must be required + - https://cloud.google.com/sdk/downloads + - https://cloud.google.com/sdk/gcloud/reference/auth/login + +* Pymssql freetds + - http://www.pymssql.org/en/stable/ + +## Install + +``` +export PYMSSQL_BUILD_WITH_BUNDLED_FREETDS=1 +pip install migbq +``` + +## Usage + +### write Configuration File + +* like embulk ( http://www.embulk.org ) + +### Example + +#### general congif file +* config.yml + +```yml +in: + type: mssql + host: localhost + user: USER + password: PASSWORD + port: 1433 + database: DATABASE + tables: + - tbl + - tbl2 + - tbl3 + batch_size: 50000 + temp_csv_path: /temp/pymig_csv + temp_csv_path_complete: /temp/pymig_csv_complete +out: + type: bigquery + project: GCP_PROJECT + dataset: BQ_DATASET +``` + +#### jinja2 template + +* config.j2.yml + - variable is enviromant variable only. + - file extension is **.j2.yml** + +```yml +in: + type: mssql +{% include "mssql-connect.yml" %} + tables: + - tbl + - tbl2 + - tbl3 + batch_size: 50000 + temp_csv_path: /temp/pymig_csv + temp_csv_path_complete: /temp/pymig_csv_complete +out: + type: bigquery + project: {{ env.GCP_PROJECT }} + dataset: BQ_DATASET +``` + + +### Run + +#### (1) Execute + +```bash +migbq run config.yml +``` + +#### (2) Check Job Complete + +```bash +migbq check config.yml +``` + + +#### (3) Check table count equals + +```bash +migbq sync config.yml +``` + +* Primary Key base count check. + +### Run Forever + +* you can add crontab +* migbq have exclusive process lock. so you can add crontab every minute. +* you must add both **run** and **check** + + +## Description + +### run command + +**[1]** select RDBMS table metadata + - get table primary key name in RDBMS metadata table. + - get column name and type fields in RDBMS metadata table. + +**[2]** select RDBMS Primary key value range + - get min / max PK of table + +**[3]** select data in primary key range + - select with pk min and min + batch_size + +```sql + select * from tbl where 0 < idx and idx <= 100; +``` + + - create file **pymig-tbl-idx-1-100** + - gzip csv + +**[4]** upload csv file to bigquery + - direct upload to bigquery table. not upload to GCS (quota exceed can occur) + +**[5]** Repeat 1~4 until over the max primary key. + +For example, batch_size : 100, max pk is 321, then rdbms query execute like below. + +```sql + +select * from tbl where 0 < idx and idx <= 100; +select * from tbl where 100 < idx and idx <= 200; +select * from tbl where 200 < idx and idx <= 300; +select * from tbl where 300 < idx and idx <= 400; + +-- end + +``` + +### check command + +* check bigquery jobid end. +* retry fail job. + + +### Log file of program + +* log file create in config file's sub directory [log] + +### Pid file of program + +* pid file provide unique process for unique command. created at below directory. exclusive file lock. + + +``` +/tmp +``` + +### load metadata table + +#### META: migrationmetadata + +* one row insert when each 'select' runs + +| field name | type | description | smaple value | etc | +| ----: |--------|----------------------------------------|-----------------|-------------| +| tableName | STRING | target [tableName] | tbl | Primary Key | +| firstPk | INTEGER | [tableName]'s Min Primary Key value | 1 | | +| lastPk | INTEGER | [tableName]'s Max Primary Key value | 123 | | +| currentPk | STRING | [tableName]'s read complete Primary Key value | 20 | | +| regDate | DATETIME| this row's insert date | 2017-11-29 01:02:03 | | +| modDate | DATETIME| firstPk, lastPk modify date | 2017-11-29 01:02:03 | | +| endDate | DATETIME| currentPk reach lastPk date | 2017-11-29 11:22:33 | | +| pkName | STRING | [tableNames]'s Primary Key Name | idx | | +| rowCnt | INTEGER | [tableNames]'s count(*) | 123 | | +| pageTokenCurrent | STRING | not use now | tbl | | +| pageTokenNext | STRING | not use now | tbl | | + +#### LOG: migrationmetadatalog + +* sequance + - run : insert a row to this table when 'select [tableName]' executed + - run : update a row to this table when bigquery jobId created + - check : update a row to this table's jobComplete and checkComplete when bigquery jobId call ends + +| field name | type | description | smaple value | etc | +| ----: |--------|----------------------------------------|-----------------|-------------| +| idx | BigInt | PK | 1 | Primary Key Auto Increment | +| tableName | STRING | [tableName] | tbl | Primary Key | +| regDate | DATETIME | row insert date | 2017-11-29 01:02:03 | | +| endDate | DATETIME | when jobId is 'DONE' | 2017-11-29 11:22:33 | | +| pkName | STRING | [tableNames]'s Primary Key Name | idx | | +| cnt | INTEGER | bigquery api : statistics.load.outputRows | 123 | | +| pkUpper | INTEGER | each 'select' executed : [PKName] <= [pkUpper] | 100 | | +| pkLower | INTEGER | each 'select' executed : [PKName] > [pkLower] | 0 | | +| pkCurrent | INTEGER | same as pkUpper | 99 | | +| jobId | STRING | bigquery upload job jobId | job-adf132f31rf3f | | +| errorMessage | STRING | when jodId check result is 'ERROR', then write this | ERROR:bigquery quota exceed | | +| checkComplete | INTEGER | check command | 1 | | +| jobComplete | INTEGER | check command jobId check complete. success=1, fail=-1 | 1 | | +| pageToken | STRING | use as etc | | | + + +## loadmap + +* parallel loading not supported. + + + + +%package help +Summary: Development documents and examples for migbq +Provides: python3-migbq-doc +%description help +# migbq + +rdbms-to-bigquery-data-loader + +## Requirement + +* Python + - CPython 2.7.x + +* RDBMS (below, DB) + - Microsoft SQL Server + - Mysql (development) + +* Table Spec + - All table must have Numeric Primary Key Field + +* DB User Grant + - SELECT, INSERT, UPDATE, CREATE + - can access DB's metadata ([INFORMATION_SCHEMA] database) + - some metadata tables create in source RDBMS + - (If you don't want create table in source, you can use sqlite. fork this project and edit source) + +* Google Cloud SDK + - install Google Cloud SDK must be required + - https://cloud.google.com/sdk/downloads + - https://cloud.google.com/sdk/gcloud/reference/auth/login + +* Pymssql freetds + - http://www.pymssql.org/en/stable/ + +## Install + +``` +export PYMSSQL_BUILD_WITH_BUNDLED_FREETDS=1 +pip install migbq +``` + +## Usage + +### write Configuration File + +* like embulk ( http://www.embulk.org ) + +### Example + +#### general congif file +* config.yml + +```yml +in: + type: mssql + host: localhost + user: USER + password: PASSWORD + port: 1433 + database: DATABASE + tables: + - tbl + - tbl2 + - tbl3 + batch_size: 50000 + temp_csv_path: /temp/pymig_csv + temp_csv_path_complete: /temp/pymig_csv_complete +out: + type: bigquery + project: GCP_PROJECT + dataset: BQ_DATASET +``` + +#### jinja2 template + +* config.j2.yml + - variable is enviromant variable only. + - file extension is **.j2.yml** + +```yml +in: + type: mssql +{% include "mssql-connect.yml" %} + tables: + - tbl + - tbl2 + - tbl3 + batch_size: 50000 + temp_csv_path: /temp/pymig_csv + temp_csv_path_complete: /temp/pymig_csv_complete +out: + type: bigquery + project: {{ env.GCP_PROJECT }} + dataset: BQ_DATASET +``` + + +### Run + +#### (1) Execute + +```bash +migbq run config.yml +``` + +#### (2) Check Job Complete + +```bash +migbq check config.yml +``` + + +#### (3) Check table count equals + +```bash +migbq sync config.yml +``` + +* Primary Key base count check. + +### Run Forever + +* you can add crontab +* migbq have exclusive process lock. so you can add crontab every minute. +* you must add both **run** and **check** + + +## Description + +### run command + +**[1]** select RDBMS table metadata + - get table primary key name in RDBMS metadata table. + - get column name and type fields in RDBMS metadata table. + +**[2]** select RDBMS Primary key value range + - get min / max PK of table + +**[3]** select data in primary key range + - select with pk min and min + batch_size + +```sql + select * from tbl where 0 < idx and idx <= 100; +``` + + - create file **pymig-tbl-idx-1-100** + - gzip csv + +**[4]** upload csv file to bigquery + - direct upload to bigquery table. not upload to GCS (quota exceed can occur) + +**[5]** Repeat 1~4 until over the max primary key. + +For example, batch_size : 100, max pk is 321, then rdbms query execute like below. + +```sql + +select * from tbl where 0 < idx and idx <= 100; +select * from tbl where 100 < idx and idx <= 200; +select * from tbl where 200 < idx and idx <= 300; +select * from tbl where 300 < idx and idx <= 400; + +-- end + +``` + +### check command + +* check bigquery jobid end. +* retry fail job. + + +### Log file of program + +* log file create in config file's sub directory [log] + +### Pid file of program + +* pid file provide unique process for unique command. created at below directory. exclusive file lock. + + +``` +/tmp +``` + +### load metadata table + +#### META: migrationmetadata + +* one row insert when each 'select' runs + +| field name | type | description | smaple value | etc | +| ----: |--------|----------------------------------------|-----------------|-------------| +| tableName | STRING | target [tableName] | tbl | Primary Key | +| firstPk | INTEGER | [tableName]'s Min Primary Key value | 1 | | +| lastPk | INTEGER | [tableName]'s Max Primary Key value | 123 | | +| currentPk | STRING | [tableName]'s read complete Primary Key value | 20 | | +| regDate | DATETIME| this row's insert date | 2017-11-29 01:02:03 | | +| modDate | DATETIME| firstPk, lastPk modify date | 2017-11-29 01:02:03 | | +| endDate | DATETIME| currentPk reach lastPk date | 2017-11-29 11:22:33 | | +| pkName | STRING | [tableNames]'s Primary Key Name | idx | | +| rowCnt | INTEGER | [tableNames]'s count(*) | 123 | | +| pageTokenCurrent | STRING | not use now | tbl | | +| pageTokenNext | STRING | not use now | tbl | | + +#### LOG: migrationmetadatalog + +* sequance + - run : insert a row to this table when 'select [tableName]' executed + - run : update a row to this table when bigquery jobId created + - check : update a row to this table's jobComplete and checkComplete when bigquery jobId call ends + +| field name | type | description | smaple value | etc | +| ----: |--------|----------------------------------------|-----------------|-------------| +| idx | BigInt | PK | 1 | Primary Key Auto Increment | +| tableName | STRING | [tableName] | tbl | Primary Key | +| regDate | DATETIME | row insert date | 2017-11-29 01:02:03 | | +| endDate | DATETIME | when jobId is 'DONE' | 2017-11-29 11:22:33 | | +| pkName | STRING | [tableNames]'s Primary Key Name | idx | | +| cnt | INTEGER | bigquery api : statistics.load.outputRows | 123 | | +| pkUpper | INTEGER | each 'select' executed : [PKName] <= [pkUpper] | 100 | | +| pkLower | INTEGER | each 'select' executed : [PKName] > [pkLower] | 0 | | +| pkCurrent | INTEGER | same as pkUpper | 99 | | +| jobId | STRING | bigquery upload job jobId | job-adf132f31rf3f | | +| errorMessage | STRING | when jodId check result is 'ERROR', then write this | ERROR:bigquery quota exceed | | +| checkComplete | INTEGER | check command | 1 | | +| jobComplete | INTEGER | check command jobId check complete. success=1, fail=-1 | 1 | | +| pageToken | STRING | use as etc | | | + + +## loadmap + +* parallel loading not supported. + + + + +%prep +%autosetup -n migbq-0.0.85 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} +if [ -d usr/lib ]; then + find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst +fi +touch doclist.lst +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-migbq -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Thu May 18 2023 Python_Bot - 0.0.85-1 +- Package Spec generated diff --git a/sources b/sources new file mode 100644 index 0000000..bda9baf --- /dev/null +++ b/sources @@ -0,0 +1 @@ +5a40a6a90302303dadc8b25b74de48f6 migbq-0.0.85.tar.gz -- cgit v1.2.3