-rw-r--r--  .gitignore                    |   1
-rw-r--r--  python-pipelineprofiler.spec  | 402
-rw-r--r--  sources                       |   1

3 files changed, 404 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/pipelineprofiler-0.1.18.tar.gz
diff --git a/python-pipelineprofiler.spec b/python-pipelineprofiler.spec
new file mode 100644
index 0000000..8eab3f7
--- /dev/null
+++ b/python-pipelineprofiler.spec
@@ -0,0 +1,402 @@
%global _empty_manifest_terminate_build 0
Name: python-pipelineprofiler
Version: 0.1.18
Release: 1
Summary: Pipeline Profiler tool. Enables the exploration of D3M pipelines in Jupyter Notebooks
License: BSD License
URL: https://github.com/VIDA-NYU/PipelineVis
Source0: https://mirrors.aliyun.com/pypi/web/packages/46/39/204e9f0a7fde560e178dd82d987b747d450a0521b5b4db4bf1d9792ece4d/pipelineprofiler-0.1.18.tar.gz
BuildArch: noarch

Requires: python3-dateutil
Requires: python3-numpy
Requires: python3-scipy
Requires: python3-scikit-learn
Requires: python3-networkx
Requires: python3-notebook

%description
# PipelineProfiler

AutoML pipeline exploration tool compatible with Jupyter Notebooks. Supports the auto-sklearn and D3M pipeline formats.

(Shift-click to select multiple pipelines.)

**Paper**: [https://arxiv.org/abs/2005.00160](https://arxiv.org/abs/2005.00160)

**Video**: [https://youtu.be/2WSYoaxLLJ8](https://youtu.be/2WSYoaxLLJ8)

**Blog**: [Medium post](https://towardsdatascience.com/exploring-auto-sklearn-models-with-pipelineprofiler-5b2c54136044)

## Demo

Live demo (Google Colab):
- [Heart Stat Log data](https://colab.research.google.com/drive/1k_h4HWUKsd83PmYMEBJ87UP2SSJQYw9A?usp=sharing)
- [auto-sklearn classification](https://colab.research.google.com/drive/1_2FRIkHNFGOiIJt-n_3zuh8vpSMLhwzx?usp=sharing)

In a Jupyter Notebook:
```Python
import PipelineProfiler
data = PipelineProfiler.get_heartstatlog_data()
PipelineProfiler.plot_pipeline_matrix(data)
```

## Install

### Option 1: Install via pip:
~~~~
pip install pipelineprofiler
~~~~

### Option 2: Run the Docker image:
~~~~
docker build -t pipelineprofiler .
docker run -p 9999:8888 pipelineprofiler
~~~~

Then copy the access token from the container log and log in to Jupyter in your browser at:
~~~~
localhost:9999
~~~~

## Data preprocessing

PipelineProfiler reads data from the D3M metalearning database. You can download this data from: https://metalearning.datadrivendiscovery.org/dumps/2020/03/04/metalearningdb_dump_20200304.tar.gz

You need to merge two files in order to explore the pipelines: pipelines.json and pipeline_runs.json. To do so, run:
~~~~
python -m PipelineProfiler.pipeline_merge [-n NUMBER_PIPELINES] pipeline_runs_file pipelines_file output_file
~~~~

## Pipeline exploration

```Python
import PipelineProfiler
import json
```

In a Jupyter notebook, load the output file:

```Python
with open("output_file.json", "r") as f:
    pipelines = json.load(f)
```

and then plot the first few pipelines with:

```Python
PipelineProfiler.plot_pipeline_matrix(pipelines[:10])
```
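Before filtering anything, it can help to see which problems the merged file covers and how many pipelines each one has. A minimal sketch, reusing the `pipeline['problem']['id']` field from the postprocessing code below:

```Python
import json
from collections import Counter

with open("output_file.json", "r") as f:
    pipelines = json.load(f)

# Tally pipelines per problem id
counts = Counter(p['problem']['id'] for p in pipelines)
for problem_id, n in counts.most_common():
    print(problem_id, n)
```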
## Data postprocessing

You might want to group pipelines by problem and select the top k pipelines from each team. To do so, use the following code:

```Python
from collections import defaultdict

def get_top_k_pipelines_team(pipelines, k):
    # Group pipelines by the team (source) that produced them
    team_pipelines = defaultdict(list)
    for pipeline in pipelines:
        source = pipeline['pipeline_source']['name']
        team_pipelines[source].append(pipeline)
    # Keep each team's k best pipelines, ranked by normalized score
    for team in team_pipelines.keys():
        team_pipelines[team] = sorted(team_pipelines[team], key=lambda x: x['scores'][0]['normalized'], reverse=True)
        team_pipelines[team] = team_pipelines[team][:k]
    new_pipelines = []
    for team in team_pipelines.keys():
        new_pipelines.extend(team_pipelines[team])
    return new_pipelines

def sort_pipeline_scores(pipelines):
    # Sort pipelines by raw score, best first
    return sorted(pipelines, key=lambda x: x['scores'][0]['value'], reverse=True)

# Group pipelines by problem, then keep each team's top 100 per problem
pipelines_problem = {}
for pipeline in pipelines:
    problem_id = pipeline['problem']['id']
    if problem_id not in pipelines_problem:
        pipelines_problem[problem_id] = []
    pipelines_problem[problem_id].append(pipeline)
for problem in pipelines_problem.keys():
    pipelines_problem[problem] = sort_pipeline_scores(get_top_k_pipelines_team(pipelines_problem[problem], k=100))
```
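For example, to render the ten best pipelines of one of the grouped problems, the result of the code above can be fed straight back into the plot function (the choice of problem here is illustrative):

```Python
import PipelineProfiler

# Take any problem from the grouped dictionary built above
problem_id = next(iter(pipelines_problem))
PipelineProfiler.plot_pipeline_matrix(pipelines_problem[problem_id][:10])
```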
%package -n python3-pipelineprofiler
Summary: Pipeline Profiler tool. Enables the exploration of D3M pipelines in Jupyter Notebooks
Provides: python-pipelineprofiler
BuildRequires: python3-devel
BuildRequires: python3-setuptools
BuildRequires: python3-pip
%description -n python3-pipelineprofiler
# PipelineProfiler

AutoML pipeline exploration tool compatible with Jupyter Notebooks. Supports the auto-sklearn and D3M pipeline formats.

(Shift-click to select multiple pipelines.)

**Paper**: [https://arxiv.org/abs/2005.00160](https://arxiv.org/abs/2005.00160)

**Video**: [https://youtu.be/2WSYoaxLLJ8](https://youtu.be/2WSYoaxLLJ8)

**Blog**: [Medium post](https://towardsdatascience.com/exploring-auto-sklearn-models-with-pipelineprofiler-5b2c54136044)

## Demo

Live demo (Google Colab):
- [Heart Stat Log data](https://colab.research.google.com/drive/1k_h4HWUKsd83PmYMEBJ87UP2SSJQYw9A?usp=sharing)
- [auto-sklearn classification](https://colab.research.google.com/drive/1_2FRIkHNFGOiIJt-n_3zuh8vpSMLhwzx?usp=sharing)

In a Jupyter Notebook:
```Python
import PipelineProfiler
data = PipelineProfiler.get_heartstatlog_data()
PipelineProfiler.plot_pipeline_matrix(data)
```

## Install

### Option 1: Install via pip:
~~~~
pip install pipelineprofiler
~~~~

### Option 2: Run the Docker image:
~~~~
docker build -t pipelineprofiler .
docker run -p 9999:8888 pipelineprofiler
~~~~

Then copy the access token from the container log and log in to Jupyter in your browser at:
~~~~
localhost:9999
~~~~

## Data preprocessing

PipelineProfiler reads data from the D3M metalearning database. You can download this data from: https://metalearning.datadrivendiscovery.org/dumps/2020/03/04/metalearningdb_dump_20200304.tar.gz

You need to merge two files in order to explore the pipelines: pipelines.json and pipeline_runs.json. To do so, run:
~~~~
python -m PipelineProfiler.pipeline_merge [-n NUMBER_PIPELINES] pipeline_runs_file pipelines_file output_file
~~~~

## Pipeline exploration

```Python
import PipelineProfiler
import json
```

In a Jupyter notebook, load the output file:

```Python
with open("output_file.json", "r") as f:
    pipelines = json.load(f)
```

and then plot the first few pipelines with:

```Python
PipelineProfiler.plot_pipeline_matrix(pipelines[:10])
```

## Data postprocessing

You might want to group pipelines by problem and select the top k pipelines from each team. To do so, use the following code:

```Python
from collections import defaultdict

def get_top_k_pipelines_team(pipelines, k):
    # Group pipelines by the team (source) that produced them
    team_pipelines = defaultdict(list)
    for pipeline in pipelines:
        source = pipeline['pipeline_source']['name']
        team_pipelines[source].append(pipeline)
    # Keep each team's k best pipelines, ranked by normalized score
    for team in team_pipelines.keys():
        team_pipelines[team] = sorted(team_pipelines[team], key=lambda x: x['scores'][0]['normalized'], reverse=True)
        team_pipelines[team] = team_pipelines[team][:k]
    new_pipelines = []
    for team in team_pipelines.keys():
        new_pipelines.extend(team_pipelines[team])
    return new_pipelines

def sort_pipeline_scores(pipelines):
    # Sort pipelines by raw score, best first
    return sorted(pipelines, key=lambda x: x['scores'][0]['value'], reverse=True)

# Group pipelines by problem, then keep each team's top 100 per problem
pipelines_problem = {}
for pipeline in pipelines:
    problem_id = pipeline['problem']['id']
    if problem_id not in pipelines_problem:
        pipelines_problem[problem_id] = []
    pipelines_problem[problem_id].append(pipeline)
for problem in pipelines_problem.keys():
    pipelines_problem[problem] = sort_pipeline_scores(get_top_k_pipelines_team(pipelines_problem[problem], k=100))
```
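The filtered groups can also be written back to disk so the merge and filtering steps do not have to be repeated in later sessions. A minimal sketch; the output file name is an assumption:

```Python
import json

# Flatten the per-problem groups into one list and save it
filtered = [p for group in pipelines_problem.values() for p in group]
with open("pipelines_filtered.json", "w") as f:
    json.dump(filtered, f)
```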
%package help
Summary: Development documents and examples for pipelineprofiler
Provides: python3-pipelineprofiler-doc
%description help
# PipelineProfiler

AutoML pipeline exploration tool compatible with Jupyter Notebooks. Supports the auto-sklearn and D3M pipeline formats.

(Shift-click to select multiple pipelines.)

**Paper**: [https://arxiv.org/abs/2005.00160](https://arxiv.org/abs/2005.00160)

**Video**: [https://youtu.be/2WSYoaxLLJ8](https://youtu.be/2WSYoaxLLJ8)

**Blog**: [Medium post](https://towardsdatascience.com/exploring-auto-sklearn-models-with-pipelineprofiler-5b2c54136044)

## Demo

Live demo (Google Colab):
- [Heart Stat Log data](https://colab.research.google.com/drive/1k_h4HWUKsd83PmYMEBJ87UP2SSJQYw9A?usp=sharing)
- [auto-sklearn classification](https://colab.research.google.com/drive/1_2FRIkHNFGOiIJt-n_3zuh8vpSMLhwzx?usp=sharing)

In a Jupyter Notebook:
```Python
import PipelineProfiler
data = PipelineProfiler.get_heartstatlog_data()
PipelineProfiler.plot_pipeline_matrix(data)
```

## Install

### Option 1: Install via pip:
~~~~
pip install pipelineprofiler
~~~~

### Option 2: Run the Docker image:
~~~~
docker build -t pipelineprofiler .
docker run -p 9999:8888 pipelineprofiler
~~~~

Then copy the access token from the container log and log in to Jupyter in your browser at:
~~~~
localhost:9999
~~~~

## Data preprocessing

PipelineProfiler reads data from the D3M metalearning database. You can download this data from: https://metalearning.datadrivendiscovery.org/dumps/2020/03/04/metalearningdb_dump_20200304.tar.gz

You need to merge two files in order to explore the pipelines: pipelines.json and pipeline_runs.json. To do so, run:
~~~~
python -m PipelineProfiler.pipeline_merge [-n NUMBER_PIPELINES] pipeline_runs_file pipelines_file output_file
~~~~

## Pipeline exploration

```Python
import PipelineProfiler
import json
```

In a Jupyter notebook, load the output file:

```Python
with open("output_file.json", "r") as f:
    pipelines = json.load(f)
```

and then plot the first few pipelines with:

```Python
PipelineProfiler.plot_pipeline_matrix(pipelines[:10])
```

## Data postprocessing

You might want to group pipelines by problem and select the top k pipelines from each team. To do so, use the following code:

```Python
from collections import defaultdict

def get_top_k_pipelines_team(pipelines, k):
    # Group pipelines by the team (source) that produced them
    team_pipelines = defaultdict(list)
    for pipeline in pipelines:
        source = pipeline['pipeline_source']['name']
        team_pipelines[source].append(pipeline)
    # Keep each team's k best pipelines, ranked by normalized score
    for team in team_pipelines.keys():
        team_pipelines[team] = sorted(team_pipelines[team], key=lambda x: x['scores'][0]['normalized'], reverse=True)
        team_pipelines[team] = team_pipelines[team][:k]
    new_pipelines = []
    for team in team_pipelines.keys():
        new_pipelines.extend(team_pipelines[team])
    return new_pipelines

def sort_pipeline_scores(pipelines):
    # Sort pipelines by raw score, best first
    return sorted(pipelines, key=lambda x: x['scores'][0]['value'], reverse=True)

# Group pipelines by problem, then keep each team's top 100 per problem
pipelines_problem = {}
for pipeline in pipelines:
    problem_id = pipeline['problem']['id']
    if problem_id not in pipelines_problem:
        pipelines_problem[problem_id] = []
    pipelines_problem[problem_id].append(pipeline)
for problem in pipelines_problem.keys():
    pipelines_problem[problem] = sort_pipeline_scores(get_top_k_pipelines_team(pipelines_problem[problem], k=100))
```

%prep
%autosetup -n pipelineprofiler-0.1.18

%build
%py3_build

%install
%py3_install
install -d -m755 %{buildroot}/%{_pkgdocdir}
if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
# Collect everything installed under the buildroot into the package file lists
pushd %{buildroot}
if [ -d usr/lib ]; then
	find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
if [ -d usr/lib64 ]; then
	find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
if [ -d usr/bin ]; then
	find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
if [ -d usr/sbin ]; then
	find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
fi
touch doclist.lst
if [ -d usr/share/man ]; then
	find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
fi
popd
mv %{buildroot}/filelist.lst .
mv %{buildroot}/doclist.lst .

%files -n python3-pipelineprofiler -f filelist.lst
%dir %{python3_sitelib}/*

%files help -f doclist.lst
%{_docdir}/*

%changelog
* Tue Jun 20 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.18-1
- Package Spec generated
diff --git a/sources b/sources
@@ -0,0 +1 @@
+a69147df0bc3d8f11e0712a0503c331c pipelineprofiler-0.1.18.tar.gz
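The `sources` file above records the md5 checksum of the release tarball. A minimal Python sketch for verifying a downloaded archive against it (the local file path is an assumption):

```Python
import hashlib

# Checksum taken from the `sources` file above
expected = "a69147df0bc3d8f11e0712a0503c331c"
with open("pipelineprofiler-0.1.18.tar.gz", "rb") as f:
    digest = hashlib.md5(f.read()).hexdigest()
print("OK" if digest == expected else "Mismatch: " + digest)
```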