diff options
| author | CoprDistGit <infra@openeuler.org> | 2023-05-18 07:08:25 +0000 |
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2023-05-18 07:08:25 +0000 |
| commit | a017162e8ba6e83747ffe5c06b22cab584ffb7c3 (patch) | |
| tree | fee3bff28dedaeb015d03532e52977a8c01d6851 /python-anonypy.spec | |
| parent | b5486c3078d4f31da48250547dfa940dfdac65ba (diff) | |
automatic import of python-anonypy
Diffstat (limited to 'python-anonypy.spec')
| -rw-r--r-- | python-anonypy.spec | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/python-anonypy.spec b/python-anonypy.spec
new file mode 100644
index 0000000..d8c467c
--- /dev/null
+++ b/python-anonypy.spec
@@ -0,0 +1,315 @@
+%global _empty_manifest_terminate_build 0
+Name: python-anonypy
+Version: 0.1.7
+Release: 1
+Summary: Anonymization library for python
+License: MIT
+URL: https://github.com/glassonion1/anonypy
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/04/5d/adc4824c45316d48c1448082a8144ba07378f5b22dfb08a5b3bd112e7e49/anonypy-0.1.7.tar.gz
+BuildArch: noarch
+# License uses the SPDX short identifier; the description sections carry Markdown text verbatim.
+
+%description
+# AnonyPy
+Anonymization library for python.
+AnonyPy provides following privacy preserving techniques for the anonymization.
+- K Anonymity
+- L Diversity
+- T Closeness
+
+## The Anonymization method
+- Anonymization method aims at making the individual record be indistinguishable among a group record by using techniques of generalization and suppression.
+- Turning a dataset into a k-anonymous (and possibly l-diverse or t-close) dataset is a complex problem, and finding the optimal partition into k-anonymous groups is an NP-hard problem.
+- AnonyPy uses "Mondrian" algorithm to partition the original data into smaller and smaller groups
+- The algorithm assumes that we have converted all attributes into numerical or categorical values and that we are able to measure the “span” of a given attribute Xi.
+
+## Install
+```
+$ pip install anonypy
+```
+
+## Usage
+```python
+import anonypy
+import pandas as pd
+
+data = [
+    [6, "1", "test1", "x", 20],
+    [6, "1", "test1", "x", 30],
+    [8, "2", "test2", "x", 50],
+    [8, "2", "test3", "w", 45],
+    [8, "1", "test2", "y", 35],
+    [4, "2", "test3", "y", 20],
+    [4, "1", "test3", "y", 20],
+    [2, "1", "test3", "z", 22],
+    [2, "2", "test3", "y", 32],
+]
+
+columns = ["col1", "col2", "col3", "col4", "col5"]
+categorical = set(("col2", "col3", "col4"))
+
+def main():
+    df = pd.DataFrame(data=data, columns=columns)
+
+    for name in categorical:
+        df[name] = df[name].astype("category")
+
+    feature_columns = ["col1", "col2", "col3"]
+    sensitive_column = "col4"
+
+    p = anonypy.Preserver(df, feature_columns, sensitive_column)
+    rows = p.anonymize_k_anonymity(k=2)
+
+    dfn = pd.DataFrame(rows)
+    print(dfn)
+```
+
+Original data
+```bash
+   col1  col2   col3  col4  col5
+0     6     1  test1     x    20
+1     6     1  test1     x    30
+2     8     2  test2     x    50
+3     8     2  test3     w    45
+4     8     1  test2     y    35
+5     4     2  test3     y    20
+6     4     1  test3     y    20
+7     2     1  test3     z    22
+8     2     2  test3     y    32
+```
+
+The created anonymized data is below(Guarantee 2-anonymity).
+```bash
+   col1  col2         col3  col4  count
+0   2-4     2        test3     y      2
+1   2-4     1        test3     y      1
+2   2-4     1        test3     z      1
+3   6-8     1  test1,test2     x      2
+4   6-8     1  test1,test2     y      1
+5     8     2  test3,test2     w      1
+6     8     2  test3,test2     x      1
+```
+
+
+
+
+%package -n python3-anonypy
+Summary: Anonymization library for python
+Provides: python-anonypy
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-anonypy
+# AnonyPy
+Anonymization library for python.
+AnonyPy provides following privacy preserving techniques for the anonymization.
+- K Anonymity
+- L Diversity
+- T Closeness
+
+## The Anonymization method
+- Anonymization method aims at making the individual record be indistinguishable among a group record by using techniques of generalization and suppression.
+- Turning a dataset into a k-anonymous (and possibly l-diverse or t-close) dataset is a complex problem, and finding the optimal partition into k-anonymous groups is an NP-hard problem.
+- AnonyPy uses "Mondrian" algorithm to partition the original data into smaller and smaller groups
+- The algorithm assumes that we have converted all attributes into numerical or categorical values and that we are able to measure the “span” of a given attribute Xi.
+
+## Install
+```
+$ pip install anonypy
+```
+
+## Usage
+```python
+import anonypy
+import pandas as pd
+
+data = [
+    [6, "1", "test1", "x", 20],
+    [6, "1", "test1", "x", 30],
+    [8, "2", "test2", "x", 50],
+    [8, "2", "test3", "w", 45],
+    [8, "1", "test2", "y", 35],
+    [4, "2", "test3", "y", 20],
+    [4, "1", "test3", "y", 20],
+    [2, "1", "test3", "z", 22],
+    [2, "2", "test3", "y", 32],
+]
+
+columns = ["col1", "col2", "col3", "col4", "col5"]
+categorical = set(("col2", "col3", "col4"))
+
+def main():
+    df = pd.DataFrame(data=data, columns=columns)
+
+    for name in categorical:
+        df[name] = df[name].astype("category")
+
+    feature_columns = ["col1", "col2", "col3"]
+    sensitive_column = "col4"
+
+    p = anonypy.Preserver(df, feature_columns, sensitive_column)
+    rows = p.anonymize_k_anonymity(k=2)
+
+    dfn = pd.DataFrame(rows)
+    print(dfn)
+```
+
+Original data
+```bash
+   col1  col2   col3  col4  col5
+0     6     1  test1     x    20
+1     6     1  test1     x    30
+2     8     2  test2     x    50
+3     8     2  test3     w    45
+4     8     1  test2     y    35
+5     4     2  test3     y    20
+6     4     1  test3     y    20
+7     2     1  test3     z    22
+8     2     2  test3     y    32
+```
+
+The created anonymized data is below(Guarantee 2-anonymity).
+```bash
+   col1  col2         col3  col4  count
+0   2-4     2        test3     y      2
+1   2-4     1        test3     y      1
+2   2-4     1        test3     z      1
+3   6-8     1  test1,test2     x      2
+4   6-8     1  test1,test2     y      1
+5     8     2  test3,test2     w      1
+6     8     2  test3,test2     x      1
+```
+
+
+
+
+%package help
+Summary: Development documents and examples for anonypy
+Provides: python3-anonypy-doc
+%description help
+# AnonyPy
+Anonymization library for python.
+AnonyPy provides following privacy preserving techniques for the anonymization.
+- K Anonymity
+- L Diversity
+- T Closeness
+
+## The Anonymization method
+- Anonymization method aims at making the individual record be indistinguishable among a group record by using techniques of generalization and suppression.
+- Turning a dataset into a k-anonymous (and possibly l-diverse or t-close) dataset is a complex problem, and finding the optimal partition into k-anonymous groups is an NP-hard problem.
+- AnonyPy uses "Mondrian" algorithm to partition the original data into smaller and smaller groups
+- The algorithm assumes that we have converted all attributes into numerical or categorical values and that we are able to measure the “span” of a given attribute Xi.
+
+## Install
+```
+$ pip install anonypy
+```
+
+## Usage
+```python
+import anonypy
+import pandas as pd
+
+data = [
+    [6, "1", "test1", "x", 20],
+    [6, "1", "test1", "x", 30],
+    [8, "2", "test2", "x", 50],
+    [8, "2", "test3", "w", 45],
+    [8, "1", "test2", "y", 35],
+    [4, "2", "test3", "y", 20],
+    [4, "1", "test3", "y", 20],
+    [2, "1", "test3", "z", 22],
+    [2, "2", "test3", "y", 32],
+]
+
+columns = ["col1", "col2", "col3", "col4", "col5"]
+categorical = set(("col2", "col3", "col4"))
+
+def main():
+    df = pd.DataFrame(data=data, columns=columns)
+
+    for name in categorical:
+        df[name] = df[name].astype("category")
+
+    feature_columns = ["col1", "col2", "col3"]
+    sensitive_column = "col4"
+
+    p = anonypy.Preserver(df, feature_columns, sensitive_column)
+    rows = p.anonymize_k_anonymity(k=2)
+
+    dfn = pd.DataFrame(rows)
+    print(dfn)
+```
+
+Original data
+```bash
+   col1  col2   col3  col4  col5
+0     6     1  test1     x    20
+1     6     1  test1     x    30
+2     8     2  test2     x    50
+3     8     2  test3     w    45
+4     8     1  test2     y    35
+5     4     2  test3     y    20
+6     4     1  test3     y    20
+7     2     1  test3     z    22
+8     2     2  test3     y    32
+```
+
+The created anonymized data is below(Guarantee 2-anonymity).
+```bash
+   col1  col2         col3  col4  count
+0   2-4     2        test3     y      2
+1   2-4     1        test3     y      1
+2   2-4     1        test3     z      1
+3   6-8     1  test1,test2     x      2
+4   6-8     1  test1,test2     y      1
+5     8     2  test3,test2     w      1
+6     8     2  test3,test2     x      1
+```
+
+
+
+
+%prep
+%autosetup -n anonypy-0.1.7
+
+%build
+%py3_build
+# After install, list payload paths (double-quoted so whitespace survives; man pages with a .gz suffix) for the files sections.
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}
+if [ -d usr/lib ]; then
+    find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/lib64 ]; then
+    find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+    find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+    find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst
+fi
+touch doclist.lst
+if [ -d usr/share/man ]; then
+    find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst
+fi
+popd
+mv %{buildroot}/filelist.lst .
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-anonypy -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Thu May 18 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.7-1
+- Package Spec generated |
