summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--python-anonypy.spec315
-rw-r--r--sources1
3 files changed, 317 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e69de29..fcbd27c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/anonypy-0.1.7.tar.gz
diff --git a/python-anonypy.spec b/python-anonypy.spec
new file mode 100644
index 0000000..d8c467c
--- /dev/null
+++ b/python-anonypy.spec
@@ -0,0 +1,315 @@
+%global _empty_manifest_terminate_build 0
+Name: python-anonypy
+Version: 0.1.7
+Release: 1
+Summary: Anonymization library for python
+License: MIT License
+URL: https://github.com/glassonion1/anonypy
+Source0: https://mirrors.nju.edu.cn/pypi/web/packages/04/5d/adc4824c45316d48c1448082a8144ba07378f5b22dfb08a5b3bd112e7e49/anonypy-0.1.7.tar.gz
+BuildArch: noarch
+
+
+%description
+# AnonyPy
+Anonymization library for python.
+AnonyPy provides the following privacy-preserving techniques for anonymization.
+- K Anonymity
+- L Diversity
+- T Closeness
+
+## The Anonymization method
+- The anonymization method aims to make each individual record indistinguishable within a group of records by using generalization and suppression techniques.
+- Turning a dataset into a k-anonymous (and possibly l-diverse or t-close) dataset is a complex problem, and finding the optimal partition into k-anonymous groups is an NP-hard problem.
+- AnonyPy uses the "Mondrian" algorithm to partition the original data into smaller and smaller groups.
+- The algorithm assumes that we have converted all attributes into numerical or categorical values and that we are able to measure the “span” of a given attribute Xi.
+
+## Install
+```
+$ pip install anonypy
+```
+
+## Usage
+```python
+import anonypy
+import pandas as pd
+
+data = [
+ [6, "1", "test1", "x", 20],
+ [6, "1", "test1", "x", 30],
+ [8, "2", "test2", "x", 50],
+ [8, "2", "test3", "w", 45],
+ [8, "1", "test2", "y", 35],
+ [4, "2", "test3", "y", 20],
+ [4, "1", "test3", "y", 20],
+ [2, "1", "test3", "z", 22],
+ [2, "2", "test3", "y", 32],
+]
+
+columns = ["col1", "col2", "col3", "col4", "col5"]
+categorical = set(("col2", "col3", "col4"))
+
+def main():
+ df = pd.DataFrame(data=data, columns=columns)
+
+ for name in categorical:
+ df[name] = df[name].astype("category")
+
+ feature_columns = ["col1", "col2", "col3"]
+ sensitive_column = "col4"
+
+ p = anonypy.Preserver(df, feature_columns, sensitive_column)
+ rows = p.anonymize_k_anonymity(k=2)
+
+ dfn = pd.DataFrame(rows)
+ print(dfn)
+```
+
+Original data
+```bash
+ col1 col2 col3 col4 col5
+0 6 1 test1 x 20
+1 6 1 test1 x 30
+2 8 2 test2 x 50
+3 8 2 test3 w 45
+4 8 1 test2 y 35
+5 4 2 test3 y 20
+6 4 1 test3 y 20
+7 2 1 test3 z 22
+8 2 2 test3 y 32
+```
+
+The resulting anonymized data is shown below (guarantees 2-anonymity).
+```bash
+ col1 col2 col3 col4 count
+0 2-4 2 test3 y 2
+1 2-4 1 test3 y 1
+2 2-4 1 test3 z 1
+3 6-8 1 test1,test2 x 2
+4 6-8 1 test1,test2 y 1
+5 8 2 test3,test2 w 1
+6 8 2 test3,test2 x 1
+```
+
+
+
+
+%package -n python3-anonypy
+Summary: Anonymization library for python
+Provides: python-anonypy
+BuildRequires: python3-devel
+BuildRequires: python3-setuptools
+BuildRequires: python3-pip
+%description -n python3-anonypy
+# AnonyPy
+Anonymization library for python.
+AnonyPy provides the following privacy-preserving techniques for anonymization.
+- K Anonymity
+- L Diversity
+- T Closeness
+
+## The Anonymization method
+- The anonymization method aims to make each individual record indistinguishable within a group of records by using generalization and suppression techniques.
+- Turning a dataset into a k-anonymous (and possibly l-diverse or t-close) dataset is a complex problem, and finding the optimal partition into k-anonymous groups is an NP-hard problem.
+- AnonyPy uses the "Mondrian" algorithm to partition the original data into smaller and smaller groups.
+- The algorithm assumes that we have converted all attributes into numerical or categorical values and that we are able to measure the “span” of a given attribute Xi.
+
+## Install
+```
+$ pip install anonypy
+```
+
+## Usage
+```python
+import anonypy
+import pandas as pd
+
+data = [
+ [6, "1", "test1", "x", 20],
+ [6, "1", "test1", "x", 30],
+ [8, "2", "test2", "x", 50],
+ [8, "2", "test3", "w", 45],
+ [8, "1", "test2", "y", 35],
+ [4, "2", "test3", "y", 20],
+ [4, "1", "test3", "y", 20],
+ [2, "1", "test3", "z", 22],
+ [2, "2", "test3", "y", 32],
+]
+
+columns = ["col1", "col2", "col3", "col4", "col5"]
+categorical = set(("col2", "col3", "col4"))
+
+def main():
+ df = pd.DataFrame(data=data, columns=columns)
+
+ for name in categorical:
+ df[name] = df[name].astype("category")
+
+ feature_columns = ["col1", "col2", "col3"]
+ sensitive_column = "col4"
+
+ p = anonypy.Preserver(df, feature_columns, sensitive_column)
+ rows = p.anonymize_k_anonymity(k=2)
+
+ dfn = pd.DataFrame(rows)
+ print(dfn)
+```
+
+Original data
+```bash
+ col1 col2 col3 col4 col5
+0 6 1 test1 x 20
+1 6 1 test1 x 30
+2 8 2 test2 x 50
+3 8 2 test3 w 45
+4 8 1 test2 y 35
+5 4 2 test3 y 20
+6 4 1 test3 y 20
+7 2 1 test3 z 22
+8 2 2 test3 y 32
+```
+
+The resulting anonymized data is shown below (guarantees 2-anonymity).
+```bash
+ col1 col2 col3 col4 count
+0 2-4 2 test3 y 2
+1 2-4 1 test3 y 1
+2 2-4 1 test3 z 1
+3 6-8 1 test1,test2 x 2
+4 6-8 1 test1,test2 y 1
+5 8 2 test3,test2 w 1
+6 8 2 test3,test2 x 1
+```
+
+
+
+
+%package help
+Summary: Development documents and examples for anonypy
+Provides: python3-anonypy-doc
+%description help
+# AnonyPy
+Anonymization library for python.
+AnonyPy provides the following privacy-preserving techniques for anonymization.
+- K Anonymity
+- L Diversity
+- T Closeness
+
+## The Anonymization method
+- The anonymization method aims to make each individual record indistinguishable within a group of records by using generalization and suppression techniques.
+- Turning a dataset into a k-anonymous (and possibly l-diverse or t-close) dataset is a complex problem, and finding the optimal partition into k-anonymous groups is an NP-hard problem.
+- AnonyPy uses the "Mondrian" algorithm to partition the original data into smaller and smaller groups.
+- The algorithm assumes that we have converted all attributes into numerical or categorical values and that we are able to measure the “span” of a given attribute Xi.
+
+## Install
+```
+$ pip install anonypy
+```
+
+## Usage
+```python
+import anonypy
+import pandas as pd
+
+data = [
+ [6, "1", "test1", "x", 20],
+ [6, "1", "test1", "x", 30],
+ [8, "2", "test2", "x", 50],
+ [8, "2", "test3", "w", 45],
+ [8, "1", "test2", "y", 35],
+ [4, "2", "test3", "y", 20],
+ [4, "1", "test3", "y", 20],
+ [2, "1", "test3", "z", 22],
+ [2, "2", "test3", "y", 32],
+]
+
+columns = ["col1", "col2", "col3", "col4", "col5"]
+categorical = set(("col2", "col3", "col4"))
+
+def main():
+ df = pd.DataFrame(data=data, columns=columns)
+
+ for name in categorical:
+ df[name] = df[name].astype("category")
+
+ feature_columns = ["col1", "col2", "col3"]
+ sensitive_column = "col4"
+
+ p = anonypy.Preserver(df, feature_columns, sensitive_column)
+ rows = p.anonymize_k_anonymity(k=2)
+
+ dfn = pd.DataFrame(rows)
+ print(dfn)
+```
+
+Original data
+```bash
+ col1 col2 col3 col4 col5
+0 6 1 test1 x 20
+1 6 1 test1 x 30
+2 8 2 test2 x 50
+3 8 2 test3 w 45
+4 8 1 test2 y 35
+5 4 2 test3 y 20
+6 4 1 test3 y 20
+7 2 1 test3 z 22
+8 2 2 test3 y 32
+```
+
+The resulting anonymized data is shown below (guarantees 2-anonymity).
+```bash
+ col1 col2 col3 col4 count
+0 2-4 2 test3 y 2
+1 2-4 1 test3 y 1
+2 2-4 1 test3 z 1
+3 6-8 1 test1,test2 x 2
+4 6-8 1 test1,test2 y 1
+5 8 2 test3,test2 w 1
+6 8 2 test3,test2 x 1
+```
+
+
+
+
+%prep
+%autosetup -n anonypy-0.1.7
+
+%build
+%py3_build
+
+%install
+%py3_install
+install -d -m755 %{buildroot}/%{_pkgdocdir}  # create the package documentation directory inside the buildroot
+if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi  # copy each doc/example directory that exists in the source tree
+if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi
+if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi
+if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi
+pushd %{buildroot}  # run the finds from the buildroot so the recorded paths are relative to it
+if [ -d usr/lib ]; then
+ find usr/lib -type f -printf "/%h/%f\n" >> filelist.lst  # one line per regular file, with a leading slash prepended
+fi
+if [ -d usr/lib64 ]; then
+ find usr/lib64 -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/bin ]; then
+ find usr/bin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+if [ -d usr/sbin ]; then
+ find usr/sbin -type f -printf "/%h/%f\n" >> filelist.lst
+fi
+touch doclist.lst  # ensure the list exists even when no man pages were installed
+if [ -d usr/share/man ]; then
+ find usr/share/man -type f -printf "/%h/%f.gz\n" >> doclist.lst  # a .gz suffix is appended; presumably man pages are compressed later in the build - confirm
+fi
+popd
+mv %{buildroot}/filelist.lst .  # move both lists out of the buildroot; the files sections read them via -f
+mv %{buildroot}/doclist.lst .
+
+%files -n python3-anonypy -f filelist.lst
+%dir %{python3_sitelib}/*
+
+%files help -f doclist.lst
+%{_docdir}/*
+
+%changelog
+* Thu May 18 2023 Python_Bot <Python_Bot@openeuler.org> - 0.1.7-1
+- Package Spec generated
diff --git a/sources b/sources
new file mode 100644
index 0000000..9db555b
--- /dev/null
+++ b/sources
@@ -0,0 +1 @@
+41038868033017c3b0446e3dea12b14a anonypy-0.1.7.tar.gz