opendp · joshua-oss · Jul 30, 2024 · Jul 30, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/synth/HISTORY.md b/synth/HISTORY.md
@@ -1,3 +1,10 @@
+# SmartNoise Synth v1.0.4 Release Notes
+
+* Use newer Faker versions (>=17.0.0)
+* Support Python 3.12 (supported range is now Python >=3.9,<3.13; Python 3.7 and 3.8 are no longer supported)
+* Switch to use SmartNoise SQL v1.0.4
+* Switch to torch >=2.2.0 (now declared as an optional dependency)
+
 # SmartNoise Synth v1.0.3 Release Notes
 
 * Switch to use SmartNoise SQL v1.0.3

diff --git a/synth/VERSION b/synth/VERSION
@@ -1 +1 @@
-1.0.3
+1.0.4
diff --git a/synth/pyproject.toml b/synth/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "smartnoise-synth"
-version = "1.0.3"
+version = "1.0.4"
 description = "Differentially Private Synthetic Data"
 authors = ["SmartNoise Team <smartnoise@opendp.org>"]
 license = "MIT"
@@ -10,15 +10,16 @@ repository = "https://github.com/opendp/smartnoise-sdk"
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = ">=3.7,<=3.11"
+python = ">=3.9,<3.13"
 opacus = "^0.14.0"
-torch = "<2.0.0"
+torch = {version = ">=2.2.0", optional = true}
 pac-synth = "^0.0.8"
-smartnoise-sql = "^1.0.3"
-Faker = "^15.0.0"
+smartnoise-sql = "^1.0.4"
+Faker = ">=17.0.0"
+private-pgm = { git = "https://github.com/ryan112358/private-pgm.git", rev = "01f02f17eba440f4e76c1d06fa5ee9eed0bd2bca" }
 
 [tool.poetry.dev-dependencies]
 
 [build-system]
-requires = ["setuptools", "poetry-core>=1.0.0"]
+requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
diff --git a/synth/setup.py b/synth/setup.py
diff --git a/synth/tests/requirements.txt b/synth/tests/requirements.txt
@@ -5,4 +5,4 @@ mlflow
 scikit-learn
 numpy
 pytest
-git+https://github.com/ryan112358/private-pgm.git
+git+https://github.com/ryan112358/private-pgm.git@01f02f17eba440f4e76c1d06fa5ee9eed0bd2bca
diff --git a/synth/tests/test_aggregate_seeded.py b/synth/tests/test_aggregate_seeded.py
@@ -10,6 +10,7 @@
 import json
 import pytest
 
+
 from snsynth.transform.table import NoTransformer
 
 def gen_data_frame_with_schema(schema, n_records):
@@ -57,8 +58,8 @@ def gen_data_frame(number_of_records_to_generate):
     )
 
 
-class TestAggregateSeeded:
-    def setup(self):
+class TestAggregateSeeded():
+    def setup_method(self):
         self.sensitive_df = gen_data_frame(10000)
 
     def test_synth_creation_with_default_params(self):

diff --git a/synth/tests/test_aim.py b/synth/tests/test_aim.py
@@ -9,7 +9,7 @@ class TestAIM(TestCase):
     aim = None
 
     @classmethod
-    def setUpClass(cls) -> None:
+    def setup_class(cls) -> None:
         cls.example_df = pd.read_csv(cls.input_data_path)
         cls.aim = AIMSynthesizer()
 

diff --git a/synth/tests/test_factory.py b/synth/tests/test_factory.py
@@ -2,6 +2,7 @@
 import subprocess
 import pandas as pd
 from sklearn import preprocessing
+from unittest import TestCase
 from snsynth import *
 
 git_root_dir = subprocess.check_output("git rev-parse --show-toplevel".split(" ")).decode("utf-8").strip()
@@ -10,7 +11,7 @@
 
 df = pd.read_csv(csv_path, index_col=None)
 
-class TestFactory:
+class TestFactory(TestCase):
     def test_create_empty(self):
         for synth in Synthesizer.list_synthesizers():
             _ = Synthesizer.create(synth, epsilon=1.0)

diff --git a/synth/tests/test_input_checks.py b/synth/tests/test_input_checks.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
 
+
 from snsynth.pytorch import PytorchDPSynthesizer
 from snsynth.pytorch.nn import PATECTGAN, PATEGAN
 
@@ -13,7 +14,7 @@
 )
 
 
-class TestDPGANInputChecks:
+class TestDPGANInputChecks():
     def test_train_patectgan_continuous(self):
         dpgan = PATECTGAN(epsilon=eps, batch_size=batch_size)
         try:

diff --git a/synth/tests/test_mst.py b/synth/tests/test_mst.py
@@ -1,6 +1,7 @@
 import subprocess
 import os
 
+
 import numpy as np
 import pandas as pd
 
@@ -14,10 +15,14 @@
 df = df.drop(["income"], axis=1)
 df = df.sample(frac=1, random_state=42)
 
-class TestMST:
+class TestMST():
 
-    def setup(self):
-        self.mst = MSTSynthesizer()
+    @classmethod
+    def setup_class(cls) -> None:
+        print("Setting up class")
+        cls.mst = MSTSynthesizer()
+        print("Setup class")
+        print(cls.mst)
 
     def test_fit(self):
         self.df_non_continuous = df[['sex','educ','race','married']]

diff --git a/synth/tests/test_mwem.py b/synth/tests/test_mwem.py
@@ -29,7 +29,7 @@
 
 test_histogram_dims = (3,3,3)
 
-class TestMWEM:
+class TestMWEM():
     def test_short_import_works(self):
         assert MWEMSynthesizer == ShortMWEMSynthesizer
 

diff --git a/synth/tests/test_pategan.py b/synth/tests/test_pategan.py
@@ -5,6 +5,7 @@
 import string
 import pandas as pd
 
+
 # try:
 from snsynth.pytorch import PytorchDPSynthesizer
 from snsynth.pytorch.nn import PATEGAN
@@ -21,8 +22,8 @@
 df = pd.read_csv(csv_path)
 
 @pytest.mark.torch
-class TestDPGAN:
-    def setup(self):
+class TestDPGAN():
+    def setup_method(self):
         self.pategan = PytorchDPSynthesizer(1.0, PATEGAN(1.0), None)
 
     def test_fit(self):

diff --git a/synth/tests/test_pytorch_synthesizer.py b/synth/tests/test_pytorch_synthesizer.py
@@ -4,6 +4,7 @@
 import pytest
 import pandas as pd
 
+
 from snsynth.pytorch import PytorchDPSynthesizer
 from snsynth.pytorch.nn import DPGAN, DPCTGAN, PATECTGAN
 from snsynth.transform.table import TableTransformer
@@ -26,8 +27,8 @@
 
 
 @pytest.mark.torch
-class TestPytorchDPSynthesizer_DPGAN:
-    def setup(self):
+class TestPytorchDPSynthesizer_DPGAN():
+    def setup_method(self):
         self.dpgan = PytorchDPSynthesizer(1.0, DPGAN(), None)
 
     def test_fit(self):
@@ -52,8 +53,8 @@ def test_fit_continuous(self):
         assert synth_data.shape == df_continuous.shape
 
 
-class TestPytorchDPSynthesizer_DPCTGAN:
-    def setup(self):
+class TestPytorchDPSynthesizer_DPCTGAN():
+    def setup_method(self):
         self.dpctgan = PytorchDPSynthesizer(1.0, DPCTGAN(), None)
 
     def test_fit(self):
@@ -81,8 +82,8 @@ def test_fit_numpy(self):
         dpctgan.train(nf_non_continuous, preprocessor_eps=0.5, categorical_columns=[0, 1, 2, 3])
 
 
-class TestPytorchDPSynthesizer_PATECTGAN:
-    def setup(self):
+class TestPytorchDPSynthesizer_PATECTGAN():
+    def setup_method(self):
         self.patectgan = PytorchDPSynthesizer(1.0, PATECTGAN(), None)
 
     def test_fit(self):
@@ -106,8 +107,8 @@ def test_sample(self):
         assert synth_data.shape == df.shape
 
 
-class TestPytorchDPSynthesizer_PATECTDRAGAN:
-    def setup(self):
+class TestPytorchDPSynthesizer_PATECTDRAGAN():
+    def setup_method(self):
         self.patectgan = PytorchDPSynthesizer(
             1.0, PATECTGAN(regularization="dragan"), None
         )

diff --git a/synth/tests/test_quail.py b/synth/tests/test_quail.py
@@ -4,6 +4,7 @@
 import pytest
 import pandas as pd
 
+
 from diffprivlib.models import LogisticRegression as DPLR
 
 from snsynth.pytorch import PytorchDPSynthesizer
@@ -24,8 +25,8 @@
 
 
 @pytest.mark.torch
-class TestQUAIL:
-    def setup(self):
+class TestQUAIL():
+    def setup_method(self):
         def QuailClassifier(epsilon):
             return DPLR(epsilon=epsilon)
 

diff --git a/synth/tests/test_sample_conditional.py b/synth/tests/test_sample_conditional.py
@@ -16,7 +16,7 @@
 narrow_df = pd.read_csv(csv_path, index_col=None, usecols=narrrow_columns)
 
 
-class TestSampleConditional:
+class TestSampleConditional():
     def test_n_row_invalid(self):
         dummy_synth = Synthesizer()
         for n_row in [-np.inf, -1, 0, 0.9]: