Synthetic single-table data with specific_combinations

import pandas as pd

from sdgx.data_connectors.csv_connector import CsvConnector
from sdgx.data_models.metadata import Metadata
from sdgx.models.ml.single_table.ctgan import CTGANSynthesizerModel
from sdgx.synthesizer import Synthesizer
from sdgx.utils import download_demo_data

# Obtain the demo dataset CSV and wrap it in a connector for the synthesizer.
dataset_csv = download_demo_data()
data_connector = CsvConnector(path=dataset_csv)

# Specify the fixed column combinations.
# Each tuple describes one combination; several tuples may be given,
# but this demo specifies only one.
combinations = {("education", "educational-num")}
metadata = Metadata.from_dataframe(pd.read_csv(dataset_csv))
metadata.update({"specific_combinations": combinations})

# A single epoch keeps the demo quick.
ctgan_model = CTGANSynthesizerModel(epochs=1)
synthesizer = Synthesizer(
    model=ctgan_model,
    data_connector=data_connector,
    metadata=metadata,
)
synthesizer.fit()
sampled_data = synthesizer.sample(1000)
synthesizer.cleanup()  # Clean all cache


from sdgx.metrics.column.jsd import JSD

# Bind the metric instance to its own name so the imported JSD class
# is not shadowed by the instance.
jsd = JSD()

# Evaluate a single column of the real data against the sampled data.
# The boolean flag is passed as the fourth positional argument of
# calculate(); presumably it marks the column as discrete — confirm
# against the JSD API.
selected_columns = ["workclass"]
is_discrete = True
metrics = jsd.calculate(data_connector.read(), sampled_data, selected_columns, is_discrete)

print("JSD metric of column %s: %g" % (selected_columns[0], metrics))