Source code for sdgx.data_processors.transformers.column_order
from __future__ import annotations
from typing import Any
import pandas as pd
from sdgx.data_models.metadata import Metadata
from sdgx.data_processors.extension import hookimpl
from sdgx.data_processors.transformers.base import Transformer
from sdgx.utils import logger
[docs]
class ColumnOrderTransformer(Transformer):
"""
A transformer that rearranges the columns of a DataFrame to a specified order.
Attributes:
column_list (list): The list of column names in the desired order.
Methods:
fit(metadata: Metadata | None = None, **kwargs: dict[str, Any]): Fits the transformer by remembering the order of the columns.
convert(raw_data: pd.DataFrame) -> pd.DataFrame: Converts the input DataFrame by rearranging its columns.
reverse_convert(processed_data: pd.DataFrame) -> pd.DataFrame: Reverse-converts the processed DataFrame by rearranging its columns back to their original order.
rearrange_columns(column_list, processed_data): Rearranges the columns of a DataFrame according to the provided column list.
"""
column_list: list
"""
The list of tabular data's columns.
"""
def __init__(self):
self.column_list = None
[docs]
def fit(self, metadata: Metadata | None = None, **kwargs: dict[str, Any]):
"""
Fit method for the transformer.
Remember the order of the columns.
"""
self.column_list = list(metadata.column_list)
logger.info("ColumnOrderTransformer Fitted.")
self.fitted = True
return
[docs]
def convert(self, raw_data: pd.DataFrame) -> pd.DataFrame:
"""
Convert method to handle missing values in the input data.
"""
logger.info("Converting data using ColumnOrderTransformer...")
logger.info("Converting data using ColumnOrderTransformer... Finished (No action).")
return raw_data
[docs]
def reverse_convert(self, processed_data: pd.DataFrame) -> pd.DataFrame:
"""
Reverse_convert method for the transformer.
"""
res = self.rearrange_columns(self.column_list, processed_data)
logger.info("Data reverse-converted by ColumnOrderTransformer.")
return res
[docs]
@staticmethod
def rearrange_columns(column_list, processed_data):
"""
This method rearranges the columns of a given DataFrame according to the provided column list.
Any columns in the DataFrame that are not in the column list are dropped.
Args:
- column_list (list): A list of column names in the order they should appear in the output DataFrame.
- processed_data (pd.DataFrame): The DataFrame to be rearranged.
Returns:
- result_data (pd.DataFrame): The rearranged DataFrame.
"""
result_data = processed_data.reindex(columns=column_list)
return result_data
@hookimpl
def register(manager):
manager.register("ColumnOrderTransformer", ColumnOrderTransformer)