# Source code for packages.decorators.schema_transform

'''
schema_transform
================

Apply a field name transformation to the data output by the wrapped function,
such that specified field names are transformed and unspecified fields are dropped.
The mapping is read from a record-oriented JSON file, which would be formatted as shown:

[{"tier_0": "bad_col", "tier_1": "good_col"},
{"tier_0": "another_bad_col", "tier_1": "another_good_col"},
...]

where :code:`tier_0` and :code:`tier_1` correspond to :code:`from_key` and :code:`to_key`
in the below documentation.    
'''

import functools
import json

import pandas

def load_transformer(filename, from_key, to_key):
    '''Load a field-name mapping from a record-oriented JSON file.

    Args:
        filename (str): Path to a JSON file containing a list of records
                        (dicts), each holding both :code:`from_key` and
                        :code:`to_key`.
        from_key (str): The key in each record whose value is the field name
                        to transform from.
        to_key (str): The key in each record whose value is the field name
                      to transform to.

    Returns:
        dict: Mapping of each record's :code:`from_key` value to its
        :code:`to_key` value. If the same :code:`from_key` value occurs in
        more than one record, the last record wins.

    Raises:
        KeyError: If a record lacks :code:`from_key` or :code:`to_key`.
    '''
    # Read explicitly as UTF-8 rather than the platform's default encoding,
    # so that non-ASCII field names load identically on every system.
    with open(filename, encoding='utf-8') as f:
        records = json.load(f)
    return {row[from_key]: row[to_key] for row in records}


def schema_transform(filename, from_key, to_key):
    '''Decorator factory which renames the fields of the wrapped function's output.

    The field-name mapping is loaded from :code:`filename` once, when the
    decorator is created, and reused on every call of the decorated function.

    Args:
        filename (str): A record-oriented JSON file path mapping field names
                        denoted by :code:`from_key` and :code:`to_key`.
        from_key (str): The key in file indicated by :code:`filename` which indicates
                        the field name to transform.
        to_key (str): The key in file indicated by :code:`filename` which indicates
                      what the field name indicated by :code:`from_key` will be
                      transformed to.

    Returns:
        A decorator. The decorated function returns data in the format it was
        returned by the wrapped function (a :code:`pandas.DataFrame` or a list
        of dict), with specified field names transformed and unspecified
        fields dropped.

    Raises:
        ValueError: When the decorated function is called and the wrapped
                    function returns neither a :code:`pandas.DataFrame` nor
                    a list of dict.
    '''

    transformer = load_transformer(filename, from_key, to_key)

    def wrapper(func):
        # Preserve the wrapped function's name, docstring and other metadata.
        @functools.wraps(func)
        def transformed(*args, **kwargs):
            data = func(*args, **kwargs)
            # Accept DataFrames (including subclasses)...
            if isinstance(data, pandas.DataFrame):
                drop_cols = [c for c in data.columns
                             if c not in transformer]
                # NOTE: the DataFrame returned by the wrapped function is
                # modified in place and then returned.
                data.drop(drop_cols, axis=1, inplace=True)
                data.rename(columns=transformer, inplace=True)
            # ... OR list of dicts
            elif (isinstance(data, list)
                  and all(isinstance(row, dict) for row in data)):
                data = [{transformer[k]: v for k, v in row.items()
                         if k in transformer} for row in data]
            # Otherwise throw an error
            else:
                raise ValueError("Schema transform expects EITHER a "
                                 "pandas.DataFrame "
                                 "OR a list of dict from the wrapped "
                                 "function.")
            return data
        return transformed
    return wrapper


def _transform_row(row, transformer, ignore):
    '''Return a new dict with mapped keys renamed, ignored keys kept and all other keys dropped.'''
    transformed = {transformer[k]: v for k, v in row.items()
                   if k in transformer}
    ignored = {k: v for k, v in row.items() if k in ignore}
    # Ignored keys are merged last, so they win on any name collision.
    return {**transformed, **ignored}


def schema_transformer(data, *, filename, from_key, to_key, ignore=()):
    '''Function version of the schema_transform wrapper.

    Args:
        data (dataframe OR list of dicts OR dict): the data requiring the
            schema transformation
        filename (str): the path to the schema json file
        from_key (str): tier level of the data
        to_key (str): tier level to be applied to the data
        ignore (list): optional list of fields, eg ids or keys which shouldn't be dropped

    Returns:
        supplied data with schema applied. A DataFrame is modified in place
        and the same object is returned; for a list of dicts or a single
        dict, new objects are built and the input is left untouched.

    Raises:
        ValueError: If :code:`data` is not a :code:`pandas.DataFrame`,
                    a list of dict or a single dict.

    Note:
        A field which is in both the schema and :code:`ignore` is renamed in
        the DataFrame case, whereas in the dict cases it appears under both
        its transformed and its original name.
    '''
    transformer = load_transformer(filename, from_key, to_key)
    # Accept DataFrames (including subclasses)...
    if isinstance(data, pandas.DataFrame):
        drop_cols = [c for c in data.columns
                     if c not in transformer
                     and c not in ignore]
        data.drop(drop_cols, axis=1, inplace=True)
        data.rename(columns=transformer, inplace=True)
        return data
    # ... OR list of dicts
    if isinstance(data, list) and all(isinstance(row, dict) for row in data):
        return [_transform_row(row, transformer, ignore) for row in data]
    # ... OR a single dict
    if isinstance(data, dict):
        return _transform_row(data, transformer, ignore)
    # Otherwise throw an error
    raise ValueError("Schema transform expects EITHER a "
                     "pandas.DataFrame "
                     "OR a list of dict, "
                     "OR a single dict from the "
                     "wrapped function.")