Skip to content

cosmotech.coal.azure.adx.utils

utils

create_column_mapping(data)

Create a column mapping for a PyArrow table.

Args: data: The PyArrow table data

Returns: dict: A mapping of column names to their ADX types

Source code in cosmotech/coal/azure/adx/utils.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def create_column_mapping(data: pyarrow.Table) -> Dict[str, str]:
    """
    Build the ADX column-type mapping for a PyArrow table.

    The first non-null value of each column is used as the sample handed to
    ``type_mapping``. A column without any non-null value is reported through
    the logger and typed from the placeholder sample ``"string"``.

    Args:
        data: The PyArrow table to inspect

    Returns:
        dict: The ADX type name for each column name
    """
    column_types = {}
    for name in data.column_names:
        values = data.column(name).to_pylist()
        # None doubles as the "nothing found" marker since None values are filtered out
        sample = next((value for value in values if value is not None), None)
        if sample is None:
            # Nothing to infer a type from: report it and use the placeholder sample
            LOGGER.error(T("coal.services.adx.empty_column").format(column_name=name))
            sample = "string"
        column_types[name] = type_mapping(name, sample)
    return column_types

type_mapping(key, key_example_value)

Map Python types to ADX types.

Args: key: The name of the key; key_example_value: A possible value of the key

Returns: str: The name of the type used in ADX

Source code in cosmotech/coal/azure/adx/utils.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def type_mapping(key: str, key_example_value: Any) -> str:
    """
    Map Python types to ADX types.

    The checks are applied in this order, the first match wins:

    1. the key ``SimulationRun`` is always a ``guid``
    2. a value that dateutil can parse as a date is a ``datetime``
    3. a ``float`` is a ``real``
    4. an ``int`` is a ``long`` (``bool`` is a subclass of ``int``, so
       booleans end up here as well)
    5. anything else is a ``string``

    Args:
        key: The name of the key
        key_example_value: A possible value of the key

    Returns:
        str: The name of the type used in ADX
    """
    LOGGER.debug(T("coal.services.adx.mapping_type").format(key=key, value_type=type(key_example_value).__name__))

    if key == "SimulationRun":
        return "guid"

    try:
        # Use dateutil parser to test if the value could be a date, in case of error it is not
        dateutil.parser.parse(key_example_value, fuzzy=False)
    except (ValueError, TypeError, OverflowError):
        # ValueError: the text is not a valid date
        # TypeError: the value is not a string or character stream (e.g. int, float)
        # OverflowError: the parsed numbers exceed what the platform can represent;
        #                such a value is not a usable date either, so keep going
        pass
    else:
        return "datetime"

    if isinstance(key_example_value, float):
        return "real"

    if isinstance(key_example_value, int):
        return "long"

    # Default case to string
    return "string"