Skip to content

cosmotech.coal.cosmotech_api.dataset.utils

utils

get_content_from_twin_graph_data(nodes, relationships, restore_names=False)

Extract content from twin graph data.

When restore_names is True, the "id" value inside the "properties" field of the cypher query response is used instead of the numerical id found in the top-level "id" field. When restore_names is False (the default), this function keeps the previous behavior implemented when adding support for twingraph in v2 and uses the numerical top-level "id".

Example with a sample of cypher response:

    [{
      n: {
        id: "50"  <-- this id is used if restore_names is False
        label: "Customer"
        properties: {
          Satisfaction: 0
          SurroundingSatisfaction: 0
          Thirsty: false
          id: "Lars_Coret"  <-- this id is used if restore_names is True
        }
        type: "NODE"
      }
    }]

Args:

- nodes: List of node data from cypher query
- relationships: List of relationship data from cypher query
- restore_names: Whether to use property ID instead of node ID

Returns: Dict mapping entity types to lists of entities

Source code in cosmotech/coal/cosmotech_api/dataset/utils.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def get_content_from_twin_graph_data(
    nodes: List[Dict], relationships: List[Dict], restore_names: bool = False
) -> Dict[str, List[Dict]]:
    """
    Group twin graph nodes and relationships by their label.

    Each node becomes a copy of its "properties" dict and each relationship becomes a dict holding
    "id", "source" and "target" followed by the relationship's own properties. Both are appended to
    the list stored under their label in the returned mapping.

    The restore_names flag selects which identifier is exposed:

    - False (default): the top-level "id" of the cypher element is used. For nodes it overwrites any
      "id" found in the properties; for relationships it is used for "source" and "target".
    - True: the "id" stored inside "properties" is used. Node properties are kept untouched and
      "source"/"target" are read from the properties of the source/destination nodes.

    Sample of a cypher node response:
    [{
      n: {
        id: "50"  <-- used when restore_names is False
        label: "Customer"
        properties: {
          Satisfaction: 0
          SurroundingSatisfaction: 0
          Thirsty: false
          id: "Lars_Coret"  <-- used when restore_names is True
        }
        type: "NODE"
      }
    }]

    Args:
        nodes: List of node data from cypher query (each item holds the node under the "n" key)
        relationships: List of relationship data from cypher query (each item holds "src", "dest" and "rel")
        restore_names: Whether to use property ID instead of node ID

    Returns:
        Dict mapping entity types to lists of entities
    """
    LOGGER.debug(
        T("coal.services.dataset.processing_graph_data").format(
            nodes_count=len(nodes),
            relationships_count=len(relationships),
            restore_names=restore_names,
        )
    )

    content = {}

    # Seed one empty bucket per label referenced by a relationship, so that
    # endpoint labels appear in the result even when no node of that label is provided.
    for link in relationships:
        for role in ("src", "dest", "rel"):
            content[link[role]["label"]] = []

    # Nodes: one row per node, stored under the node label
    for entry in nodes:
        node = entry["n"]
        node_label = node["label"]
        row = node["properties"].copy()  # never mutate the caller's data
        if not restore_names:
            row["id"] = node["id"]
        content.setdefault(node_label, []).append(row)

    # Relationships: one row per relationship, stored under the relationship label
    for link in relationships:
        origin = link["src"]
        destination = link["dest"]
        edge = link["rel"]
        edge_props = edge["properties"].copy()  # never mutate the caller's data

        record = {"id": edge["id"]}
        if restore_names:
            record["source"] = origin["properties"]["id"]
            record["target"] = destination["properties"]["id"]
        else:
            record["source"] = origin["id"]
            record["target"] = destination["id"]
        # Relationship properties are applied last: a property named like one of the
        # keys above replaces its value while keeping the key position.
        record.update(edge_props)

        content[edge["label"]].append(record)

    # Log the number of entities by type
    for entity_type in content:
        LOGGER.debug(
            T("coal.services.dataset.entity_count").format(entity_type=entity_type, count=len(content[entity_type]))
        )

    return content

sheet_to_header(sheet_content)

Extract header fields from sheet content.

Args: sheet_content: List of dictionaries representing sheet rows

Returns: List of field names with id, source, and target fields first if present

Source code in cosmotech/coal/cosmotech_api/dataset/utils.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def sheet_to_header(sheet_content: List[Dict]) -> List[str]:
    """
    Build the ordered list of column names for a sheet.

    Keys are collected over every row in first-seen order, without duplicates. The special keys are
    then moved to the front: "id" first when any row has it, followed by both "source" and "target"
    as soon as any row has at least one of them.

    Args:
        sheet_content: List of dictionaries representing sheet rows

    Returns:
        List of field names with id, source, and target fields first if present
    """
    LOGGER.debug(T("coal.services.dataset.extracting_headers").format(rows=len(sheet_content)))

    # A dict de-duplicates the keys while remembering the order of first appearance
    seen_keys = dict.fromkeys(key for row in sheet_content for key in row.keys())

    leading = []
    if "id" in seen_keys:
        leading.append("id")
    if "source" in seen_keys or "target" in seen_keys:
        # Both columns are emitted together, even when only one of them was seen
        leading.extend(["source", "target"])

    regular = [key for key in seen_keys if key not in ("id", "source", "target")]
    fieldnames = leading + regular

    # Only the first five names are logged, with an ellipsis when more exist
    preview = ", ".join(fieldnames[:5])
    if len(fieldnames) > 5:
        preview += "..."
    LOGGER.debug(
        T("coal.services.dataset.headers_extracted").format(
            count=len(fieldnames),
            fields=preview,
        )
    )

    return fieldnames