Skip to content

kg_creation_utils

add_and_attach_data_entity(kg, data, top_level_kg, top_level_schema_namespace, data_entity, relation, task_entity)

Adds a data entity to the knowledge graph and attaches it to a task entity using a specified relation.

Parameters:

Name Type Description Default
kg Graph

The knowledge graph to add the data entity to.

required
data Entity

The data entity to add.

required
top_level_kg Graph

The top-level knowledge graph.

required
top_level_schema_namespace Namespace

The namespace for the top-level schema.

required
data_entity DataEntity

The data entity to attach.

required
relation URIRef

The relation to use for attaching the data entity.

required
task_entity Task

The task entity to attach the data entity to.

required

Returns:

Type Description
None

None

Source code in exe_kg_lib/utils/kg_creation_utils.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def add_and_attach_data_entity(
    kg: Graph,
    data: Entity,
    top_level_kg: Graph,
    top_level_schema_namespace: Namespace,
    data_entity: DataEntity,
    relation: URIRef,
    task_entity: Task,
) -> None:
    """
    Registers a data entity in the knowledge graph and links a task entity to it.

    The data entity is first written to the graph via ``add_data_entity_instance``;
    afterwards a ``task_entity --relation--> data_entity`` triple is added.

    Args:
        kg (Graph): The knowledge graph that receives the new triples.
        data (Entity): Forwarded unchanged to ``add_data_entity_instance``.
        top_level_kg (Graph): Forwarded unchanged to ``add_data_entity_instance``.
        top_level_schema_namespace (Namespace): The namespace of the top-level schema.
        data_entity (DataEntity): The data entity to add and attach.
        relation (URIRef): The relation pointing from the task to the data entity.
        task_entity (Task): The task entity the data entity gets attached to.

    Returns:
        None
    """
    # Step 1: make sure the data entity itself exists in the graph.
    add_data_entity_instance(
        kg=kg,
        data=data,
        top_level_kg=top_level_kg,
        top_level_schema_namespace=top_level_schema_namespace,
        data_entity=data_entity,
    )

    # Step 2: connect the task to the freshly added data entity.
    add_relation(
        kg=kg,
        from_entity=task_entity,
        relation_iri=relation,
        to_entity=data_entity,
    )

add_data_entity_instance(kg, data, top_level_kg, top_level_schema_namespace, data_entity)

Adds a data entity instance to the knowledge graph.

Parameters:

Name Type Description Default
kg Graph

The knowledge graph to add the data entity instance to.

required
data Entity

The data entity instance to be added.

required
top_level_kg Graph

The top-level knowledge graph.

required
top_level_schema_namespace Namespace

The namespace for the top-level schema.

required
data_entity DataEntity

The data entity object.

required

Returns:

Type Description
None

None

Source code in exe_kg_lib/utils/kg_creation_utils.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def add_data_entity_instance(
    kg: Graph,
    data: Entity,
    top_level_kg: Graph,
    top_level_schema_namespace: Namespace,
    data_entity: DataEntity,
) -> None:
    """
    Writes a data entity and its optional descriptive statements to the knowledge graph.

    The entity is always added as an instance. For every truthy attribute of
    ``data_entity`` one further statement is added:

    - ``source``: an ``xsd:string`` literal via ``hasSource``
    - ``data_structure``: an additional ``rdf:type``
    - ``data_semantics``: an additional ``rdf:type``
    - ``reference``: a ``hasReference`` relation

    Args:
        kg (Graph): The knowledge graph to add the data entity instance to.
        data (Entity): Not used by this function.
        top_level_kg (Graph): Not used by this function.
        top_level_schema_namespace (Namespace): The namespace providing ``hasSource`` and ``hasReference``.
        data_entity (DataEntity): The data entity to be written to the graph.

    Returns:
        None
    """
    add_instance(kg, data_entity)

    source = data_entity.source
    if source:
        source_as_string = Literal(lexical_or_value=source, datatype=XSD.string)
        add_literal(kg, data_entity, top_level_schema_namespace.hasSource, source_as_string)

    # Data structure and data semantics are both expressed as extra rdf:type statements.
    for extra_type_iri in (data_entity.data_structure, data_entity.data_semantics):
        if extra_type_iri:
            add_relation(kg, data_entity, RDF.type, Entity(extra_type_iri))

    reference = data_entity.reference
    if not reference:
        return

    add_relation(kg, data_entity, top_level_schema_namespace.hasReference, Entity(reference))

add_instance(kg, entity_instance, extra_parent_iri=None)

Adds an instance of an entity to the knowledge graph.

Parameters:

Name Type Description Default
kg Graph

The knowledge graph to add the instance to.

required
entity_instance Entity

The entity instance to be added.

required
extra_parent_iri str

An extra parent IRI to add to the instance.

None

Returns:

Type Description
None

None

Source code in exe_kg_lib/utils/kg_creation_utils.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def add_instance(kg: Graph, entity_instance: Entity, extra_parent_iri: str = None) -> None:
    """
    Declares an entity instance in the knowledge graph by typing it with its parent.

    Args:
        kg (Graph): The knowledge graph to add the instance to.
        entity_instance (Entity): The instance to add; its ``parent_entity.iri`` becomes its ``rdf:type``.
        extra_parent_iri (str): Optional IRI of a second ``rdf:type`` for the instance. Skipped when falsy.

    Returns:
        None
    """
    subject_iri = entity_instance.iri
    kg.add((subject_iri, RDF.type, entity_instance.parent_entity.iri))

    if not extra_parent_iri:
        return

    # The instance is additionally typed with the extra parent.
    kg.add((subject_iri, RDF.type, URIRef(extra_parent_iri)))

add_instance_from_parent_with_relation(namespace, kg, parent_entity, relation_iri, related_entity, instance_name, extra_parent_iri=None)

Adds an instance to the knowledge graph with a relation to a given entity.

Parameters:

Name Type Description Default
namespace Namespace

The namespace for the instance.

required
kg Graph

The knowledge graph.

required
parent_entity Entity

The parent entity of the instance.

required
relation_iri str

The IRI of the relation between the related entity and the instance.

required
related_entity Entity

The related entity.

required
instance_name str

The name of the instance.

required
extra_parent_iri str

An extra parent IRI to add to the instance.

None

Returns:

Name Type Description
Entity Entity

The created instance.

Source code in exe_kg_lib/utils/kg_creation_utils.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def add_instance_from_parent_with_relation(
    namespace: Namespace,
    kg: Graph,
    parent_entity: Entity,
    relation_iri: str,
    related_entity: Entity,
    instance_name: str,
    extra_parent_iri: str = None,
) -> Entity:
    """
    Creates a new instance of a parent entity and links an existing entity to it.

    The instance IRI is ``namespace + instance_name``. After the instance is added
    to the graph, a ``related_entity --relation_iri--> instance`` triple is added.

    Args:
        namespace (Namespace): The namespace the instance IRI is built in.
        kg (Graph): The knowledge graph.
        parent_entity (Entity): The parent entity of the new instance.
        relation_iri (str): The IRI of the relation pointing from the related entity to the instance.
        related_entity (Entity): The entity the relation originates from.
        instance_name (str): The name of the instance, appended to the namespace.
        extra_parent_iri (str): Optional extra parent IRI for the instance.

    Returns:
        Entity: The created instance.
    """
    new_instance = Entity(namespace + instance_name, parent_entity)

    add_instance(kg, new_instance, extra_parent_iri=extra_parent_iri)
    add_relation(kg, from_entity=related_entity, relation_iri=relation_iri, to_entity=new_instance)

    return new_instance

add_literal(kg, from_entity, relation_iri, literal)

Adds a literal value to the knowledge graph.

Parameters:

Name Type Description Default
kg Graph

The knowledge graph to add the literal to.

required
from_entity Entity

The entity from which the relation originates.

required
relation_iri str

The IRI of the relation.

required
literal Literal

The literal value to add.

required

Returns:

Type Description
None

None

Source code in exe_kg_lib/utils/kg_creation_utils.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def add_literal(kg: Graph, from_entity: Entity, relation_iri: str, literal: Literal) -> None:
    """
    Attaches a literal value to an entity in the knowledge graph.

    Args:
        kg (Graph): The knowledge graph to add the literal to.
        from_entity (Entity): The entity acting as subject of the statement.
        relation_iri (str): The IRI of the relation; wrapped in a ``URIRef`` before insertion.
        literal (Literal): The literal acting as object of the statement.

    Returns:
        None
    """
    literal_triple = (from_entity.iri, URIRef(relation_iri), literal)
    kg.add(literal_triple)

add_relation(kg, from_entity, relation_iri, to_entity)

Adds a relation between two entities in the knowledge graph.

Parameters:

Name Type Description Default
kg Graph

The knowledge graph to add the relation to.

required
from_entity Entity

The entity from which the relation originates.

required
relation_iri str

The IRI of the relation.

required
to_entity Entity

The entity to which the relation points.

required

Returns:

Type Description
None

None

Source code in exe_kg_lib/utils/kg_creation_utils.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def add_relation(kg: Graph, from_entity: Entity, relation_iri: str, to_entity: Entity) -> None:
    """
    Connects two entities in the knowledge graph with a relation.

    Args:
        kg (Graph): The knowledge graph to add the relation to.
        from_entity (Entity): The entity acting as subject of the statement.
        relation_iri (str): The IRI of the relation; wrapped in a ``URIRef`` before insertion.
        to_entity (Entity): The entity acting as object of the statement.

    Returns:
        None
    """
    subject_iri, predicate_iri, object_iri = from_entity.iri, URIRef(relation_iri), to_entity.iri
    kg.add((subject_iri, predicate_iri, object_iri))

create_pipeline_task(top_level_schema_namespace, parent_entity, kg, pipeline_name, input_data_path, plots_output_dir)

Create a pipeline task in the knowledge graph.

Parameters:

Name Type Description Default
top_level_schema_namespace Namespace

The top-level schema namespace.

required
parent_entity Entity

The parent entity of the pipeline task.

required
kg Graph

The knowledge graph.

required
pipeline_name str

The name of the pipeline.

required
input_data_path str

The path to the input data for the pipeline.

required
plots_output_dir str

The directory to store the output plots when executing the pipeline.

required

Returns:

Name Type Description
Task Task

The created pipeline task.

Source code in exe_kg_lib/utils/kg_creation_utils.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def create_pipeline_task(
    top_level_schema_namespace: Namespace,
    parent_entity: Entity,
    kg: Graph,
    pipeline_name: str,
    input_data_path: str,
    plots_output_dir: str,
) -> Task:
    """
    Adds a pipeline task to the knowledge graph together with its two path literals.

    The task IRI is ``top_level_schema_namespace + pipeline_name``. The input data
    path and the plots output directory are stored as ``xsd:string`` literals via
    ``hasInputDataPath`` and ``hasPlotsOutputDir`` respectively.

    Args:
        top_level_schema_namespace (Namespace): The top-level schema namespace.
        parent_entity (Entity): The parent entity of the pipeline task.
        kg (Graph): The knowledge graph.
        pipeline_name (str): The name of the pipeline.
        input_data_path (str): The path to the input data for the pipeline.
        plots_output_dir (str): The directory to store the output plots when executing the pipeline.

    Returns:
        Task: The created pipeline task.
    """
    pipeline_task = Task(top_level_schema_namespace + pipeline_name, parent_entity)
    add_instance(kg, pipeline_task)

    # (relation, raw string value) pairs, written in this order.
    path_properties = (
        (top_level_schema_namespace.hasInputDataPath, input_data_path),
        (top_level_schema_namespace.hasPlotsOutputDir, plots_output_dir),
    )
    for relation, raw_path in path_properties:
        add_literal(kg, pipeline_task, relation, Literal(lexical_or_value=raw_path, datatype=XSD.string))

    return pipeline_task

deserialize_input_entity_info_dict(input_entity_info_dict, data_entities_dict, task_output_dicts, pipeline_name, namespace)

Deserializes the serialized input entity dictionary.

Parameters:

Name Type Description Default
input_entity_info_dict Dict[str, Union[List[str], MethodSerializable]]

The serialized input entity dictionary.

required
data_entities_dict Dict[str, DataEntity]

The dictionary of data entities.

required
task_output_dicts Dict[str, TaskSerializable]

The dictionary of task output objects.

required
pipeline_name str

The name of the pipeline.

required
namespace Namespace

The namespace prepended to a method's type to build the method's IRI.

required

Returns:

Type Description
Dict[str, Union[List[DataEntity], Method]]

Dict[str, Union[List[DataEntity], Method]]: The deserialized input data entity dictionary.

Source code in exe_kg_lib/utils/kg_creation_utils.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def deserialize_input_entity_info_dict(
    input_entity_info_dict: Dict[str, Union[List[str], MethodSerializable]],
    data_entities_dict: Dict[str, DataEntity],
    task_output_dicts: Dict[str, TaskSerializable],
    pipeline_name: str,
    namespace: Namespace,
) -> Dict[str, Union[List[DataEntity], Method]]:
    """
    Turns a serialized input-entity mapping into one holding entity objects.

    Each value of ``input_entity_info_dict`` is handled by kind:

    - a ``MethodSerializable`` becomes a ``Method`` whose IRI is ``namespace + method_type``;
    - a list of strings becomes a list of data entities, each name being resolved
      either as an output of a previous task or as a key of ``data_entities_dict``;
    - any other value is skipped, so its key is absent from the result.

    Args:
        input_entity_info_dict (Dict[str, Union[List[str], MethodSerializable]]): The serialized input entity dictionary.
        data_entities_dict (Dict[str, DataEntity]): The dictionary of data entities, keyed by name.
        task_output_dicts (Dict[str, TaskSerializable]): The task outputs, indexed by task instance name and then by output name.
        pipeline_name (str): The name of the pipeline, used to build previous task instance names.
        namespace (Namespace): The namespace prepended to a method's type to build the method's IRI.

    Returns:
        Dict[str, Union[List[DataEntity], Method]]: The deserialized input data entity dictionary.
    """
    deserialized: Dict[str, Union[List[DataEntity], Method]] = {}

    for name, serialized_value in input_entity_info_dict.items():
        if isinstance(serialized_value, MethodSerializable):
            # The provided input is a method.
            deserialized[name] = Method(
                namespace + serialized_value.method_type,
                parent_entity=None,
                params_dict=serialized_value.params_dict,
            )
            continue

        if not isinstance(serialized_value, list) or not all(isinstance(elem, str) for elem in serialized_value):
            # Neither a method nor a list of data entity names: nothing to deserialize.
            continue

        # The provided input is a list of data entity names.
        resolved_entities = []
        deserialized[name] = resolved_entities

        for entity_name in serialized_value:
            task_output_match = re.match(TASK_OUTPUT_NAME_REGEX, entity_name)
            if not task_output_match:
                resolved_entities.append(data_entities_dict[entity_name])
                continue

            # The name looks like a reference to an output of a previous task.
            output_name = task_output_match.group(1)
            producer_type = task_output_match.group(2)
            producer_instance_number = int(task_output_match.group(3))

            try:
                producer_name = get_instance_name(producer_type, producer_instance_number, pipeline_name)
                resolved = task_output_dicts[producer_name][output_name]
            except KeyError:
                # The regex matched, but no such task output exists: treat the name as a plain data entity.
                resolved = data_entities_dict[entity_name]

            resolved_entities.append(resolved)

    return deserialized

field_value_to_literal(field_value)

Converts a Python field value to a Literal object with the appropriate datatype.

Parameters:

Name Type Description Default
field_value Union[str, int, float, bool]

The value to be converted.

required

Returns:

Name Type Description
Literal Literal

The converted Literal object.

Source code in exe_kg_lib/utils/kg_creation_utils.py
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def field_value_to_literal(field_value: Union[str, int, float, bool]) -> Literal:
    """
    Wraps a Python value in a Literal carrying the matching XSD datatype.

    Strings, booleans, integers and floats map to ``xsd:string``, ``xsd:boolean``,
    ``xsd:int`` and ``xsd:float`` respectively. A value of any other type is
    converted with ``str()`` and stored as ``xsd:string``.

    Args:
        field_value (Union[str, int, float, bool]): The value to be converted.

    Returns:
        Literal: The converted Literal object.

    """
    # Order matters: bool is a subclass of int, so it has to be checked first.
    datatype_by_python_type = (
        (str, XSD.string),
        (bool, XSD.boolean),
        (int, XSD.int),
        (float, XSD.float),
    )

    for python_type, xsd_datatype in datatype_by_python_type:
        if isinstance(field_value, python_type):
            return Literal(field_value, datatype=xsd_datatype)

    # Unknown type: fall back to its string representation.
    return Literal(str(field_value), datatype=XSD.string)

load_exe_kg(input_path, exe_kg_from_json_method)

Loads the ExeKG from the specified input path.

Parameters:

Name Type Description Default
input_path str

The path to the ExeKG file.

required
exe_kg_from_json_method Callable[[str], Graph]

The method to convert a simplified serialized pipeline to ExeKG.

required

Returns:

Name Type Description
Graph Graph

The loaded ExeKG.

Source code in exe_kg_lib/utils/kg_creation_utils.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def load_exe_kg(input_path: str, exe_kg_from_json_method: Callable[[Union[Path, TextIOWrapper, str]], Graph]) -> Graph:
    """
    Reads an ExeKG from a file, choosing the loader by the file extension.

    A path ending in ``.ttl`` is parsed into a new graph; a path ending in ``.json``
    is handed to ``exe_kg_from_json_method``. For any other extension the new,
    empty graph is returned.

    Args:
        input_path (str): The path to the ExeKG file.
        exe_kg_from_json_method (Callable[[Union[Path, TextIOWrapper, str]], Graph]): The method to convert a
            simplified serialized pipeline to ExeKG.

    Returns:
        Graph: The loaded ExeKG.
    """
    exe_kg = Graph(bind_namespaces="rdflib")

    if input_path.endswith(".ttl"):
        # Turtle file: parse it straight into the new graph.
        exe_kg.parse(input_path, format="n3")
        return exe_kg

    if input_path.endswith(".json"):
        # Simplified serialized pipeline: the converter builds the graph.
        return exe_kg_from_json_method(input_path)

    return exe_kg