def semantic_join(
self,
other: "DataFrame",
user_instruction: str,
left_on: str,
right_on: str,
how: Literal["inner", "left", "right"] = "inner",
context: list[dict] | str | None = None,
model: str | None = None,
func: Callable | None = None,
strategy: Literal["nest", "block"] = "nest",
limit: int | None = None,
rate_limit: int = 16,
assertions: list[Callable] | None = [],
batch_size: int = 5,
):
union_fields = (
list(set(self.leaf_node.node_fields.output_fields) | set(other.leaf_node.node_fields.output_fields))
)
op_kwargs = {
"user_instruction": user_instruction,
"left_on": [left_on],
"right_on": [right_on],
"how": how,
"context": context,
"model": model,
"tool": func,
"strategy": strategy,
"limit": limit,
"rate_limit": rate_limit,
"assertions": assertions,
"batch_size": batch_size,
}
data_kwargs = {
"input_left_fields": self.leaf_node.node_fields.output_fields,
"input_right_fields": other.leaf_node.node_fields.output_fields,
"output_fields": union_fields
}
self.add_operator(op_name="join",
op_kwargs=op_kwargs,
data_kwargs=data_kwargs,
other=other,
rate_limit=rate_limit)