Skip to content

nirvana.DataFrame

nirvana.dataframe.frame.DataFrame(data: dict | pd.DataFrame = None, *args, **kwargs)

Bases: LineageMixin

Source code in nirvana/dataframe/frame.py
def __init__(
    self,
    data: dict | pd.DataFrame = None,
    *args,
    **kwargs
):
    self._data = pd.DataFrame(data) if isinstance(data, dict) else data
    for col in self._data.columns:
        converted_array, inferred_dtype = infer_and_convert_dtype(self._data[col])
        self._data[col] = converted_array
    self.initialize()

Attributes

columns property writable
primary_key property
nrows property

Functions

__len__()
Source code in nirvana/dataframe/frame.py
def __len__(self):
    """Return the row count, as reported by the ``nrows`` property."""
    return self.nrows
__contains__(item)
Source code in nirvana/dataframe/frame.py
def __contains__(self, item):
    """Return True when ``item`` is one of this frame's columns.

    Uses the ``in`` operator rather than calling ``__contains__`` on
    ``self.columns`` directly, so the result is always a ``bool`` and
    containers that only support iteration still work.
    """
    return item in self.columns
__repr__() -> str
Source code in nirvana/dataframe/frame.py
def __repr__(self) -> str:
    """Return a short summary: class name, row count and column count."""
    summary = f"{self.__class__.__name__}(nrows={self.nrows}, ncols={len(self.columns)})"
    return summary
to_pandas() -> pd.DataFrame
Source code in nirvana/dataframe/frame.py
def to_pandas(self) -> pd.DataFrame:
    """Return a copy of the underlying pandas DataFrame."""
    snapshot = self._data.copy()
    return snapshot
from_external_file(path: str, sep=',', **kwargs) classmethod
Source code in nirvana/dataframe/frame.py
@classmethod
def from_external_file(cls, path: str, sep=',', **kwargs):
    """Build an instance from a delimited text file.

    Args:
        path: Location handed to ``pd.read_table``.
        sep: Field delimiter; defaults to a comma.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_table``.

    Returns:
        ``cls`` constructed from the parsed pandas DataFrame.
    """
    return cls(pd.read_table(path, sep=sep, **kwargs))
head(n=5)
Source code in nirvana/dataframe/frame.py
def head(self, n=5):
    """Return the first ``n`` rows of the underlying pandas DataFrame."""
    leading_rows = self._data.head(n)
    return leading_rows
tail(n=5)
Source code in nirvana/dataframe/frame.py
def tail(self, n=5):
    """Return the last ``n`` rows of the underlying pandas DataFrame."""
    trailing_rows = self._data.tail(n)
    return trailing_rows
__getitem__(posidx)
Source code in nirvana/dataframe/frame.py
def __getitem__(self, posidx):
    """Delegate ``frame[posidx]`` lookups to ``self._get``."""
    selected = self._get(posidx)
    return selected
semantic_map(user_instruction: str, input_columns: list[str], output_columns: list[str], context: list[dict] | str | None = None, model: str | None = None, func: Callable | None = None, strategy: Literal['plain', 'fewshot', 'self-refine'] = 'plain', limit: int | None = None, rate_limit: int = 16, assertions: list[Callable] | None = [])
Source code in nirvana/dataframe/frame.py
def semantic_map(
    self,
    user_instruction: str,
    input_columns: list[str],
    output_columns: list[str],
    context: list[dict] | str | None = None,
    model: str | None = None,
    func: Callable | None = None,
    strategy: Literal["plain", "fewshot", "self-refine"] = "plain",
    limit: int | None = None,
    rate_limit: int = 16,
    assertions: list[Callable] | None = [],
):
    op_kwargs = {
        "user_instruction": user_instruction,
        "input_columns": input_columns,
        "output_columns": output_columns,
        "context": context,
        "model": model,
        "tool": func,
        "strategy": strategy,
        "limit": limit,
        "rate_limit": rate_limit,
        "assertions": assertions,
    }
    data_kwargs = {
        "left_input_fields": self.leaf_node.node_fields.output_fields,
        "right_input_fields": [],
        "output_fields": self.leaf_node.node_fields.left_input_fields + output_columns,
    }
    self.add_operator(op_name="map",
                      op_kwargs=op_kwargs,
                      data_kwargs=data_kwargs,
                      rate_limit=rate_limit)
semantic_filter(user_instruction: str, input_columns: list[str], func: Callable | None = None, context: list[dict] | str | None = None, model: str | None = None, strategy: Literal['plain', 'fewshot', 'self-refine'] = 'plain', limit: int | None = None, rate_limit: int = 16, assertions: list[Callable] | None = [])
Source code in nirvana/dataframe/frame.py
def semantic_filter(
    self,
    user_instruction: str,
    input_columns: list[str],
    func: Callable | None = None,
    context: list[dict] | str | None = None,
    model: str | None = None,
    strategy: Literal["plain", "fewshot", "self-refine"] = "plain",
    limit: int | None = None,
    rate_limit: int = 16,
    assertions: list[Callable] | None = [],
):
    op_kwargs = {
        "user_instruction": user_instruction,
        "input_columns": input_columns,
        "context": context,
        "model": model,
        "tool": func,
        "strategy": strategy,
        "limit": limit,
        "rate_limit": rate_limit,
        "assertions": assertions,
    }
    data_kwargs = {
        "left_input_fields": self.leaf_node.node_fields.output_fields,
        "right_input_fields": [],
        "output_fields": self.leaf_node.node_fields.output_fields,
    }
    self.add_operator(op_name="filter",
                      op_kwargs=op_kwargs,
                      data_kwargs=data_kwargs,
                      rate_limit=rate_limit)
semantic_reduce(user_instruction: str, input_column: str, context: list[dict] | str | None = None, model: str | None = None, func: Callable | None = None, strategy: Literal['plain'] = 'plain', rate_limit: int = 16, assertions: list[Callable] | None = [])
Source code in nirvana/dataframe/frame.py
def semantic_reduce(
    self,
    user_instruction: str,
    input_column: str,
    context: list[dict] | str | None = None,
    model: str | None = None,
    func: Callable | None = None,
    strategy: Literal["plain"] = "plain",
    rate_limit: int = 16,
    assertions: list[Callable] | None = [],
):
    op_kwargs = {
        "user_instruction": user_instruction,
        "input_columns": [input_column],
        "context": context,
        "model": model,
        "tool": func,
        "strategy": strategy,
        "rate_limit": rate_limit,
        "assertions": assertions,
    }
    data_kwargs = {
        "left_input_fields": self.leaf_node.node_fields.output_fields,
        "right_input_fields": [],
        "output_fields": []
    }
    self.add_operator(op_name="reduce",
                      op_kwargs=op_kwargs,
                      data_kwargs=data_kwargs,
                      rate_limit=rate_limit)
semantic_join(other: DataFrame, user_instruction: str, left_on: str, right_on: str, how: Literal['inner', 'left', 'right'] = 'inner', context: list[dict] | str | None = None, model: str | None = None, func: Callable | None = None, strategy: Literal['nest', 'block'] = 'nest', limit: int | None = None, rate_limit: int = 16, assertions: list[Callable] | None = [], batch_size: int = 5)
Source code in nirvana/dataframe/frame.py
def semantic_join(
    self,
    other: "DataFrame",
    user_instruction: str,
    left_on: str,
    right_on: str,
    how: Literal["inner", "left", "right"] = "inner",
    context: list[dict] | str | None = None,
    model: str | None = None,
    func: Callable | None = None,
    strategy: Literal["nest", "block"] = "nest",
    limit: int | None = None,
    rate_limit: int = 16,
    assertions: list[Callable] | None = [],
    batch_size: int = 5,
):
    union_fields = (
        list(set(self.leaf_node.node_fields.output_fields) | set(other.leaf_node.node_fields.output_fields))
    )
    op_kwargs = {
        "user_instruction": user_instruction,
        "left_on": [left_on],
        "right_on": [right_on],
        "how": how,
        "context": context,
        "model": model,
        "tool": func,
        "strategy": strategy,
        "limit": limit,
        "rate_limit": rate_limit,
        "assertions": assertions,
        "batch_size": batch_size,
    }
    data_kwargs = {
        "input_left_fields": self.leaf_node.node_fields.output_fields,
        "input_right_fields": other.leaf_node.node_fields.output_fields,
        "output_fields": union_fields
    }
    self.add_operator(op_name="join",
                      op_kwargs=op_kwargs,
                      data_kwargs=data_kwargs,
                      other=other,
                      rate_limit=rate_limit)
semantic_rank(user_instruction: str, input_column: str, descend: bool = True, context: list[dict] | str | None = None, model: str | None = None, func: Callable | None = None, strategy: Literal['plain'] = 'plain', limit: int | None = None, rate_limit: int = 16, assertions: list[Callable] | None = [])
Source code in nirvana/dataframe/frame.py
def semantic_rank(
    self,
    user_instruction: str,
    input_column: str,
    descend: bool = True,
    context: list[dict] | str | None = None,
    model: str | None = None,
    func: Callable | None = None,
    strategy: Literal["plain"] = "plain",
    limit: int | None = None,
    rate_limit: int = 16,
    assertions: list[Callable] | None = [],
):
    op_kwargs = {
        "user_instruction": user_instruction,
        "input_columns": [input_column],
        "descend": descend,
        "context": context,
        "model": model,
        "tool": func,
        "strategy": strategy,
        "limit": limit,
        "rate_limit": rate_limit,
        "assertions": assertions,
    }
    data_kwargs = {
        "left_input_fields": self.leaf_node.node_fields.output_fields,
        "right_input_fields": [],
        "output_fields": self.leaf_node.node_fields.output_fields,
    }
    self.add_operator(op_name="rank",
                      op_kwargs=op_kwargs,
                      data_kwargs=data_kwargs,
                      rate_limit=rate_limit)
optimize_and_execute(optim_config=None)
Source code in nirvana/dataframe/frame.py
def optimize_and_execute(self, optim_config = None):
    """Create the plan optimizer, apply the enabled optimizations, and run.

    Args:
        optim_config: Passed straight to ``create_plan_optimizer``.

    Returns:
        An ``(output, cost, runtime)`` tuple. It comes from
        ``optimizer.optimize_physical_plan`` when physical optimization is
        enabled, and from ``self.execute()`` otherwise.
    """
    self.create_plan_optimizer(optim_config)

    # Logical optimization, when enabled, replaces the leaf node.
    if self.optimizer.config.do_logical_optimization:
        self.leaf_node = self.optimizer.optimize_logical_plan(self.leaf_node)

    # Without physical optimization the plan is executed directly.
    if not self.optimizer.config.do_physical_optimization:
        output, cost, runtime = self.execute()
        return output, cost, runtime

    output, cost, runtime = self.optimizer.optimize_physical_plan(
        self.leaf_node,
    )
    return output, cost, runtime