Skip to content

DataFrame Utilities

ensure_columns

ensure_columns(df: DataFrame, required: Iterable[str]) -> DataFrame

Validate that df contains all required columns.

Returns df unchanged when every required column is present; otherwise raises a ValueError listing the missing columns.

Source code in src/spark_fuse/utils/dataframe.py
15
16
17
18
19
20
21
22
23
def ensure_columns(df: DataFrame, required: Iterable[str]) -> DataFrame:
    """Validate that `df` contains all `required` columns.

    Args:
        df: DataFrame whose `columns` attribute is checked.
        required: Column names that must be present. A bare `str` is treated
            as a single column name rather than iterated character by
            character.

    Returns:
        `df` itself, unchanged, when nothing is missing.

    Raises:
        ValueError: If any required column is absent; the message lists the
            missing names in the order they were requested.
    """
    if isinstance(required, str):
        # A lone string would otherwise be checked one character at a time.
        required = [required]
    # Read `df.columns` once and use a set for constant-time membership tests.
    present = set(df.columns)
    missing = [c for c in required if c not in present]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
    return df

preview

preview(df: DataFrame, n: int = 5) -> str

Return a string preview of the dataframe head and schema.

Source code in src/spark_fuse/utils/dataframe.py
 8
 9
10
11
12
def preview(df: DataFrame, n: int = 5) -> str:
    """Build a text summary of `df`: its leading rows followed by its schema.

    Up to `n` rows are collected via `df.limit(n).collect()` and each is
    converted with `asDict(recursive=True)`; the schema is rendered with
    `df.schema.simpleString()`. The result has the form
    ``rows=<list of dicts>`` on the first line and ``schema=<schema>`` on
    the second.
    """
    head = df.limit(n).collect()
    records = []
    for row in head:
        records.append(row.asDict(recursive=True))
    schema_text = df.schema.simpleString()
    return f"rows={records}\nschema={schema_text}"