Skip to content

Module: spark_fuse.spark

create_session

create_session(app_name: str = 'spark-fuse', *, master: Optional[str] = None, extra_configs: Optional[Dict[str, str]] = None) -> SparkSession

Create a SparkSession with Delta configs and light Azure defaults.

  • Uses local[2] when no master is provided and not on Databricks or Fabric.
  • Applies Delta extensions; works both on Databricks and local delta-spark.
  • Accepts extra_configs to inject environment-specific credentials.
Source code in src/spark_fuse/spark.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def create_session(
    app_name: str = "spark-fuse",
    *,
    master: Optional[str] = None,
    extra_configs: Optional[Dict[str, str]] = None,
) -> SparkSession:
    """Build (or reuse) a SparkSession preconfigured for Delta.

    Args:
        app_name: Application name given to the session builder.
        master: Spark master URL. When empty or ``None``, ``local[2]`` is
            used if ``detect_environment()`` reports ``"local"``; in any
            other environment no master is set on the builder.
        extra_configs: Optional key/value Spark settings applied after the
            built-in ones (e.g. environment-specific credentials).

    Returns:
        The session produced by ``builder.getOrCreate()``.
    """
    runtime = detect_environment()

    # An explicit master always wins; the local fallback only applies when
    # the detected runtime is "local".
    resolved_master = master or ("local[2]" if runtime == "local" else None)

    session_builder = SparkSession.builder.appName(app_name)
    if resolved_master is not None:
        session_builder = session_builder.master(resolved_master)

    session_builder = _apply_delta_configs(session_builder)

    # Small default shuffle partition count. Authentication settings are not
    # handled here; they must come via extra_configs or the cluster env.
    session_builder = session_builder.config("spark.sql.shuffle.partitions", "8")

    # Caller-supplied settings go last, so a value given for the same key is
    # the final one set on the builder.
    for key, value in (extra_configs or {}).items():
        session_builder = session_builder.config(key, value)

    return session_builder.getOrCreate()

detect_environment

detect_environment() -> str

Detect a likely runtime environment: databricks, fabric, or local.

Heuristics only; callers should not rely on this for security decisions.

Source code in src/spark_fuse/spark.py
21
22
23
24
25
26
27
28
29
30
def detect_environment() -> str:
    """Guess the runtime platform from environment variables.

    Returns:
        ``"databricks"`` when ``DATABRICKS_RUNTIME_VERSION`` or
        ``DATABRICKS_CLUSTER_ID`` is set to a non-empty value, otherwise
        ``"fabric"`` when ``FABRIC_ENVIRONMENT`` or ``MS_FABRIC`` is set to a
        non-empty value, otherwise ``"local"``.

    This is a heuristic only; do not base security decisions on it.
    """
    # Checked in order: the first platform with any non-empty marker wins.
    platform_markers = (
        ("databricks", ("DATABRICKS_RUNTIME_VERSION", "DATABRICKS_CLUSTER_ID")),
        ("fabric", ("FABRIC_ENVIRONMENT", "MS_FABRIC")),
    )
    for platform, variable_names in platform_markers:
        if any(os.environ.get(name) for name in variable_names):
            return platform
    return "local"