In [1]:
import pandas as pd
import findspark
findspark.init("/opt/spark-3.0.1-bin-hadoop3.2/")
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import *
from pyspark.sql.types import StructType

# Create a SparkSession with Hive support enabled.
spark = (
    SparkSession.builder.appName("Spark_Configuration")
    .enableHiveSupport()
    .getOrCreate()
)
Tips and Traps¶
The Environment tab on the Spark application monitoring UI contains information about environment variables and Spark configurations. It is helpful if you forget which configurations were set for your Spark application or if you want to confirm that they are correct. Please refer to Tips on Spark Configuration to Avoid Issues for suggestions on Spark configurations.
Get SparkConf from a SparkSession Object¶
In [2]:
conf = spark.sparkContext.getConf()
conf
Out[2]:
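SparkContext.getConf returns a SparkConf object holding the configurations of the underlying SparkContext, which you can then query as shown in the cells below.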
In [18]:
conf.getAll()
Out[18]:
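SparkConf.getAll returns all configurations that have been set, as a list of (key, value) tuples.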
In [9]:
print(conf.toDebugString())
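SparkConf.toDebugString renders the configurations as a single string with one key=value pair per line, which is convenient for printing or logging.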
In [16]:
conf.get("spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT")
In [15]:
conf.get("spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT")
-XX:MaxDirectMemorySize¶
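The JVM option -XX:MaxDirectMemorySize limits how much off-heap direct buffer memory a JVM process may allocate. You can pass it to executor JVMs through spark.executor.extraJavaOptions: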
--conf spark.executor.extraJavaOptions=-XX:MaxDirectMemorySize=8G
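The driver-side counterpart is spark.driver.extraJavaOptions. A minimal sketch of passing the option at submit time (the application script name is hypothetical):
spark-submit \
    --conf spark.executor.extraJavaOptions=-XX:MaxDirectMemorySize=8G \
    my_app.py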
SparkConf.set¶
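SparkConf.set(key, value) sets a single configuration and returns the SparkConf object itself, so calls can be chained. Keep in mind that most non-SQL configurations take effect only if they are set before the SparkContext is created. A minimal sketch, assuming you are building a fresh session (the values are placeholders):
In [ ]:
from pyspark import SparkConf
from pyspark.sql import SparkSession

# Build a SparkConf, set options on it, then create a session from it.
conf = SparkConf()
conf.set("spark.executor.memory", "4g")  # placeholder value
conf.set("spark.sql.shuffle.partitions", "200")  # placeholder value
spark = SparkSession.builder.config(conf=conf).getOrCreate()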
SparkConf.setAll¶
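SparkConf.setAll(pairs) sets multiple configurations at once from an iterable of (key, value) tuples, which is convenient when configurations are kept in a dict or loaded from a file. A minimal sketch (the values are placeholders):
In [ ]:
from pyspark import SparkConf

conf = SparkConf()
conf.setAll(
    [
        ("spark.executor.memory", "4g"),
        ("spark.executor.cores", "2"),
    ]
)
print(conf.toDebugString())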
SparkSession vs SparkContext vs SparkConf¶
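Roughly speaking, a SparkConf holds configuration key-value pairs; a SparkContext is the entry point to the low-level RDD API and is constructed from a SparkConf; and a SparkSession (since Spark 2.0) is the unified entry point for the DataFrame and SQL APIs and wraps a SparkContext. Starting from the spark session created above, you can navigate down to the other two objects:
In [ ]:
sc = spark.sparkContext  # the underlying SparkContext
conf = sc.getConf()  # the SparkConf backing that SparkContext
print(conf.get("spark.app.name"))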