In [1]:
%%classpath add mvn
org.apache.spark spark-core_2.11 2.1.1
org.apache.spark spark-sql_2.11 2.1.1
In [2]:
import org.apache.spark.sql.SparkSession

// Build (or reuse) a local SparkSession for the examples in this notebook.
// getOrCreate() returns the already-running session if one exists.
val spark = SparkSession
  .builder()
  .master("local")
  .appName("Spark SQL basic example")
  .config("spark.some.config.option", "some-value")
  .getOrCreate()

spark

// Brings in the $-string column interpolator and encoder implicits.
import spark.implicits._
Out[2]:
In [3]:
// Load the sample JSON file into a DataFrame and display its contents.
val df = spark.read.json("../../data/people.json")
df.show()
Out[3]:
Filtering
In [12]:
df.filter($"age" > 21).show()
Out[12]:
In [14]:
import org.apache.spark.sql.Column

/** Returns a Column predicate that is true where `column` is greater than `v`. */
def greaterThan(column: Column, v: Int): Column = column > v
Out[14]:
In [15]:
df.filter(greaterThan(df("age"), 21)).show
Out[15]:
In [16]:
df.filter(greaterThan($"age", 21)).show
Out[16]:
In [17]:
df.filter($"age" === 30).show
Out[17]:
In [18]:
df.filter($"age" === null).show
Out[18]:
In [19]:
df.filter($"age" <=> 30).show
Out[19]:
In [20]:
df.filter($"age" <=> null).show
Out[20]:
In [ ]: