from pyspark.sql import SparkSession
# Create (or reuse) a SparkSession — the single entry point for DataFrame
# and SQL functionality. Parenthesized chain replaces fragile backslash
# line continuations.
spark = (
    SparkSession.builder
    .appName("Multiple DataFrame SQL")
    .getOrCreate()
)

try:
    # The original snippet referenced df1/df2/df3 without defining them,
    # which raises NameError when run standalone. Build small sample
    # DataFrames here so the script is self-contained and runnable; the
    # rest of the pipeline is unchanged.
    df1 = spark.createDataFrame([(1, 5), (2, 15), (3, 25)], ["id", "col1"])
    df2 = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["id", "col2"])
    df3 = spark.createDataFrame([(1, 1.0), (2, 2.0), (3, 3.0)], ["id", "col3"])

    # Register each DataFrame as a temporary view so it can be referenced
    # by name in Spark SQL. Temp views are scoped to this SparkSession.
    df1.createOrReplaceTempView("table1")
    df2.createOrReplaceTempView("table2")
    df3.createOrReplaceTempView("table3")

    # Join the three views on `id` and filter rows where col1 > 10.
    # spark.sql returns a lazily-evaluated DataFrame.
    result = spark.sql("""
    SELECT t1.col1, t2.col2, t3.col3
    FROM table1 t1
    JOIN table2 t2 ON t1.id = t2.id
    JOIN table3 t3 ON t2.id = t3.id
    WHERE t1.col1 > 10
    """)

    # Display the result (first 20 rows by default); this triggers execution.
    result.show()

    # Perform further transformations or analysis on `result` as needed.
finally:
    # Always release driver/executor resources, even if the query above
    # fails — the original only stopped the session on the happy path.
    spark.stop()