Below is a comprehensive list of SQL functions available in PySpark SQL, along with an example for each:
Mathematical Functions:
abs(): Absolute value of a number.
sql
Copy code
SELECT abs(-5) AS result
-- Output: 5
ceil(): Ceiling of a number.
sql
Copy code
SELECT ceil(4.2) AS result
-- Output: 5
floor(): Floor of a number.
sql
Copy code
SELECT floor(4.8) AS result
-- Output: 4
round(): Rounds a number to the specified decimal places.
sql
Copy code
SELECT round(3.14159, 2) AS result
-- Output: 3.14
exp(): Exponential value of a number.
sql
Copy code
SELECT exp(1) AS result
-- Output: 2.718281828459045
log(): Natural logarithm of a number.
sql
Copy code
SELECT log(10) AS result
-- Output: 2.302585092994046
sqrt(): Square root of a number.
sql
Copy code
SELECT sqrt(25) AS result
-- Output: 5.0
power(): Raises a number to the specified power.
sql
Copy code
SELECT power(2, 3) AS result
-- Output: 8.0
String Functions:
concat(): Concatenates multiple strings.
sql
Copy code
SELECT concat('Hello', ' ', 'World') AS result
-- Output: Hello World
substring(): Extracts a substring from a string.
sql
Copy code
SELECT substring('Hello World', 7, 5) AS result
-- Output: World
length(): Length of a string.
sql
Copy code
SELECT length('Hello') AS result
-- Output: 5
trim(): Removes leading and trailing spaces from a string.
sql
Copy code
SELECT trim(' Hello ') AS result
-- Output: Hello
upper(): Converts a string to uppercase.
sql
Copy code
SELECT upper('hello') AS result
-- Output: HELLO
lower(): Converts a string to lowercase.
sql
Copy code
SELECT lower('HELLO') AS result
-- Output: hello
replace(): Replaces occurrences of a substring in a string.
sql
Copy code
SELECT replace('Hello, Hello, Hello', 'Hello', 'Hi') AS result
-- Output: Hi, Hi, Hi
regexp_extract(): Extracts substrings using a regular expression.
sql
Copy code
SELECT regexp_extract('Hello 123 World', '\\d+', 0) AS result
-- Output: 123
Date and Time Functions:
current_date(): Returns the current date.
sql
Copy code
SELECT current_date() AS result
-- Output: 2023-07-09
current_timestamp(): Returns the current timestamp.
sql
Copy code
SELECT current_timestamp() AS result
-- Output: 2023-07-09 12:34:56
date_format(): Formats a date or timestamp.
sql
Copy code
SELECT date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss') AS result
-- Output: 2023-07-09 12:34:56
year(), month(), day(): Extracts year, month, and day from a date or timestamp.
sql
Copy code
SELECT year('2023-07-09') AS year, month('2023-07-09') AS month, day('2023-07-09') AS day
-- Output: year=2023, month=7, day=9
datediff(): Calculates the number of days between two dates.
sql
Copy code
SELECT datediff('2023-07-31', '2023-07-01') AS result
-- Output: 30
date_add(), date_sub(): Adds or subtracts a specified number of days to/from a date.
sql
Copy code
SELECT date_add('2023-07-01', 7) AS added_date, date_sub('2023-07-31', 7) AS subtracted_date
-- Output: added_date=2023-07-08, subtracted_date=2023-07-24
Aggregate Functions:
count(): Count of rows or non-null values in a column.
sql
Copy code
SELECT count(*) AS result
-- Output: Total number of rows in the table
sum(): Sum of values in a column.
sql
Copy code
SELECT sum(salary) AS result
-- Output: Sum of salaries in the column
avg(): Average of values in a column.
sql
Copy code
SELECT avg(age) AS result
-- Output: Average age in the column
min(), max(): Minimum and maximum values in a column.
sql
Copy code
SELECT min(price) AS min_price, max(price) AS max_price
-- Output: Minimum and maximum price values
first(), last(): First and last values in a group.
sql
Copy code
SELECT first(name) AS first_name, last(name) AS last_name
-- Output: First and last names in the group
group_concat(): Concatenates values from multiple rows into a single string.
sql
Copy code
SELECT group_concat(name) AS result
-- Output: Concatenated string of names in the group