Master complex queries, optimization techniques, and advanced database concepts
Advanced SQL involves complex queries that go beyond basic SELECT, INSERT, UPDATE, and DELETE statements. These techniques allow for sophisticated data analysis and manipulation.
-- Find employees who earn more than the average salary
SELECT employee_name, salary
FROM employees
WHERE salary > (
SELECT AVG(salary)
FROM employees
);
-- Find customers who have placed orders
SELECT customer_name
FROM customers
WHERE customer_id IN (
SELECT DISTINCT customer_id
FROM orders
);
-- Calculate department averages and compare to company average
SELECT dept_avg.dept_name, dept_avg.avg_salary
FROM (
SELECT department, AVG(salary) AS avg_salary
FROM employees
GROUP BY department
) AS dept_avg
WHERE dept_avg.avg_salary > (
SELECT AVG(salary) FROM employees
);
-- Find employees who earn more than the average in their department
SELECT e1.employee_name, e1.salary, e1.department
FROM employees e1
WHERE e1.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department = e1.department
);
-- Rank employees by salary within each department
SELECT
employee_name,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank
FROM employees;
-- Calculate running totals
SELECT
order_date,
order_amount,
SUM(order_amount) OVER (ORDER BY order_date) AS running_total
FROM orders;
-- Calculate moving averages
SELECT
date,
sales,
AVG(sales) OVER (
ORDER BY date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg_3_days
FROM daily_sales;
-- Lead and Lag functions
SELECT
product_name,
price,
LAG(price) OVER (ORDER BY date) AS prev_price,
LEAD(price) OVER (ORDER BY date) AS next_price,
price - LAG(price) OVER (ORDER BY date) AS price_change
FROM products;
-- Define a CTE for complex calculations
WITH department_stats AS (
SELECT
department,
AVG(salary) AS avg_salary,
MAX(salary) AS max_salary,
MIN(salary) AS min_salary
FROM employees
GROUP BY department
)
SELECT
e.employee_name,
e.salary,
ds.avg_salary,
(e.salary - ds.avg_salary) AS diff_from_avg
FROM employees e
JOIN department_stats ds ON e.department = ds.department;
-- Generate a series of numbers
WITH RECURSIVE number_series AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1
FROM number_series
WHERE n < 10
)
SELECT * FROM number_series;
-- Hierarchical data (organizational chart)
WITH RECURSIVE org_chart AS (
-- Base case: top-level managers
SELECT employee_id, employee_name, manager_id, 0 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: subordinates
SELECT e.employee_id, e.employee_name, e.manager_id, oc.level + 1
FROM employees e
JOIN org_chart oc ON e.manager_id = oc.employee_id
)
SELECT * FROM org_chart
ORDER BY level, employee_name;
-- Generate all possible combinations
SELECT
c.customer_name,
p.product_name
FROM customers c
CROSS JOIN products p;
-- Calendar generation
WITH RECURSIVE date_range AS (
SELECT '2023-01-01'::date AS date_val
UNION ALL
SELECT date_val + INTERVAL '1 day'
FROM date_range
WHERE date_val < '2023-01-31'
)
SELECT
dr.date_val,
s.store_name
FROM date_range dr
CROSS JOIN stores s;
-- Find employees and their managers
SELECT
e.employee_name AS employee,
m.employee_name AS manager
FROM employees e
JOIN employees m ON e.manager_id = m.employee_id;
-- Find employees in the same department
SELECT
e1.employee_name AS emp1,
e2.employee_name AS emp2,
e1.department
FROM employees e1
JOIN employees e2 ON e1.department = e2.department
WHERE e1.employee_id < e2.employee_id;
-- Multiple join conditions
SELECT
c.customer_name,
o.order_date,
p.product_name,
oi.quantity
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
WHERE o.order_date >= '2023-01-01'
AND p.category = 'Electronics';
-- Use EXISTS instead of IN for subqueries
-- Instead of:
SELECT * FROM customers
WHERE customer_id IN (SELECT customer_id FROM orders);
-- Use:
SELECT * FROM customers
WHERE EXISTS (SELECT 1 FROM orders WHERE orders.customer_id = customers.customer_id);
-- Use UNION ALL instead of UNION when duplicates are acceptable
SELECT customer_id FROM active_customers
UNION ALL
SELECT customer_id FROM archived_customers;
-- Limit result sets when possible
SELECT * FROM large_table
ORDER BY date_created DESC
LIMIT 100;
-- Create composite indexes for multi-column queries
CREATE INDEX idx_customer_date_status
ON orders (customer_id, order_date, status);
-- Covering index to avoid table lookups
CREATE INDEX idx_covering
ON orders (customer_id, order_date)
INCLUDE (total_amount, status);
-- Partial index for frequently filtered data
CREATE INDEX idx_active_customers
ON customers (last_login_date)
WHERE status = 'active';
Performance Best Practices:
-- PostgreSQL UPSERT
INSERT INTO users (id, name, email)
VALUES (1, 'John Doe', 'john@example.com')
ON CONFLICT (id)
DO UPDATE SET
name = EXCLUDED.name,
email = EXCLUDED.email;
-- MySQL INSERT ... ON DUPLICATE KEY UPDATE
INSERT INTO users (id, name, email)
VALUES (1, 'John Doe', 'john@example.com')
ON DUPLICATE KEY UPDATE
name = VALUES(name),
email = VALUES(email);
-- SQL Server MERGE
MERGE users AS target
USING (VALUES (1, 'John Doe', 'john@example.com')) AS source (id, name, email)
ON target.id = source.id
WHEN MATCHED THEN
UPDATE SET name = source.name, email = source.email
WHEN NOT MATCHED THEN
INSERT (id, name, email) VALUES (source.id, source.name, source.email);
-- Update with JOIN
UPDATE employees e
SET salary = salary * 1.10
FROM departments d
WHERE e.department_id = d.department_id
AND d.budget > 1000000;
-- Delete with subquery
DELETE FROM orders
WHERE customer_id IN (
SELECT customer_id
FROM customers
WHERE status = 'inactive'
AND last_order_date < '2022-01-01'
);
-- Type 2 SCD - Keep historical records
CREATE TABLE customer_scd (
customer_id INT,
name VARCHAR(255),
email VARCHAR(255),
valid_from DATE,
valid_to DATE,
is_current BOOLEAN
);
-- When updating, insert new record and expire old one
UPDATE customer_scd
SET valid_to = CURRENT_DATE - 1, is_current = FALSE
WHERE customer_id = 1 AND is_current = TRUE;
INSERT INTO customer_scd (customer_id, name, email, valid_from, valid_to, is_current)
VALUES (1, 'New Name', 'newemail@example.com', CURRENT_DATE, NULL, TRUE);
-- Star Schema: Fact and Dimension Tables
-- Fact table
CREATE TABLE sales_fact (
sale_id SERIAL PRIMARY KEY,
product_key INT,
customer_key INT,
date_key INT,
store_key INT,
quantity INT,
amount DECIMAL(10,2)
);
-- Dimension tables
CREATE TABLE product_dim (
product_key SERIAL PRIMARY KEY,
product_name VARCHAR(255),
category VARCHAR(255),
brand VARCHAR(255)
);
-- Data aggregation query
SELECT
pd.category,
dd.year,
SUM(sf.amount) AS total_sales,
AVG(sf.amount) AS avg_sale
FROM sales_fact sf
JOIN product_dim pd ON sf.product_key = pd.product_key
JOIN date_dim dd ON sf.date_key = dd.date_key
GROUP BY pd.category, dd.year;