use between
for date functions
get the 3rd highest salary
-- Row Number :
SELECT Salary,EmpName
FROM
(
SELECT Salary,EmpName,ROW_NUMBER() OVER(ORDER BY Salary) As RowNum
FROM EMPLOYEE
) As A
WHERE A.RowNum IN (2,3)
-- Sub Query :
SELECT *
FROM Employee Emp1
WHERE (N-1) = (
SELECT COUNT(DISTINCT(Emp2.Salary))
FROM Employee Emp2
WHERE Emp2.Salary > Emp1.Salary
)
-- Top Keyword :
SELECT TOP 1 salary
FROM (
SELECT DISTINCT TOP n salary
FROM employee
ORDER BY salary DESC
) a
ORDER BY salary
update_statistics
after a bulk data transform
SELECT * FROM Employees WHERE EmpName like ‘S%’
count
sum
average
HAVING
select age, sum(weight) as total_weight
from cats
group by age
having sum(weight) > 12 order by age DESC
execute parts of a SQL statement conditionally
CASE input_expression /* usually a column_name */
WHEN conditional_expression1 THEN result_expression1
[WHEN conditional_expression2 THEN result_expression2]
[ELSE result_expression]
END
-- Example
SELECT LANGUAGE_NAME, RATING_CODE AS RATING,
CASE RATING_CODE
WHEN '3' THEN 'DIFFICULT'
WHEN '2' THEN 'FAIRLY DIFFICULT'
WHEN '1' THEN 'EASY'
ELSE 'UNKNOWN'
END AS RATING_DESCRIPTION
FROM PROGRAMMING_LANGUAGES;
WHERE
=
Equal<>
Not equal. Note: In some versions of SQL this operator may be written as !=>
Greater than<
Less than>=
Greater than or equal<=
Less than or equalBETWEEN
Between an inclusive rangeLIKE
Search for a pattern
%
is like *
_
is like ?
IN
To specify multiple possible values for a columnIS NULL
/ IS NOT NULL
HAVING
SELECT column1, column2, ...
FROM table_name
ORDER BY column1, column2, ... ASC|DESC;
http://dcx.sap.com/1200/en/dbreference/window-statement.html
-- Running total
select name, sum(weight) over (order by name) as running_total_weight
from cats
order by name
-- Partitioned running total
select name, breed, sum(weight) over (partition by breed order by name) as running_total_weight
from cats
-- Counting - ROW_NUMBER()
select row_number() over (order by color,name) as unique_number, name, color
from cats
-- Ordering - RANK()
select rank() over (order by weight desc) as ranking, weight, name
from cats
order by ranking, name DESC
-- ntile() lets us divide our data into percentiles/quartiles.
-- If we only need to see the lowest X% this can be done via ntile()
-- Quartiles - NTILES()
select name, weight, ntile(4) over ( order by weight) as weight_quartile
from cats
order by weight_quartile, name
-- Dense Rank - DENSE_RANK()
-- assigns each group a rank sequentially
select dense_rank() over (order by age DESC) as r, name,age
from cats
order by r, name
-- Compare previous row
-- LAG() lets us compare to the previous rows [and lead() the next rows] [min() also works] It is useful for looking for strange step ups/downs in data
select name, weight, coalesce(weight - lag(weight, 1) over (order by weight), 0) as weight_to_lose
FROM cats
order by weight
-- Compare previous with partition
select name, breed, weight, coalesce(weight - lag(weight, 1) over (partition by breed order by weight), 0) as weight_to_lose
from cats
order by weight, name
-- FIRST_VALUE()
-- lets us get the first value in a subgroup
select name, color, first_value(weight) over (partition by color order by weight) as lowest_weight_by_color
from cats
order by color, name
-- Window
select name, weight, ntile(2) over ntile_window as by_half, ntile(3) over ntile_window as thirds, ntile(4) over ntile_window as quart
from cats window ntile_window AS ( ORDER BY weight)
order by weight, name
https://en.wikipedia.org/wiki/Set_operations_(SQL)
UNION
INTERSECT
EXCEPT
INTERSECT
sometimes the DWH is normalized and the data marts are denormalized
http://www.vertabelo.com/blog/technical-articles/denormalization-when-why-and-how
having redundant data records for the sake of better performance
AKA dimensional data model
In dimensional modeling, it’s very important to define the right dimensions and choose proper granulation
state_code
and state_name
so queries are fasterSnowflake
The snowflake is an extension to a dimension intended to reduce storage and duplication. It has the undesirable side effect of complicating and slowing queries.
Bridge Tables: for many to many relationships
Slowly Changing Dimension (SCD)
Data marts are like small pieces of a DWH. Helps with security and less confusing for user.
http://www.vertabelo.com/blog/technical-articles/5-steps-for-an-effective-database-model