Tutorial Window Function con DuckDB (2ª parte)
👉 Enlace 1ª parte
import duckdb
%load_ext sql
%config SqlMagic.autopandas = True
%config SqlMagic.displaycon = False
conn = duckdb.connect('BBDD/WindowFunction.duckdb')
%sql conn --alias duckdb
# con.sql("show databases")
conn.sql("show tables")
┌─────────────┐
│ name │
│ varchar │
├─────────────┤
│ order_items │
│ orders │
│ products │
└─────────────┘
GROUP BY VS PARTITION BY
%sql SELECT model_year, AVG(list_price) avg_price\
FROM products\
GROUP BY model_year
| model_year | avg_price |
| 0 | 2016 | 980.299231 |
| 1 | 2017 | 1279.931176 |
| 2 | 2018 | 1658.470441 |
| 3 | 2019 | 2583.323333 |
%sql SELECT model_year, product_name, list_price,\
AVG(list_price) OVER (PARTITION BY model_year) avg_price\
FROM products\
ORDER BY product_name
| model_year | product_name | list_price | avg_price |
| 0 | 2018 | Electra Amsterdam Fashion 3i Ladies' - 2017/2018 | 899.99 | 1658.470441 |
| 1 | 2017 | Electra Amsterdam Fashion 7i Ladies' - 2017 | 1099.99 | 1279.931176 |
| 2 | 2017 | Electra Amsterdam Original 3i - 2015/2017 | 659.99 | 1279.931176 |
| 3 | 2017 | Electra Amsterdam Original 3i Ladies' - 2017 | 659.99 | 1279.931176 |
| 4 | 2018 | Electra Amsterdam Royal 8i - 2017/2018 | 1259.90 | 1658.470441 |
| ... | ... | ... | ... | ... |
| 316 | 2017 | Trek X-Caliber 8 - 2017 | 999.99 | 1279.931176 |
| 317 | 2018 | Trek X-Caliber 8 - 2018 | 999.99 | 1658.470441 |
| 318 | 2018 | Trek X-Caliber Frameset - 2018 | 1499.99 | 1658.470441 |
| 319 | 2018 | Trek XM700+ - 2018 | 3499.99 | 1658.470441 |
| 320 | 2018 | Trek XM700+ Lowstep - 2018 | 3499.99 | 1658.470441 |
COUNT DISTINCT
%%sql
SELECT year(order_date) y, COUNT(DISTINCT order_id) num_orders
FROM orders
GROUP BY year(order_date)
| y | num_orders |
| 0 | 2017 | 688 |
| 1 | 2016 | 635 |
| 2 | 2018 | 292 |
RANK()
%%sql
/* Rank price from LOW->HIGH */
SELECT product_name, list_price,
RANK() OVER (ORDER BY list_price ASC) rank
FROM products
| product_name | list_price | rank |
| 0 | Strider Classic 12 Balance Bike - 2018 | 89.99 | 1 |
| 1 | Sun Bicycles Lil Kitt'n - 2017 | 109.99 | 2 |
| 2 | Trek Boy's Kickster - 2015/2017 | 149.99 | 3 |
| 3 | Trek Girl's Kickster - 2017 | 149.99 | 3 |
| 4 | Trek Kickster - 2018 | 159.99 | 5 |
| ... | ... | ... | ... |
| 316 | Trek Domane SL Frameset - 2018 | 6499.99 | 316 |
| 317 | Trek Domane SL Frameset Women's - 2018 | 6499.99 | 316 |
| 318 | Trek Emonda SLR 8 - 2018 | 6499.99 | 316 |
| 319 | Trek Domane SLR 8 Disc - 2018 | 7499.99 | 320 |
| 320 | Trek Domane SLR 9 Disc - 2018 | 11999.99 | 321 |
%%sql
/* Rank price from HIGH->LOW */
SELECT product_name, list_price,
RANK() OVER (ORDER BY list_price DESC) rank
FROM products
| product_name | list_price | rank |
| 0 | Trek Domane SLR 9 Disc - 2018 | 11999.99 | 1 |
| 1 | Trek Domane SLR 8 Disc - 2018 | 7499.99 | 2 |
| 2 | Trek Silque SLR 8 Women's - 2017 | 6499.99 | 3 |
| 3 | Trek Domane SL Frameset - 2018 | 6499.99 | 3 |
| 4 | Trek Domane SL Frameset Women's - 2018 | 6499.99 | 3 |
| ... | ... | ... | ... |
| 316 | Trek Kickster - 2018 | 159.99 | 317 |
| 317 | Trek Boy's Kickster - 2015/2017 | 149.99 | 318 |
| 318 | Trek Girl's Kickster - 2017 | 149.99 | 318 |
| 319 | Sun Bicycles Lil Kitt'n - 2017 | 109.99 | 320 |
| 320 | Strider Classic 12 Balance Bike - 2018 | 89.99 | 321 |
FIRST_VALUE
%%sql
/* Find the difference of price from cheapest alternative */
SELECT
product_name,
list_price,
FIRST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) cheapest_price,
list_price - FIRST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) diff
FROM products
| product_name | list_price | cheapest_price | diff |
| 0 | Strider Classic 12 Balance Bike - 2018 | 89.99 | 89.99 | 0.0 |
| 1 | Sun Bicycles Lil Kitt'n - 2017 | 109.99 | 89.99 | 20.0 |
| 2 | Trek Boy's Kickster - 2015/2017 | 149.99 | 89.99 | 60.0 |
| 3 | Trek Girl's Kickster - 2017 | 149.99 | 89.99 | 60.0 |
| 4 | Trek Kickster - 2018 | 159.99 | 89.99 | 70.0 |
| ... | ... | ... | ... | ... |
| 316 | Trek Domane SL Frameset - 2018 | 6499.99 | 89.99 | 6410.0 |
| 317 | Trek Domane SL Frameset Women's - 2018 | 6499.99 | 89.99 | 6410.0 |
| 318 | Trek Emonda SLR 8 - 2018 | 6499.99 | 89.99 | 6410.0 |
| 319 | Trek Domane SLR 8 Disc - 2018 | 7499.99 | 89.99 | 7410.0 |
| 320 | Trek Domane SLR 9 Disc - 2018 | 11999.99 | 89.99 | 11910.0 |
LAST_VALUE
%%sql
/* Find the difference of price from the priciest alternative */
SELECT
product_name,
list_price,
LAST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) highest_price,
LAST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) - list_price diff
FROM products
| product_name | list_price | highest_price | diff |
| 0 | Strider Classic 12 Balance Bike - 2018 | 89.99 | 11999.99 | 11910.0 |
| 1 | Sun Bicycles Lil Kitt'n - 2017 | 109.99 | 11999.99 | 11890.0 |
| 2 | Trek Boy's Kickster - 2015/2017 | 149.99 | 11999.99 | 11850.0 |
| 3 | Trek Girl's Kickster - 2017 | 149.99 | 11999.99 | 11850.0 |
| 4 | Trek Kickster - 2018 | 159.99 | 11999.99 | 11840.0 |
| ... | ... | ... | ... | ... |
| 316 | Trek Domane SL Frameset - 2018 | 6499.99 | 11999.99 | 5500.0 |
| 317 | Trek Domane SL Frameset Women's - 2018 | 6499.99 | 11999.99 | 5500.0 |
| 318 | Trek Emonda SLR 8 - 2018 | 6499.99 | 11999.99 | 5500.0 |
| 319 | Trek Domane SLR 8 Disc - 2018 | 7499.99 | 11999.99 | 4500.0 |
| 320 | Trek Domane SLR 9 Disc - 2018 | 11999.99 | 11999.99 | 0.0 |
LAG
%%sql
/* Find number of orders in a year */
WITH yearly_orders AS (
SELECT
year(order_date) y,
COUNT(DISTINCT order_id) num_orders
FROM orders
GROUP BY year(order_date)
)
/* Compare this year's sales to last year's */
SELECT
*,
LAG(num_orders) OVER (ORDER BY y) last_year_order,
LAG(num_orders) OVER (ORDER BY y) - num_orders diff_from_last_year
FROM yearly_orders
| y | num_orders | last_year_order | diff_from_last_year |
| 0 | 2016 | 635 | <NA> | <NA> |
| 1 | 2017 | 688 | 635 | -53 |
| 2 | 2018 | 292 | 688 | 396 |
LEAD
%%sql
/* Find number of orders in a year */
WITH yearly_orders AS (
SELECT
year(order_date) y,
COUNT(DISTINCT order_id) num_orders
FROM orders
GROUP BY year(order_date)
)
/* Compare number of years compared to next year */
SELECT *,
LEAD(num_orders) OVER (ORDER BY y) next_year_order,
LEAD(num_orders) OVER (ORDER BY y) - num_orders diff_from_next_year
FROM yearly_orders
| y | num_orders | next_year_order | diff_from_next_year |
| 0 | 2016 | 635 | 688 | 53 |
| 1 | 2017 | 688 | 292 | -396 |
| 2 | 2018 | 292 | <NA> | <NA> |
RANKING
DENSE_RANK(), ROW_NUMBER(), RANK(), PERCENT_RANK(), NTILE(), CUME_DIST()
%%sql
/* Rank all products by price */
SELECT
product_name,
list_price,
ROW_NUMBER() OVER (ORDER BY list_price) row_num,
DENSE_RANK() OVER (ORDER BY list_price) dense_rank,
RANK() OVER (ORDER BY list_price) rank,
PERCENT_RANK() OVER (ORDER BY list_price) pct_rank,
NTILE(75) OVER (ORDER BY list_price) ntile,
CUME_DIST() OVER (ORDER BY list_price) cume_dist
FROM products
| product_name | list_price | row_num | dense_rank | rank | pct_rank | ntile | cume_dist |
| 0 | Strider Classic 12 Balance Bike - 2018 | 89.99 | 1 | 1 | 1 | 0.000000 | 1 | 0.003115 |
| 1 | Sun Bicycles Lil Kitt'n - 2017 | 109.99 | 2 | 2 | 2 | 0.003125 | 1 | 0.006231 |
| 2 | Trek Boy's Kickster - 2015/2017 | 149.99 | 3 | 3 | 3 | 0.006250 | 1 | 0.012461 |
| 3 | Trek Girl's Kickster - 2017 | 149.99 | 4 | 3 | 3 | 0.006250 | 1 | 0.012461 |
| 4 | Trek Kickster - 2018 | 159.99 | 5 | 4 | 5 | 0.012500 | 1 | 0.015576 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 316 | Trek Domane SL Frameset - 2018 | 6499.99 | 317 | 104 | 316 | 0.984375 | 74 | 0.993769 |
| 317 | Trek Domane SL Frameset Women's - 2018 | 6499.99 | 318 | 104 | 316 | 0.984375 | 75 | 0.993769 |
| 318 | Trek Emonda SLR 8 - 2018 | 6499.99 | 319 | 104 | 316 | 0.984375 | 75 | 0.993769 |
| 319 | Trek Domane SLR 8 Disc - 2018 | 7499.99 | 320 | 105 | 320 | 0.996875 | 75 | 0.996885 |
| 320 | Trek Domane SLR 9 Disc - 2018 | 11999.99 | 321 | 106 | 321 | 1.000000 | 75 | 1.000000 |
AVG, MIN, MAX
%%sql
SELECT
order_id,
product_id,
discount,
AVG(discount) OVER (PARTITION BY product_id) avg_discount,
MIN(discount) OVER (PARTITION BY product_id) min_discount,
MAX(discount) OVER (PARTITION BY product_id) max_discount
FROM order_items
WHERE order_id != 1
ORDER BY order_id
| order_id | product_id | discount | avg_discount | min_discount | max_discount |
| 0 | 2 | 20 | 0.07 | 0.112530 | 0.05 | 0.20 |
| 1 | 2 | 16 | 0.05 | 0.113191 | 0.05 | 0.20 |
| 2 | 3 | 20 | 0.05 | 0.112530 | 0.05 | 0.20 |
| 3 | 3 | 3 | 0.05 | 0.105581 | 0.05 | 0.20 |
| 4 | 4 | 2 | 0.10 | 0.107792 | 0.05 | 0.20 |
| ... | ... | ... | ... | ... | ... | ... |
| 4712 | 1614 | 124 | 0.07 | 0.125714 | 0.07 | 0.20 |
| 4713 | 1614 | 159 | 0.07 | 0.062000 | 0.05 | 0.07 |
| 4714 | 1615 | 214 | 0.07 | 0.085000 | 0.07 | 0.10 |
| 4715 | 1615 | 197 | 0.20 | 0.175000 | 0.10 | 0.20 |
| 4716 | 1615 | 182 | 0.20 | 0.130000 | 0.05 | 0.20 |
# Explicitly close the connection
conn.close()