Saltar a contenido

Tutorial Window Function con DuckDB (2ª parte)

👉 Enlace 1ª parte

pip install duckdb
import duckdb

%load_ext sql
%config SqlMagic.autopandas = True
%config SqlMagic.displaycon = False

conn = duckdb.connect('BBDD/WindowFunction.duckdb')
%sql conn --alias duckdb
# con.sql("show databases")
conn.sql("show tables")
┌─────────────┐
│    name     │
│   varchar   │
├─────────────┤
│ order_items │
│ orders      │
│ products    │
└─────────────┘

GROUP BY VS PARTITION BY

%sql SELECT model_year, AVG(list_price) avg_price\
     FROM products\
     GROUP BY model_year
model_year avg_price
0 2016 980.299231
1 2017 1279.931176
2 2018 1658.470441
3 2019 2583.323333
%sql SELECT model_year, product_name, list_price,\
     AVG(list_price) OVER (PARTITION BY model_year) avg_price\
     FROM products\
     ORDER BY product_name
model_year product_name list_price avg_price
0 2018 Electra Amsterdam Fashion 3i Ladies' - 2017/2018 899.99 1658.470441
1 2017 Electra Amsterdam Fashion 7i Ladies' - 2017 1099.99 1279.931176
2 2017 Electra Amsterdam Original 3i - 2015/2017 659.99 1279.931176
3 2017 Electra Amsterdam Original 3i Ladies' - 2017 659.99 1279.931176
4 2018 Electra Amsterdam Royal 8i - 2017/2018 1259.90 1658.470441
... ... ... ... ...
316 2017 Trek X-Caliber 8 - 2017 999.99 1279.931176
317 2018 Trek X-Caliber 8 - 2018 999.99 1658.470441
318 2018 Trek X-Caliber Frameset - 2018 1499.99 1658.470441
319 2018 Trek XM700+ - 2018 3499.99 1658.470441
320 2018 Trek XM700+ Lowstep - 2018 3499.99 1658.470441

COUNT DISTINCT

%%sql
  SELECT year(order_date) y, COUNT(DISTINCT order_id) num_orders
  FROM orders
  GROUP BY year(order_date)
y num_orders
0 2017 688
1 2016 635
2 2018 292

RANK()

%%sql
/* Rank price from LOW->HIGH  */
  SELECT product_name, list_price,
         RANK() OVER (ORDER BY list_price ASC) rank
  FROM products
product_name list_price rank
0 Strider Classic 12 Balance Bike - 2018 89.99 1
1 Sun Bicycles Lil Kitt'n - 2017 109.99 2
2 Trek Boy's Kickster - 2015/2017 149.99 3
3 Trek Girl's Kickster - 2017 149.99 3
4 Trek Kickster - 2018 159.99 5
... ... ... ...
316 Trek Domane SL Frameset - 2018 6499.99 316
317 Trek Domane SL Frameset Women's - 2018 6499.99 316
318 Trek Emonda SLR 8 - 2018 6499.99 316
319 Trek Domane SLR 8 Disc - 2018 7499.99 320
320 Trek Domane SLR 9 Disc - 2018 11999.99 321
%%sql
/* Rank price from HIGH->LOW  */
  SELECT product_name, list_price,
         RANK() OVER (ORDER BY list_price DESC) rank
  FROM products
product_name list_price rank
0 Trek Domane SLR 9 Disc - 2018 11999.99 1
1 Trek Domane SLR 8 Disc - 2018 7499.99 2
2 Trek Silque SLR 8 Women's - 2017 6499.99 3
3 Trek Domane SL Frameset - 2018 6499.99 3
4 Trek Domane SL Frameset Women's - 2018 6499.99 3
... ... ... ...
316 Trek Kickster - 2018 159.99 317
317 Trek Boy's Kickster - 2015/2017 149.99 318
318 Trek Girl's Kickster - 2017 149.99 318
319 Sun Bicycles Lil Kitt'n - 2017 109.99 320
320 Strider Classic 12 Balance Bike - 2018 89.99 321

FIRST_VALUE

%%sql
/* Find the difference of price from cheapest alternative */
SELECT
  product_name,
  list_price,
  FIRST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) cheapest_price,
  list_price - FIRST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) diff
FROM products
product_name list_price cheapest_price diff
0 Strider Classic 12 Balance Bike - 2018 89.99 89.99 0.0
1 Sun Bicycles Lil Kitt'n - 2017 109.99 89.99 20.0
2 Trek Boy's Kickster - 2015/2017 149.99 89.99 60.0
3 Trek Girl's Kickster - 2017 149.99 89.99 60.0
4 Trek Kickster - 2018 159.99 89.99 70.0
... ... ... ... ...
316 Trek Domane SL Frameset - 2018 6499.99 89.99 6410.0
317 Trek Domane SL Frameset Women's - 2018 6499.99 89.99 6410.0
318 Trek Emonda SLR 8 - 2018 6499.99 89.99 6410.0
319 Trek Domane SLR 8 Disc - 2018 7499.99 89.99 7410.0
320 Trek Domane SLR 9 Disc - 2018 11999.99 89.99 11910.0

LAST_VALUE

%%sql
/* Find the difference of price from the priciest alternative */
SELECT
   product_name,
   list_price,
   LAST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) highest_price,
   LAST_VALUE(list_price) OVER (ORDER BY list_price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) - list_price diff
FROM products
product_name list_price highest_price diff
0 Strider Classic 12 Balance Bike - 2018 89.99 11999.99 11910.0
1 Sun Bicycles Lil Kitt'n - 2017 109.99 11999.99 11890.0
2 Trek Boy's Kickster - 2015/2017 149.99 11999.99 11850.0
3 Trek Girl's Kickster - 2017 149.99 11999.99 11850.0
4 Trek Kickster - 2018 159.99 11999.99 11840.0
... ... ... ... ...
316 Trek Domane SL Frameset - 2018 6499.99 11999.99 5500.0
317 Trek Domane SL Frameset Women's - 2018 6499.99 11999.99 5500.0
318 Trek Emonda SLR 8 - 2018 6499.99 11999.99 5500.0
319 Trek Domane SLR 8 Disc - 2018 7499.99 11999.99 4500.0
320 Trek Domane SLR 9 Disc - 2018 11999.99 11999.99 0.0

LAG

%%sql
/* Find number of orders in a year */
WITH yearly_orders AS (
   SELECT
    year(order_date) y,
    COUNT(DISTINCT order_id) num_orders
    FROM orders
    GROUP BY year(order_date)
)

/* Compare this year's sales to last year's */
SELECT
*,
LAG(num_orders) OVER (ORDER BY y) last_year_order,
LAG(num_orders) OVER (ORDER BY y) - num_orders diff_from_last_year
FROM yearly_orders
y num_orders last_year_order diff_from_last_year
0 2016 635 <NA> <NA>
1 2017 688 635 -53
2 2018 292 688 396

LEAD

%%sql
/* Find number of orders in a year */
WITH yearly_orders AS (
   SELECT
    year(order_date) y,
    COUNT(DISTINCT order_id) num_orders
    FROM orders
    GROUP BY year(order_date)
)

/* Compare number of years compared to next year */
SELECT *,
   LEAD(num_orders) OVER (ORDER BY y) next_year_order,
   LEAD(num_orders) OVER (ORDER BY y) - num_orders diff_from_next_year
FROM yearly_orders
y num_orders next_year_order diff_from_next_year
0 2016 635 688 53
1 2017 688 292 -396
2 2018 292 <NA> <NA>

RANKING

DENSE_RANK(), ROW_NUMBER(), RANK(), PERCENT_RANK(), NTILE(), CUME_DIST()

%%sql
/* Rank all products by price */
SELECT
product_name,
list_price,
ROW_NUMBER() OVER (ORDER BY list_price) row_num,
DENSE_RANK() OVER (ORDER BY list_price) dense_rank,
RANK() OVER (ORDER BY list_price) rank,
PERCENT_RANK() OVER (ORDER BY list_price) pct_rank,
NTILE(75) OVER (ORDER BY list_price) ntile,
CUME_DIST() OVER (ORDER BY list_price) cume_dist
FROM products
product_name list_price row_num dense_rank rank pct_rank ntile cume_dist
0 Strider Classic 12 Balance Bike - 2018 89.99 1 1 1 0.000000 1 0.003115
1 Sun Bicycles Lil Kitt'n - 2017 109.99 2 2 2 0.003125 1 0.006231
2 Trek Boy's Kickster - 2015/2017 149.99 3 3 3 0.006250 1 0.012461
3 Trek Girl's Kickster - 2017 149.99 4 3 3 0.006250 1 0.012461
4 Trek Kickster - 2018 159.99 5 4 5 0.012500 1 0.015576
... ... ... ... ... ... ... ... ...
316 Trek Domane SL Frameset - 2018 6499.99 317 104 316 0.984375 74 0.993769
317 Trek Domane SL Frameset Women's - 2018 6499.99 318 104 316 0.984375 75 0.993769
318 Trek Emonda SLR 8 - 2018 6499.99 319 104 316 0.984375 75 0.993769
319 Trek Domane SLR 8 Disc - 2018 7499.99 320 105 320 0.996875 75 0.996885
320 Trek Domane SLR 9 Disc - 2018 11999.99 321 106 321 1.000000 75 1.000000

AVG, MIN, MAX

%%sql
SELECT
   order_id,
   product_id,
   discount,
   AVG(discount) OVER (PARTITION BY product_id) avg_discount,
   MIN(discount) OVER (PARTITION BY product_id) min_discount,
   MAX(discount) OVER (PARTITION BY product_id) max_discount
FROM order_items
WHERE order_id != 1
ORDER BY order_id
order_id product_id discount avg_discount min_discount max_discount
0 2 20 0.07 0.112530 0.05 0.20
1 2 16 0.05 0.113191 0.05 0.20
2 3 20 0.05 0.112530 0.05 0.20
3 3 3 0.05 0.105581 0.05 0.20
4 4 2 0.10 0.107792 0.05 0.20
... ... ... ... ... ... ...
4712 1614 124 0.07 0.125714 0.07 0.20
4713 1614 159 0.07 0.062000 0.05 0.07
4714 1615 214 0.07 0.085000 0.07 0.10
4715 1615 197 0.20 0.175000 0.10 0.20
4716 1615 182 0.20 0.130000 0.05 0.20

# Explicitly close the connection
conn.close()