探索PostgreSQL:复杂查询的实例解析

发表时间: 2023-01-11 23:04


本文内容是多年前观看一个英文学习视频作得记录,相关视频找不到了,发布在这里和学习者分享,也方便自已查看。

01 PostgreSQL - Setting Up the Database and Left vs Inner Joins

--SELECT * from payment;--SELECT p.*,c.* --FROM payment p LEFT JOIN customer c ON p.customer_id = c.customer_idFROM payment p JOIN customer c ON p.customer_id = c.customer_id;-- 上述两种JOIN的结果,14596条记录受影响-- Rows in payament and i did not have a customer -- INNER JOIN only returns rows that matches in both tables;-- ***** SELECT p.*,c.*,r.* FROM payment p 	LEFT JOIN customer c ON p.customer_id = c.customer_id	LEFT JOIN rental r ON p.rental_id = r.rental_id;

02 Basics of SQL Joins using DVD Rental Database

-- LEFT JOIN,INNER JOINSselect generate_series(1, 10);-- column with 1 - 10SELECT val.val, set.set FROM generate_series(1,10)valjoin generate_series(5,15)set ON val.val = set.set;SELECT val.val, set.set FROM generate_series(1,10)valLEFT JOIN generate_series(5,15)set on val.val = set.set;  --keeps the columns in the first sideSELECT val.val, set.set FROM generate_series(1,10)valRight join generate_series(5,15)set ON val.val = set.set; -- keeps the on the right side-- column with 5 - 15/* LEFT JOIN returns matches from the left table and those found on the right side of the join*/--LEFT JOIN generate_series(5,16)set on val.val = set.set  --keeps the columns in the first side/* RIGHT JOIN returns matches from the right table and those found on the left side of the join */--Right join generate_series(5,15)set ON val.val = set.set -- keeps the on the right side-- 以上向左结合查询和向右结合查询的结果不同

03 Intro To Date Parsing and Time Series Data in SQL

SELECT p.customer_id, c.first_name,SUM(p.amount)::money, COUNT(*), SUM(p.amount)/COUNT(*) as AVG_RENTAL_VALUEFROM payment p LEFT JOIN customer c ON p.customer_id = c.customer_idWHERE p.customer_id > 300 AND c.first_name like '%b%'GROUP BY 1,2HAVING COUNT(*) > 20 	--having is when you filter on aggregates, is a where clause for aggregatesORDER BY 4 DESC;SELECT p.*, payment_date::date, extract(year from p.payment_date),extract(month from p.payment_date),extract(day from p.payment_date)FROM payment pwhere extract(month from p.payment_date) in (1,2)-- 0419select p.payment_date, p.amount from payment pselect to_char(p.payment_date,'yyyy-mm'),SUM(p.amount),MAX(p.amount),MIN(p.amount)from payment p group by 1-- -- 按照月份合计,by 1 指查询结果的第一列select SUM(p.amount),MAX(p.amount),MIN(p.amount), to_char(p.payment_date,'yyyy-mm')from payment pgroup by 4 order by 1 

04 SQL - Get Day of Week With extract() + CASE statement

-- what day of the week does the most revenue? to_chat(date,'dy')-- 一周中获利最大的是哪一天?select extract(dow from p.payment_date) from payment p;  -- 星期的第几天,sunday is zero---select to_char(p.payment_date,'dy') as dow,sum(p.amount)from payment pgroup by 1order by 2 desc;----select * from (	select t.*,		CASE			WHEN t.dow = 0 THEN 'Sunday'			WHEN t.dow = 1 THEN 'Monday'			WHEN t.dow = 2 THEN 'Tuesday'			WHEN t.dow = 3 THEN 'Wednesday'			WHEN t.dow = 4 THEN 'Thurs'			WHEN t.dow = 5 THEN 'Fri'			WHEN t.dow = 6 THEN 'Saturday'		END as day_of_week	from(			select extract(dow from p.payment_date) as dow,sum(p.amount)::money   					--::money 表示货币数据, as dow  字段标识为dow,替代payment_date			from payment p 			group by 1			order by 1		)t)t2where t2.day_of_week = 'Fri' or t2.day_of_week = 'Thurs';

05 More Date and Time Analysis Using SQL

-- 一周内各天的合计select p.*, to_char(p.payment_date,'dy') as dow  -- dy 表示星期几的英文缩写from payment p;select to_char(p.payment_date,'dy') as dow,sum(p.amount)from payment pgroup by 1order by 2 desclimit 3;

06 SQL - Finding Top Grossing Genres Within Each Rating

-- top grossing films per genre, within each rating-- 各种题材的影片观看数最高的,WITH base_table AS (select p.rental_id,p.amount,r.inventory_id,f.title,f.rating,c.name as genrefrom payment p	left join rental r on p.rental_id = r.rental_id	left join inventory i on i.inventory_id = r.inventory_id	left join film f on f.film_id = i.film_id		left join film_category fc on fc.film_id = i.film_id	left join category c on c.category_id = fc.category_id)SELECT * from (	select t.*,	ROW_NUMBER() over (partition by t.rating order by t.sales desc)	from(		select bt.rating, bt.genre, SUM(bt.amount) as sales		from base_table bt		group by 1,2		order by 3 desc	 )t)radicalwhere radical.row_number = 1;

07 Handling Missing Dates in SQL

-- 处理缺失的日期-- dealing with fragmented data (do not have data for each day in the range)-- drilling the difference again between INNER and LEFT JOINs :)-- using the payment table from the dvd rental database-- 生成日期序列select gs::date from generate_series('2007-01-01','2007-12-31',interval '1 day'	) gs-- 查询租赁日合计为空的记录With base_table as (		--上句为后套入	select gs::date,SUM(p.amount) as sales from generate_series(	'2007-01-01',	'2007-12-31',	interval '1 day'		) gs LEFT JOIN payment p on gs=p.payment_date::date	group by 1	order by 1)	--下句为后套入select extract(week from bt.gs),SUM(bt.sales)   --按照星期合计查询from base_table btgroup by 1	order by 1

08 Caution - Generate Series Illustrating Left vs Inner Joins and Dangers of Not Understanding Them

With base_table as (		--上句为后套入	select gs::date,SUM(p.amount) as sales from generate_series(	'2007-01-01',	'2007-12-31',	interval '1 day'		) gs LEFT JOIN payment p on gs=p.payment_date::date  --去掉Left,则查询sum数据不为0的记录	group by 1	order by 1)	--下句为后套入select extract(week from bt.gs),SUM(bt.sales)   --按照星期合计查询from base_table btgroup by 1	order by 1

09 Filtering your database using an external data source

-- 用外部数据源过滤数据库-- get earliest order date, correlated subquery!-- get most recent order date, correlated subquery-- days since most recent order, use current_date a bitselectcase   when t.country = 'United States' THEN 'USA'  else 'Rest of world' END as geo_type,  count(*) from (select c.customer_id,c.first_name,c.last_name,c.email,c.address_id,	   split_part(c.email,'@',2),c.last_name ||', '|| c.first_name as last_first,ci.city,	   ci.country_id, co.country,	-- outer table, c.customer_id, inner table p.customer_id(	select min(p.payment_date)			from payment p where p.customer_id = c.customer_id	)::date as cust_min_order_date,	(	select max(p.payment_date)			from payment p where p.customer_id = c.customer_id)::date as cust_max_order_date,	current_date - (	select max(p.payment_date)		from payment p where p.customer_id = c.customer_id)::date as days_since_recent_order	From customer c		left join address a on a.address_id = c.address_id		left join city ci on ci.city_id = a.city_id		left join country co on ci.country_id = co.country_id	--say you have 10 email from a tradeshow and you want to check whethre or --not they exist in your customer table?		)tgroup by 1 order by 2 desc;-------------------selectcase   when t.country = 'United States' THEN 'USA'  else 'Rest of world' END as geo_type,  count(*) from (select c.customer_id,c.first_name,c.last_name,c.email,c.address_id,	   split_part(c.email,'@',2),c.last_name ||', '|| c.first_name as last_first,ci.city,	   ci.country_id, co.country,	-- outer table, c.customer_id, inner table p.customer_id(	select min(p.payment_date)			from payment p where p.customer_id = c.customer_id	)::date as cust_min_order_date,	(	select max(p.payment_date)			from payment p where p.customer_id = c.customer_id)::date as cust_max_order_date,	current_date - (	select max(p.payment_date)		from payment p where p.customer_id = c.customer_id)::date as days_since_recent_order	From customer c		left join address a on a.address_id = c.address_id		left join city ci on ci.city_id = a.city_id		left join country co on ci.country_id = co.country_id--Inner Join on Outside of the DB data	where c.email IN (		'sonia.gregory@sakilacustomer.org',	'miriam.mckinney@sakilacustomer.org',	'velma.lucas@sakilacustomer.org',	'becky.miles@sakilacustomer.org',	'bobbie.craig@sakilacustomer.org',	'violet.rodriquez@sakilacustomer.org',	'kristina.chambers@sakilacustomer.org',	'toni.holt@sakilacustomer.org',	'misty.lambert@sakilacustomer.org',	'mae.fletcher@sakilacustomer.org',	'shelly.watts@sakilacustomer.org',	'daisy.bates@sakilacustomer.org',	'ramona.hale@sakilacustomer.org')		)tgroup by 1 order by 2 desc

10 Using postgres regexp_replace to clean text data

-- 使用 regexp_replace 清洗文本数据-- Regular Expressions (clearning data)-- Remove stop words-- Lowercase all text-- Trim text-- then count by first letter of first name in title-- remove of, a, in-- word boundarySelect f.description,--regexp_replace(f.description,'of|a|in','xxx','g')--regexp_replace(f.description,'of|\ma\M|in','xxx','g')--regexp_replace(f.description,'\mof\M|\ma\M|\min\M','xxx','g')--regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g')--regexp_replace(f.description,'\mof\M|\ma\M|\min\M|\s\s+','','g')lower(		regexp_replace(			regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),				'\s+',' ','g')),	--left(lower(-- regexp_replace(-- 			regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),-- 				'\s+',' ','g')),3)trim(lower(		regexp_replace(			regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),				'\s+',' ','g')))from film f;

11 Postgres Generate Series and Inner vs Left Joins

--  生成序列数据,左联合vs右联合			select * from(	select date::date, sum(p.amount), count(*) from generate_series(	'2007-01-01'::date,	'2007-12-31'::date,	'1 day'::interval	)date left join payment p on p.payment_date::date = date.date 		--测试去掉left 或使用INNER 的查询结果	group by 1	order by 1-- nulls last -- 查询sum为null的记录)twhere t.count > 1-- without left join, you reduce the result to only matching cases on either-- side of the join in this case, that means we needed sales on the date...

12 Array Operations in Postgres - Grouping to An Array

-- 数组操作 - 数组分组			select f.film_id,count(*)from film f		left join film_actor fa on f.film_id = fa.film_idgroup by 1order by 1;/**/select f.film_id,fa.*from film f		left join film_actor fa on f.film_id = fa.film_idorder by 1,2;/*****/select f.film_id,count(*),	array_agg(fa.actor_id),	(array_agg(fa.actor_id))[1:3],	-- 数组中为前3个演员的ID	(select sum(x) from unnest(array_agg(fa.actor_id)) x)from film f		left join film_actor fa on f.film_id = fa.film_idgroup by 1order by 1,2

13 Finding Customers by Unique Genres Rented

-- 查找租借单一题材的客户-- find customers who have rented from >=10 genres-- you can select from any select, everything is a table!!!select t2.customer_id,count(distinct t2.genre) from (	select t.customer_id,t.genre,count(*) from(		select r.rental_id,r.customer_id,r.inventory_id,f.title,fc.category_id,c.name as genre		from rental r 			left join inventory i on r.inventory_id = i.inventory_id			left join film f on i.film_id = f.film_id			left join film_category fc on fc.film_id = f.film_id			left join category c on c.category_id = fc.category_id	)t	group by 1,2	order by 1,3 desc)t2group by 1having count(*) > 10order by 2;             

14 SQL For Actors with the Most Films

-- 查询参演最多电影的演员-- which film has the most actors?  --哪支股票有最多的基金参与?-- everything is a table, everything is table-- find the actor with the most films?-- actor_id, nbr of films they are inselect t.actor_id, count(distinct t.film_id) from (	select f.film_id,f.title,f.release_year,fa.actor_id,a.first_name,a.last_name	from film f 			left join film_actor fa on f.film_id = fa.film_id			left join actor a on fa.actor_id = a.actor_id	order by 1,4)tgroup by 1having count(distinct t.film_id) > 34order by 2 desc;

15 Postgres - Top Movies by Genre Window Functions and CTEs

-- 按照题材WF和CTE列出最受欢迎的电影-- Joins Joins Joins!!! what are the thop selling movies?-- payment --> rental(inventory) --> inventory to find more about the movie?-- Top selling movie per genre 		-- 各种题材电影销量最好的(各个板块中涨幅最大的/涨幅最小且持续时间最长的)-- common table expressionwith base_table as (	select p.rental_id, p.amount,r.inventory_id,i.film_id,f.title,f.rating	from payment p 			left join rental r on p.rental_id = r.rental_id			left join inventory i on i.inventory_id = r.inventory_id			left join film f on f.film_id = i.film_id),next_table as(select bt.title,bt.rating, sum(bt.amount)from base_table btgroup by 1,2order by 3 desc)select * from(		select nt.*,	row_number() over (partition by nt.rating order by nt.sum desc) as rating_rank	from next_table nt	)twhere t.rating_rank = 1;

16 SQL - everything is a table, some more joining basics

-- 所有的事情都是数据表,更多的一些联合基本用法-- more grouping , counting, etc....select t.film_id, count(*) from(	select f.film_id, f.title, f.release_year,fa.actor_id	from film f			left join film_actor fa on f.film_id = fa.film_id		 -- in film, film_id is the primary key	order by 1,4	--which film has the most actors?\	--everything is a table)tgroup by 1order by 2 desc;/* ------------ */select f.film_id, f.title, f.release_year,fa.actor_id,a.first_name,a.last_name	from film f			left join film_actor fa on f.film_id = fa.film_id			left join actor a on fa.actor_id = a.actor_id							order by 1,4

17 SQL - Find New Customers by Date of Acquisition

-- 按照购买日期查找新的客户							   select * from (	select t.*,		CASE			WHEN t.dow = 0 THEN 'Sunday'			WHEN t.dow = 1 THEN 'Monday'			WHEN t.dow = 2 THEN 'Tuesday'			WHEN t.dow = 3 THEN 'Wednesday'			WHEN t.dow = 4 THEN 'Thurs'			WHEN t.dow = 5 THEN 'Fri'			WHEN t.dow = 6 THEN 'Saturday'		END as day_of_week	from(			select extract(dow from p.payment_date) as dow,sum(p.amount)::money   			--::money 表示货币数据, as dow  字段标识为dow,替代payment_date			from payment p 			group by 1			order by 1		)t)t2where t2.day_of_week = 'Fri' or t2.day_of_week = 'Thurs';------with first_payments as (	select p.customer_id,min(p.payment_date)::date   		-- ::date 一定要有,否则查询结果就不是想要的结果	from payment p	group by 1	order by 2,1)-- new customers by date (common table expression)select * from first_payments;

18 Abstract Intro the ROW_NUMBER Window Function Using Random Data

-- 日期属于哪个季度; 销售业绩最好的前5天,是几月几日select date::date, extract(quarter from date) from generate_series('2007-01-01'::date,'2007-12-31'::date,'1 day'::interval)date----select * from (	select t.*,		row_number() over(partition by t.qtr order by t.sales desc) as day_rank	from (		select date::date, extract(quarter from date) as 			qtr,floor(random()*10000)::numeric::money as sales		from generate_series(		'2007-01-01'::date,		'2007-12-31'::date,		'1 day'::interval)date	)t-- top 5 performing days (sales_per quarte -- what are the dates?))t2where t2.day_rank in (1,2,3,4,5)  -- 每个季度销售业绩最好的5天

19 Simulate ROW_NUMBER() Without Window Functions

select * from(	select p.payment_id, p.customer_id,		-- correlated subquery:		-- inner query below is counting the nbr of times that the inner payment id <= outer pmt ID		-- inner always matches OUTER customer id, 以下内查询时间的编号,时间的符合条件为....	(		select count(*) from payment p2 			where p2.payment_id <= p.payment_id and p2.customer_id = p.customer_id	) as row_nbr	from payment p	order by 2,1)t where t.row_nbr =1;	

20 Postgres - Use Row_Number to Find Customer First Orders

-- 找到客户的第一次下单-- solve it uses row_numberselect * from(	select p.payment_id, p.customer_id, p.payment_date,	Row_number() over(partition by p.customer_id order by p.payment_id asc) as cust_order_rank,	Row_number() over(partition by p.customer_id order by p.payment_date asc) as cust_order_rank_date	from payment p 	order by 2,3)twhere t.cust_order_rank_date = 1;

21 First Customer Order With Subquery But No Common Table Expression

-- 用子查询找到客户的第一个单子							   -- more customer value: getting first orders and other data about it-- payment, p.payment_id goes up over time, its a primary keyselect p.customer_id,p.amount, min(p.payment_id) as first_pmt_idfrom payment pgroup by 1,2order by 1/*-------------------*/select p.customer_id, min(p.payment_id) as first_pmt_id,-- subquery to join the outerrows to this inner row(	select p2.amount from payment p2		where p2.payment_id = min(p.payment_id)) as first_pmt_amt,(	select p2.rental_id from payment p2		where p2.payment_id = min(p.payment_id)) as first_rnt_idfrom payment pgroup by 1 order by 1;

22 Using a Common Table Expression To Find Customer 1st Order Date

-- CTE (common table expression) -- 找到客户第一次下单的时间With first_orders as (	select p.customer_id, min(p.payment_id) as first_pmt	from payment p	group by 1	)-- select * from first_orders-- order by 1select * from payment p2where p2.payment_id in (						 select fo.first_pmt from first_orders fo												)order by 2;

23 SQL - Break out Customer Orders by New vs Repeat

-- 把客户的首次下单和再次下单分出来-- get the customer's first order date and the amount they spent-- 得到客户首次下单的时间和话费的金额							   select t.payment_date::date,	case		when t.row_number =1 then 'New_buyer'		else 'repeat_buyer'		end as order_type,				sum(t.amount)from (				select 			p.customer_id, 			p.payment_date, 			Row_number() over (partition by p.customer_id),			amount		from payment p	)tgroup by 1,2order by 1	-- order can be first order, or a prpeat order(if row_number =1, new else repeat)-- date, rev for 1st time, rev for repeat

24 SQL - Do Customers Spend More On 1st or 2nd Order

-- 客户的第二次下单花费多或少 -- do they spend more or less on their second order?/*----------------------------*/with base_table as(	select * from (		select p.payment_id, p.customer_id, p.payment_date, p.amount,				row_number() over(partition by p.customer_id order by p.payment_date asc)							as cust_order_rank,				LAG(p.payment_date) over (partition by p.customer_id 							order by p.payment_date asc) as prior_order,				p.payment_date -lag(p.payment_date) over (partition by p.customer_id 							order by p.payment_date asc) as time_since,				floor(extract(epoch from p.payment_date - lag(p.payment_date)							 over(partition by p.customer_id order by p.payment_date asc))/60)							as mins_since_prior_order		from payment p		order by 2,3	)t where t.cust_order_rank IN (1,2))-- which order nbr of 1 or 2, has more moneyselect bt.cust_order_rank, sum(bt.amount),count(*)from base_table btgroup by 1/*------------------------------------------------------------*/-- every customer has at least 2 orders , order rank 2 < order rank 1-- 至少有两个订单的客户,订单排名2 < 订单排名1		with base_table as(	select * from (		select p.payment_id, p.customer_id, p.payment_date, p.amount,				row_number() over(partition by p.customer_id order by p.payment_date asc)						as cust_order_rank,				LAG(p.payment_date) over (partition by p.customer_id 						order by p.payment_date asc) as prior_order,				p.payment_date -lag(p.payment_date) over (partition by p.customer_id 						order by p.payment_date asc) as time_since,				floor(extract(epoch from p.payment_date - lag(p.payment_date)							 over(partition by p.customer_id order by p.payment_date asc))/60)								as mins_since_prior_order		from payment p		order by 2,3	)t where t.cust_order_rank IN (1,2,3,4,5,6,7,8,9))-- which order nbr of 1 or 2, has more moneyselect bt.cust_order_rank, sum(bt.amount),count(*),sum(bt.amount)/count(*)from base_table btgroup by 1

25 The SUM(CASE WHEN pattern in SQL to compute valued columns

-- get the customer's first order date and the amount they spent-- 获得客户的第一个订单和他们的消费总额		select t.payment_date::date,case 	when t.row_number = 1 then 'New_buyer'	else 'repeat_buyer'	end as order_type,		sum(t.amount)	from (	select 		p.customer_id,		p.payment_date,		row_number() over (partition by p.customer_id),		amount	from payment p	)tgroup by 1,2order by 1/*----------------------------------------*/-- max rising change of priceselect t.payment_date::date,sum (case when t.row_number = 1 then t.amount else 0 end)::money as new_buyer_revenues,sum (case when t.row_number <> 1 then t.amount else 0 end)::money as repeat_buyer_revenues,sum (case when t.row_number > 0 then t.amount else 0 end) as all_revenues,sum (case when t.row_number = 1 then t.amount else 0 end)/ 			sum (case when t.row_number > 0 then t.amount else 0 end) as pct_from_newfrom (	select 		p.customer_id,		p.payment_date,		row_number() over (partition by p.customer_id),		amount	from payment p	)tgroup by 1 order by 1

26 A Gentle Intro to the LAG() Window Function in SQL

-- LAG 窗口函数简介-- feel good lag today is novermber 25,2016-- select current_date, current_date - interval '364' days as ly-- day,measure, measure for same time lywith sales_data as(	select date::date, floor(random()*100000)::numeric::money as sales_this_yr	from generate_series(	'2015-01-01'::date,	'2016-12-31'::date,	'1 day'::interval		)date)-- Nov 25, 2016 black friday-- the comp date for LY was 11/27/15select sd.*,LAG(sd.sales_this_yr,364) over(), current_date::date - interval '364 days'from sales_data sd;

27 Use LAG() To find timing between the 1st and 2nd Customer Order

-- 使用lag函数,查到客户的第一单和第二单之间的间隔时间--what's the avg delay between the first and second order???-- 第一个订单和第二个订单之间平均相隔几天?-- 借鉴此类方法,计算Zen的各种几何片段????with base_table As(	select * from (		select p.payment_id,p.customer_id,p.payment_date,				row_number() over(partition by p.customer_id order by p.payment_date ASC) 							as cust_order_rank,				lag(p.payment_date) over(partition by p.customer_id order by p.payment_date ASC) 							as prior_order,						p.payment_date - lag(p.payment_date) over(partition by p.customer_id order by							p.payment_date asc) as time_series,				floor(extract(epoch from p.payment_date - lag(p.payment_date) 							  over(partition by p.customer_id order by p.payment_date asc))/60) 							as mins_since_prior_order		from payment p		order by 2,3	)t where t.cust_order_rank = 2 -- IN (1,2)  		-- 第二单,或第一单和第二单)select 	case		when bt.mins_since_prior_order < 100 then '< 100'		when bt.mins_since_prior_order < 200 then '< 200'		when bt.mins_since_prior_order < 300 then '< 300'		when bt.mins_since_prior_order < 400 then '< 400'		when bt.mins_since_prior_order < 500 then '< 500'		else ' >= 500' END as time_distribution,				count(*)from base_table btgroup by 1order by 2 desc

28 Use Correlated Subqueries to Find First 7 and First 30 Days Revenues

-- get the first order revenue and the forward 7 days sales-- 前7天和前30天的销售额With base_table as (		select p.payment_id, p.customer_id, p.payment_date, p.amount,		row_number() over(partition by p.customer_id order by p.payment_id asc) 						as cust_order_rank_payment		from payment p		order by 2,3	)select bt.*,(	select sum(p2.amount) from payment p2	where p2.payment_date between bt.payment_date and bt.payment_date + INTERVAL '7 days'						  and bt.customer_id = p2.customer_id	) fwd_7_days_sales,	-- correlated subquery(	select sum(p2.amount) from payment p2	where p2.payment_date between bt.payment_date and bt.payment_date + INTERVAL '30 days'						  and bt.customer_id = p2.customer_id	) fwd_30_days_salesfrom base_table bt where bt.cust_order_rank_payment = 1;

29 PostgresSQL Row Number (Hourly sales for each staff member)

-- without left join, you reduce the results to only matching cases on either side of the join-- in this case, that means needed sales on the date...-- which is the most popular hour for each staff member?-- 对员工会员而言,业绩最好的时间是哪个小时?with hourly_sales as (	select p.staff_id, extract(hour from p.payment_date) as hour_of_day,sum(p.amount)	from payment p	group by 1,2	order by 3 desc, 1 desc)-- what hour is the best for each staff_id?select * from (	select hs.*,		row_number() over (partition by hs.staff_id order by hs.sum desc)	from hourly_sales hs)twhere t.row_number = 1

30 Compute Moving Averages Using Postgres

-- select sd.*, lag(sd.sales_this_yr,364) over(), sd.date - interval '364 days'-- from sales_date sd-- 7 day trailing MAwith sales_data as (	select date::date, floor(random()*10000)::numeric::money as sales_this_yr	from generate_series(	'2015-01-01'::date,	'2016-12-31'::date,	'1 day'::interval		)date)	select sd.*,	floor(avg(sd.sales_this_yr::numeric) over (order by sd.date rows between 7 preceding and 0 following)) 					as l7davg,	sd.sales_this_yr::numeric - floor(avg(sd.sales_this_yr::numeric) over(order by sd.date rows 					between 7 preceding and 0 following)) as vsavg		from sales_data sd;-- time diffs between order by customers

31 SQL - Multiple Row_Number functions addressing different partitions

-- what about the top day per quarter and the top day per month?select t.*,	row_number() over(partition by t.qtr order by t.sales desc) as day_rank_qtr,	row_number() over(partition by t.month order by t.sales desc) as day_rank_mofrom (		select date::date, extract(quarter from date) as qtr, extract(month from date) as month,	floor(random()*100000)::numeric::money as sales	from generate_series(	'2007-01-01'::date,	'2007-12-31'::date,	'1 day'::interval		)date	)t

32 Postgres split_part, plus CASE stmt tutorial

-- 美国员工和美国之外的员工																		  -- Live coding this (start with customer table)   -- splitting on email domain, combined like domains into groups   -- concatenating text, get first, last, etc.,,,   -- Grouping based on country, more joins and then a CASE statement   -- get earliest order date, correlated subquery!   -- get most recent order date, correlated subquery   -- days since most recent order, use current_date a bitselect 		case 		when t.country = 'United States' then 'USA' 		else 'Rest of World' end as geo_type,		count(*) from (			select c.customer_id, c.first_name, c.last_name, c.email, c.address_id,			split_part(c.email,'@',2),c.last_name||', '||c.first_name as last_first,ci.city,			ci.country_id,co.country,			-- outer table, c.customer_id, inner table p.customer_id			(				select min(p.payment_date)						from payment p where p.customer_id = c.customer_id			)::date as cust_min_order_date,			(				select max(p.payment_date)						from payment p where p.customer_id = c.customer_id			)::date as cust_max_order_date,			current_date - (				select max(p.payment_date)						from payment p where p.customer_id = c.customer_id			)::date as days_since_recent_order			from customer c					left join  address a on a.address_id = c.address_id					left join city ci on ci.city_id = a.city_id					left join country co on ci.country_id = co.country_id		)t		group by 1 order by 2 desc;

33 SQL Row Number _ Window Function Example - Top NY Baby Names by Year