【SQL】グループ内のMAXデータを取得する

■ はじめに

性別・部署別の最高齢を取得するSQLを考える
相関サブクエリ / 自己相関サブクエリを使う。

相関サブクエリ / 自己相関サブクエリについては、
以下の関連記事を参照のこと。

https://dk521123.hatenablog.com/entry/2016/01/23/230608

目次

【1】MySQL
 例1:性別別の最高齢
 例2:部署別の最高齢
【2】PostgreSQL
 例1:最新実行された処理結果を取得する
 例2:その年の最新実行されたデータを一覧表示する

【1】MySQL

 使用テーブル / データ

-- 従業員
CREATE TABLE Employees
(
id char(4) NOT NULL,
name varchar(20) NULL,
departmentId char(4) NULL,
bossId char(4) NULL,
sex char(1) NULL, -- m : male, f : famale
birthDate Date NULL
)

-- 部署
CREATE TABLE Departments
(
id char(4) NOT NULL,
name varchar(20) NULL,
)

データ

INSERT INTO Employees VALUES('1000', 'Robin','D000', NULL, 'm', '1973-5-6')
INSERT INTO Employees VALUES('1001', 'Mike','D001', NULL, 'm', '1945-11-9')
INSERT INTO Employees VALUES('1002', 'Tony','D000', NULL, 'm', '1965-1-12')
INSERT INTO Employees VALUES('1003', 'Claire','D001', '1001', 'f', '1987-9-22')
INSERT INTO Employees VALUES('1004', 'John','D002', '1002', 'm', '1972-4-2')
INSERT INTO Employees VALUES('1005', 'Ken','D002', '1001', 'm', '1982-12-22')
INSERT INTO Employees VALUES('1006', 'Hilary','D001', '1003', 'f', '1979-9-9')
INSERT INTO Employees VALUES('1007', 'Becky','D000', '1007', 'f', '1980-3-2')
INSERT INTO Employees VALUES('1008', 'Anne','D002', '1007', 'f', '1969-1-31')
INSERT INTO Employees VALUES('1007', 'Steve','D000', NULL, 'm', '1955-2-24')
INSERT INTO Employees VALUES('1008', 'Thomas','D001', '1007', 'm', '1959-9-12')

INSERT INTO Departments VALUES('D000', 'IT')
INSERT INTO Departments VALUES('D001', 'Sales')
INSERT INTO Departments VALUES('D002', 'Accounting')

確認用 select文

SELECT 
E1.Id,
E1.name,
D.name,
E1.bossId,
E1.sex,
E1.birthDate,
((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E1.birthDate,112))) / 10000) AS age
FROM
  Employees E1
INNER JOIN
  Departments D
ON
  E1.departmentId = D.id

出力結果

Id   name    name    bossId  sex birthDate   age
1000    Robin   IT  NULL    m   1973-05-06  42
1001    Mike    Sales   NULL    m   1945-11-09  70
1002    Tony    IT  NULL    m   1965-01-12  51
1003    Claire  Sales   1001    f   1987-09-22  28
1004    John    Accounting  1002    m   1972-04-02  43
1005    Ken Accounting  1001    m   1982-12-22  33
1006    Hilary  Sales   1003    f   1979-09-09  36
1007    Becky   IT  1007    f   1980-03-02  35
1008    Anne    Accounting  1007    f   1969-01-31  46
1007    Steve   IT  NULL    m   1955-02-24  60
1008    Thomas  Sales   1007    m   1959-09-12  56

 例1:性別別の最高齢

SELECT 
E1.Id,
E1.name,
D.name,
E1.bossId,
E1.sex,
E1.birthDate,
((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E1.birthDate,112))) / 10000) AS age
FROM
  Employees E1
INNER JOIN
  Departments D
ON
  E1.departmentId = D.id
WHERE
  ((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E1.birthDate,112))) / 10000) = 
  (SELECT MAX((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E2.birthDate,112))) / 10000)
  FROM Employees E2 WHERE E1.sex = E2.sex
  )

例2:部署別の最高齢

SELECT 
E1.Id,
E1.name,
D.name,
E1.bossId,
E1.sex,
E1.birthDate,
((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E1.birthDate,112))) / 10000) AS age
FROM
  Employees E1
INNER JOIN
  Departments D
ON
  E1.departmentId = D.id
WHERE
  ((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E1.birthDate,112))) / 10000) = 
  (SELECT MAX((CONVERT(INT,CONVERT(VARCHAR(8),GETDATE(),112)) - CONVERT(INT,CONVERT(VARCHAR(8),E2.birthDate,112))) / 10000)
  FROM Employees E2 WHERE E1.departmentId = E2.departmentId
  )

【2】PostgreSQL

例1:最新実行された処理結果を取得する

 使用テーブル / データ

DROP TABLE IF EXISTS process_history;
CREATE TABLE process_history
(
  process_date CHAR(8) NOT NULL,
  process_name VARCHAR(10) NOT NULL,
  result VARCHAR(20) NOT NULL,
  create_at TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
);

データ

DELETE FROM process_history;

INSERT INTO process_history VALUES ('20210901', 'process-1', 'SUCCESS', '2021-09-01 21:50:21.3885');
INSERT INTO process_history VALUES ('20210901', 'process-1', 'ERROR(-1)', '2021-09-02 21:50:21.3885');
INSERT INTO process_history VALUES ('20210901', 'process-2', 'SUCCESS', '2021-09-01 21:50:21.3885');
INSERT INTO process_history VALUES ('20210901', 'process-2', 'ERROR(-2)', '2021-09-02 21:50:21.3885');
INSERT INTO process_history VALUES ('20210902', 'process-1', 'SUCCESS', '2021-09-01 21:50:21.3885');
INSERT INTO process_history VALUES ('20210902', 'process-1', 'ERROR(-3)', '2021-09-02 21:50:21.3885');
INSERT INTO process_history VALUES ('20210902', 'process-1', 'ERROR(-4)', '2021-09-03 21:50:21.3885');
INSERT INTO process_history VALUES ('20210902', 'process-2', 'ERROR(-5)', '2021-09-03 21:50:21.3885');
INSERT INTO process_history VALUES ('20210903', 'process-1', 'ERROR(-6)', '2021-09-04 21:50:21.3885');
INSERT INTO process_history VALUES ('20210903', 'process-1', 'SUCCESS', '2021-09-05 21:50:21.3885');
INSERT INTO process_history VALUES ('20210903', 'process-2', 'ERROR(-7)', '2021-09-04 21:50:21.3885');
INSERT INTO process_history VALUES ('20210903', 'process-2', 'ERROR(-8)', '2021-09-05 21:50:21.3885');

SQL

SELECT
 main.process_name,
 main.process_date,
 main.result,
 main.create_at
FROM
 process_history AS main
INNER JOIN (
 SELECT
  process_name,
  process_date,
  MAX(create_at) AS create_at
 FROM
  process_history
 WHERE
  process_name='process-1'
 GROUP BY
  process_date, process_name   
) AS sub
ON
 main.process_date=sub.process_date
 AND main.process_name=sub.process_name
 AND main.create_at=sub.create_at
ORDER BY 
 main.process_date DESC;

出力結果

process_name,process_date,result,create_at
process-1,20210903,SUCCESS,2021-09-05 21:50:21.3885
process-1,20210902,ERROR(-4),2021-09-03 21:50:21.3885
process-1,20210901,ERROR(-1),2021-09-02 21:50:21.3885

参考文献
https://lightgauge.net/database/sqlserver/1672/

例2:その年の最新実行されたデータを一覧表示する

書き方を2通りあげておく。

そのうちのSQL文で使用している「RANK + PARTITION BY」については
以下の関連記事を参照のこと。

https://dk521123.hatenablog.com/entry/2021/03/11/210937

 使用テーブル / データ

-- 年間売上
DROP TABLE IF EXISTS sale;
CREATE TABLE sale
(
  -- 年(同じ年に複数データがある可能性がある)
  year INT,
  sales INT,
  -- 最新データ
  create_at TIMESTAMP NULL DEFAULT CURRENT_TIMESTAMP
);

データ

INSERT INTO sale VALUES(2011, 1420, '2011-09-01 21:50:21.3885');
INSERT INTO sale VALUES(2013, 1720, '2013-09-01 21:50:21.3885');
INSERT INTO sale VALUES(2013, 1620, '2013-08-01 21:50:21.3885');
INSERT INTO sale VALUES(2014, 1640, '2014-09-01 21:50:21.3885');
INSERT INTO sale VALUES(2014, 1600, '2014-08-01 21:50:21.3885');
INSERT INTO sale VALUES(2017, 2310, '2017-09-01 21:50:21.3885');
INSERT INTO sale VALUES(2017, 2210, '2017-08-01 21:50:21.3885');
INSERT INTO sale VALUES(2018, 2810, '2018-09-01 21:50:21.3885');
INSERT INTO sale VALUES(2020, 2710, '2020-09-01 21:50:21.3885');
INSERT INTO sale VALUES(2020, 2670, '2020-08-01 21:50:21.3885');
INSERT INTO sale VALUES(2021, 2810, '2021-08-01 21:50:21.3885');
INSERT INTO sale VALUES(2021, 2710, '2021-09-01 21:50:21.3885');

SQL文 - その1

SELECT
 s1.year AS year,
 s1.sales AS sales,
 s1.create_at
FROM
 sale AS s1
WHERE
 s1.create_at=(SELECT MAX(create_at) FROM sale AS s2 WHERE s1.year=s2.year)
ORDER BY s1.year DESC;
;

出力結果 - その1

year,sales,create_at
2021,2710,2021-09-01 21:50:21.3885
2020,2710,2020-09-01 21:50:21.3885
2018,2810,2018-09-01 21:50:21.3885
2017,2310,2017-09-01 21:50:21.3885
2014,1640,2014-09-01 21:50:21.3885
2013,1720,2013-09-01 21:50:21.3885
2011,1420,2011-09-01 21:50:21.3885

SQL文 - その2(RANK OVER PARTITION BYを使ったやり方)

SELECT
 *
FROM
(
SELECT
 s.year,
 s.sales,
 s.create_at,
 RANK() OVER (PARTITION BY s.year ORDER BY s.create_at DESC) AS rank
FROM
 sale AS s
) AS main
WHERE
 main.rank=1
ORDER BY
 year DESC
;

出力結果 - その2

year,sales,create_at,rank
2021,2710,2021-09-01 21:50:21.3885,1
2020,2710,2020-09-01 21:50:21.3885,1
2018,2810,2018-09-01 21:50:21.3885,1
2017,2310,2017-09-01 21:50:21.3885,1
2014,1640,2014-09-01 21:50:21.3885,1
2013,1720,2013-09-01 21:50:21.3885,1
2011,1420,2011-09-01 21:50:21.3885,1

関連記事

サブクエリ / 副問合せ
https://dk521123.hatenablog.com/entry/2015/12/21/002727
相関サブクエリ / 自己相関サブクエリ
https://dk521123.hatenablog.com/entry/2016/01/23/230608
SQLで変化(増加/変化なし/減少)について考える
https://dk521123.hatenablog.com/entry/2021/08/26/222043
WITH句 ~ 共通テーブル式
https://dk521123.hatenablog.com/entry/2012/07/26/013620
ROW_NUMBER / RANK + PARTITION BY
https://dk521123.hatenablog.com/entry/2021/03/11/210937