SQL 常用查询场景 | 数学统计 - 众数 中位数 分位值
#
一、常用案例
#
- 根据不同条件,更新成不同值
1
2
3
4
5
6
|
UPDATE Salaries
SET salary = CASE
WHEN salary >= 300000 THEN salary * 0.9
WHEN salary >= 250000 AND salary < 280000 THEN salary * 1.2
ELSE salary -- 如果这里不标记 salary,会让不满足条件的行变成 null
END;
|
- 更换键值
1
2
3
4
|
UPDATE SomeTable
SET p_key = CASE
WHEN p_key = 'a' THEN 'b' WHEN p_key = 'b' THEN 'a' ELSE p_key END
WHERE p_key IN ('a', 'b');
|
- 索引变更
1
2
|
DROP INDEX [indexName] ON mytable;
ALTER table tableName ADD INDEX indexName(columnName);
|
二、删除重复行
#
rowid |
name |
price |
1 |
apple |
10.1 |
1
2
3
4
5
|
DELETE FROM Products P1
WHERE rowid < (
SELECT MAX(P2.rowid) FROM Products P2
WHERE P1.name = P2.name AND P1.price = P2.price
);
|
三、价格相同的水果位次一致
#
name |
price |
apple |
10 |
banana |
10 |
origin |
20 |
1
2
3
4
5
6
7
|
SELECT P1.name,
P1.price,
(SELECT COUNT(P2.price)
FROM Products P2
WHERE P2.price > P1.price) + 1 AS rank_1
FROM Products P1
ORDER BY rank_1;
|
四、统计学
#
4.1 众数(mysql mode)
#
1
2
3
4
|
SELECT income, COUNT(*) AS cnt
FROM Graduates
GROUP BY income
HAVING COUNT(*) >= ALL ( SELECT COUNT(*) FROM Graduates GROUP BY income);
|
将集合里的元素按照大小分为上半部分(S1)和下半部分(S2)两个子集,同时让这 2 个子集共同拥有集合正中间的元素。这样,共同部分的元素的平均值就是中位数,思路如下所示:
1
2
3
4
5
6
7
8
9
10
|
SELECT AVG(DISTINCT income)
FROM (
SELECT T1.income
FROM Graduates T1, Graduates T2
GROUP BY T1.income
--S1 的条件
HAVING SUM(CASE WHEN T2.income >= T1.income THEN 1 ELSE 0 END) >= COUNT(*) / 2
--S2 的条件
AND SUM(CASE WHEN T2.income <= T1.income THEN 1 ELSE 0 END) >= COUNT(*) / 2
) TMP;
|
4.3 分位值(mysql percentile)
#
有些数据库会支持类似 approx_percentile 的函数做分位值统计,但 mysql percentile 并不被支持。所以,需要在 mysql 中计算分位值需要使用一些技巧,如下是一个简单案例.
- 以
user_info
表uid
字段的p95
分位计算为例,首先需要处理记录行,给每行添加一个行id
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
+------+
| uid |
+------+
| 1001 |
| 1002 |
| 1003 |
| 1004 |
| 1005 |
+------+
-- 这里大抵的意思就是将 uid 按照大小排序,且每行动态加个行 id,
SELECT t.uid, @row_num := @row_num + 1 AS row_num
FROM user_info t, (SELECT @row_num:=0) row_counter
ORDER BY t.uid desc
+------+---------+
| uid | row_num |
+------+---------+
| 1005 | 1 |
| 1004 | 2 |
| 1003 | 3 |
| 1002 | 4 |
| 1001 | 5 |
+------+---------+
|
- 根据行
id
结果,计算并查询出总记录数的 95% 所对应的行,由此可得出对应分位值
1
2
3
4
5
6
7
8
9
10
11
|
SELECT * FROM (
SELECT t.uid, @row_num := @row_num + 1 AS row_num
FROM user_info t, (SELECT @row_num:=0) row_counter
ORDER BY t.uid desc
) temp
WHERE temp.row_num = ROUND (.95* @row_num);
+------+---------+
| uid | row_num |
+------+---------+
| 1001 | 5 |
+------+---------+
|
sql 风格规范
sql 在线美化
sql 进阶教程