17. 统计分析函数中最常用的是 ratio_to_report。它只能带 PARTITION BY 子句,不能带 ORDER BY 和 window 子句。
-- Variant 1: each department's share of the total payroll, computed by hand
-- as group sum / grand total (SUM(SUM(salary)) OVER () is the grand total).
SELECT department_id,
       SUM(salary) dept_sum,
       SUM(SUM(salary)) OVER () all_sum,
       ROUND(SUM(salary) / (SUM(SUM(salary)) OVER ()), 2) * 100 || '%' ratio
FROM hr.employees
GROUP BY department_id
ORDER BY department_id;

-- Variant 2: the same report, letting RATIO_TO_REPORT do the division.
SELECT department_id,
       SUM(salary) dept_sum,
       SUM(SUM(salary)) OVER () all_sum,
       ROUND(RATIO_TO_REPORT(SUM(salary)) OVER (), 2) * 100 || '%' ratio
FROM hr.employees
GROUP BY department_id
ORDER BY department_id;
DEPARTMENT_ID DEPT_SUM ALL_SUM RATIO
------------- ---------- ---------- -------
10 4400 691416 1%
20 19000 691416 3%
30 24900 691416 4%
40 6500 691416 1%
50 156400 691416 23%
60 28800 691416 4%
70 10000 691416 1%
80 304500 691416 44%
90 58000 691416 8%
100 51608 691416 7%
110 20308 691416 3%
7000 691416 1%
18. 要求对 ID 相同, num 连续的,查找最小 num 以及 val 求和。
select id,num,val from test_tab;
ID NUM VAL
---------- ---------- ----------
1 1 50
1 2 100
1 3 150 ID MIN(NUM) SUM(VAL)
1 5 250 ---- ---------- ----------
2 1 100 1 1 300
2 3 400 1 5 250
3 1 100 2 1 100
2 3 400
3 2 200 3 1 300
-- Gaps-and-islands: inside one ID, consecutive num values all share the same
-- (num - row_number) difference, so grouping by that difference isolates each
-- run of consecutive numbers.
WITH islands AS (
    SELECT ID,
           num,
           val,
           num - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY num) AS island_key
    FROM test_tab
)
SELECT ID, MIN(num), SUM(val)
FROM islands
GROUP BY ID, island_key
ORDER BY ID, MIN(num)
19. 对 id 相同的行按月份(mm)升序排列:如果当前行的 num 为空,则用前面最近的非空 num 填充;
前面没有非空值时,取后面最近的非空 num。
-- Fill each NULL num with the nearest earlier non-NULL value of the same ID
-- (rows ordered by mm). When no earlier value exists, fall back to the nearest
-- later one, found by scanning the partition in descending mm order.
SELECT ID,
       mm,
       num,
       NVL(LAST_VALUE(num IGNORE NULLS) OVER (PARTITION BY ID ORDER BY mm),
           LAST_VALUE(num IGNORE NULLS) OVER (PARTITION BY ID ORDER BY mm DESC)) new_num
FROM demo5
ORDER BY ID, mm;

-- Sample output printed next to the query (a blank NUM is NULL):
-- ID         MM        NUM    NEW_NUM
-- -- ---------- ---------- ----------
--  1     201001          3          3
--  1     201002          2          2
--  1     201003                     2
--  1     201004                     2
--  1     201005          1          1
--  2     201001                     2
--  2     201002          2          2
--  2     201003          3          3
--  2     201004                     3
20. SQL*PLUS 提供 BREAK 命令:当前行的列值与上一行相同时显示为空(NULL),
这是报表中常用的一种手段。
-- SQL*Plus report formatting: BREAK blanks out a department_id that repeats
-- the value of the previous row.
BREAK ON department_id
SELECT department_id, first_name
FROM hr.employees
WHERE department_id < 40
ORDER BY department_id, first_name;

-- Output of the SQL*Plus session:
-- DEPARTMENT_ID FIRST_NAME
-- ------------- --------------------
--            10 Jennifer
--            20 Michael
--               Pat
--            30 Alexander
--               Den
--               Guy
--               Karen
--               Shelli
--               Sigal

-- Same report in plain SQL: LAG returns the previous row's department_id
-- inside the partition; when it equals the current one, DECODE yields NULL.
-- The first row of each department has no previous row, so its id is kept.
SELECT DECODE(LAG(department_id, 1) OVER (PARTITION BY department_id ORDER BY first_name),
              department_id, NULL,
              department_id) newdepartment_id,
       first_name
FROM hr.employees
WHERE department_id < 40
ORDER BY department_id, first_name;
21.
-- Variant 1: keep the row with the smallest ROWID per ext value, found with a
-- correlated MIN(ROWID) subquery.
DELETE FROM duprows a
WHERE a.ROWID <> (SELECT MIN(b.ROWID)
                  FROM duprows b
                  WHERE a.ext = b.ext);

-- Plan listing printed next to variant 1 (column headers and the operation
-- indentation were not preserved):
-- ----------------------------------------------------
-- DELETE STATEMENT    |         | 2
-- DELETE              | DUPROWS |
-- HASH JOIN           |         | 2
-- VIEW                | VW_SQ_1 | 3
-- SORT GROUP BY       |         | 3
-- TABLE ACCESS FULL   | DUPROWS | 3
-- TABLE ACCESS FULL   | DUPROWS | 3
-- ------------------------------------------------

-- Variant 2: number the rows of every ext value by ROWID and delete all but
-- the first.
DELETE FROM duprows a
WHERE a.ROWID IN (SELECT ROWID
                  FROM (SELECT ROW_NUMBER() OVER (PARTITION BY b.ext ORDER BY b.ROWID) rn
                        FROM duprows b) c
                  WHERE c.rn > 1);

-- Plan listing printed next to variant 2:
-- DELETE STATEMENT            |          | 1
-- DELETE                      | DUPROWS  |
-- NESTED LOOPS                |          | 1
-- VIEW                        | VW_NSO_1 | 3
-- SORT UNIQUE                 |          | 1
-- VIEW                        |          | 3
-- WINDOW SORT                 |          | 3
-- TABLE ACCESS FULL           | DUPROWS  | 3
-- TABLE ACCESS BY USER ROWID  | DUPROWS  | 1
-- ------------------------------------------------
22. SQL> SELECT INDEX_NAME, COLUMN_NAME
2 FROM user_ind_columns
3 WHERE INDEX_NAME LIKE '%PK'
4 AND rownum < 10;
INDEX_NAME COLUMN_NAME
------------------------------ --------------------------------------------------------------------------------
ALL_ORDERS_PK YEAR
ALL_ORDERS_PK MONTH
ALL_ORDERS_PK CUST_NBR
ALL_ORDERS_PK REGION_ID
ALL_ORDERS_PK SALESPERSON_ID
ASSEMBLY_PK ASSEMBLY_TYPE
ASSEMBLY_PK ASSEMBLY_ID
A_ID_PK ID
CUSTOMER_PK                    CUST_NBR

-- Pivot the columns of each index onto one row: number the columns of an index
-- alphabetically, then pick the n-th one into column c<n>.
-- c5 must decode rn = 5 (it used to repeat rn = 4 and therefore duplicated c4).
SELECT INDEX_NAME,
       MAX(DECODE(rn, 1, COLUMN_NAME)) c1,
       MAX(DECODE(rn, 2, COLUMN_NAME)) c2,
       MAX(DECODE(rn, 3, COLUMN_NAME)) c3,
       MAX(DECODE(rn, 4, COLUMN_NAME)) c4,
       MAX(DECODE(rn, 5, COLUMN_NAME)) c5
FROM (SELECT INDEX_NAME,
             TABLE_NAME,
             COLUMN_NAME,
             ROW_NUMBER() OVER (PARTITION BY INDEX_NAME ORDER BY COLUMN_NAME) rn
      FROM user_ind_columns
      WHERE INDEX_NAME LIKE '%PK'
        AND rownum < 10) t1
GROUP BY INDEX_NAME;

-- Result for the nine sample rows listed by the first query:
-- INDEX_NAME     C1           C2             C3         C4              C5
-- -------------- ------------ -------------- ---------- --------------- -----
-- ALL_ORDERS_PK  CUST_NBR     MONTH          REGION_ID  SALESPERSON_ID  YEAR
-- ASSEMBLY_PK    ASSEMBLY_ID  ASSEMBLY_TYPE
-- A_ID_PK        ID
-- CUSTOMER_PK    CUST_NBR
23. 平均分摊问题:如何将金额平均分摊,并且把小数尾差也分摊掉,避免误差。
-- Input tables as printed by the SQL*Plus session:
-- SQL> select * from demo7_1;
--         ID     AMOUNT
-- ---------- ----------
--          1        100
--          2         50
-- Elapsed: 00:00:00.01
-- SQL> select * from demo7_2;
--         ID    PERSONS
-- ---------- ----------
--          1          3
--          2          2

-- Split each amount evenly among `persons` rows without losing a cent.
SELECT ID,
       persons,
       -- Rows are ranked per id; the first (amount - amount2) * 100 of them
       -- receive one extra cent to absorb the truncation remainder.
       (CASE WHEN rn <= (amount - amount2) * 100 THEN 0.01 ELSE 0 END) + je AS je,
       amount
FROM (SELECT t.*,
             SUM(je) OVER (PARTITION BY id) AS amount2,
             ROW_NUMBER() OVER (PARTITION BY id ORDER BY je DESC) rn
      FROM (-- Expand to one row per person; TRUNC only ever rounds down.
            SELECT tt.*
            FROM (SELECT t2.id,
                         t2.persons,
                         TRUNC(t1.amount / t2.persons, 2) je,
                         t1.amount amount
                  FROM demo7_1 t1, demo7_2 t2
                  WHERE t1.id = t2.id) tt,
                 -- Number sequence 1 .. largest head count
                 (SELECT LEVEL rn
                  FROM dual
                  CONNECT BY LEVEL <= (SELECT MAX(persons) max_num FROM demo7_2)) tm
            WHERE tt.persons >= tm.rn) t)
-- Explicit ordering so the listing is deterministic.
ORDER BY ID, rn;

-- Result:
-- ID    PERSONS         JE     AMOUNT
-- --- ---------- ---------- ----------
--   1          3      33.34        100
--   1          3      33.33        100
--   1          3      33.33        100
--   2          2         25         50
--   2          2         25         50
-- Compare a MAX() subquery with DENSE_RANK for "rows of the latest day".
DROP TABLE demo2;

-- Generate a batch of test data: all_objects multiplied by a 9-row generator.
-- Only the all_objects columns are selected, so the generator's unnamed
-- literal column is not carried into the new table.
CREATE TABLE demo2 AS
SELECT o.*
FROM all_objects o,
     (SELECT 1 FROM dual CONNECT BY LEVEL < 10);

CREATE INDEX idx_demo2 ON demo2 (TRUNC(created, 'dd'));

BEGIN
    dbms_stats.gather_table_stats(ownname => USER, tabname => 'demo2', cascade => TRUE);
END;
/

SELECT COUNT(*) FROM demo2;

-- Further test: composite index with the day in descending order.
DROP INDEX idx_demo2;
CREATE INDEX idx_demo2 ON demo2 (owner, TRUNC(created, 'dd') DESC);

-- 646749 is the row count noted in the original script.
SELECT COUNT(*) FROM demo2;

-- Variant A: scalar subquery for the owner's latest creation day.
SELECT owner, object_type
FROM demo2
WHERE owner = 'DINGJUN123'
  AND TRUNC(created, 'dd') = (SELECT MAX(TRUNC(created, 'dd'))
                              FROM demo2
                              WHERE owner = 'DINGJUN123');

-- Variant B: rank the days in descending order and keep rank 1.
SELECT owner, object_type
FROM (SELECT owner,
             object_type,
             DENSE_RANK() OVER (ORDER BY TRUNC(created, 'dd') DESC) rn
      FROM demo2
      WHERE owner = 'DINGJUN123')
WHERE rn = 1;
-- Compare a join against MAX() with RANK() for "row(s) holding the top sal per ID".
DROP TABLE t1;
CREATE TABLE t1 AS
SELECT MOD(LEVEL, 1000) ID,
       LEVEL + 1000 sal,
       MOD(LEVEL, 10) ext
FROM dual
CONNECT BY LEVEL < 1000000;

-- Variant A: join back to the per-ID maximum.
SELECT a.ID, a.sal, a.ext
FROM t1 a,
     (SELECT ID, MAX(sal) max_sal
      FROM t1
      GROUP BY ID) b
WHERE a.sal = b.max_sal
  AND a.ID = b.ID;

-- Variant B: rank inside each ID and keep rank 1.
SELECT ID, sal, ext
FROM (SELECT ID,
             sal,
             ext,
             RANK() OVER (PARTITION BY ID ORDER BY sal DESC) rn
      FROM t1)
WHERE rn = 1;

-- Original note (translated): once ID is unique, the analytic-function form is
-- the faster one, because the analytic function has to sort everything and
-- then scan it anyway, and there are internal optimizations.
-- NOTE(review): the original reads "块" (block), presumably a typo for
-- "快" (fast) - confirm.
DROP TABLE t1;
CREATE TABLE t1 AS
SELECT LEVEL ID,
       LEVEL + 1000 sal,
       MOD(LEVEL, 10) ext
FROM dual
CONNECT BY LEVEL < 1000000;
从文档中的描述和语法结构图,抓住关注点:
1. 有的分析函数可以带 window 子句,有的不能带 window 子句。不能带 window 子句的比如 rank、dense_rank、row_number、FIRST、LAST、lead、lag;可以带 window 子句的比如 count、SUM、AVG、MIN、MAX、first_value、last_value。FIRST、LAST 里的分析函数部分 order BY 都是不允许的。有的分析函数,比如 row_number、dense_rank、rank 是必须要有 order BY 的。
2. PARTITION BY 是按字段值将对应的行分组(不能带括号,带括号的是 model 和 PARTITIONED outer JOIN 使用的),ORDER BY 是组内行的顺序,window 子句决定每行对应的窗口范围。
3. PARTITION BY、ORDER BY、window 子句共同决定了当前行对应的窗口范围,当前行的分析函数值就是基于这个窗口计算的。
4. 注意 partition BY、ORDER BY、window 子句的关系:window 子句是在 partition by 和 order BY 的前提下设定当前行对应的窗口范围的,因此必须有 order by 才能写 window 子句。可以没有这三个子句,那么相当于当前行对应于所有行的窗口,即 BETWEEN unbounded preceding AND unbounded following。有 partition by 可以没有 order BY,注意有的分析函数必须要有 order BY。
5. ORDER by 与 rows、range 的区别:range 保证结果的稳定性,RANGE 的行都是逻辑行,按 order BY 值计算,包括 current ROW 也是逻辑行。默认情况下,有 order by 而没有 window 子句时,窗口就是从逻辑上限到当前逻辑行,排序键重复时结果稳定。ROWS 是物理行,按排序的行标计算,CURRENT ROW 是物理行,排序键重复时结果不稳定。ORDER BY 如果有多个排序键值,那么 range 对应的窗口必须是以下之一(因为逻辑窗口不知道按什么键来计算):
a. between unbounded preceding and current row -- 相当于没有写 window,因为 order by 默认就是组的首行到当前行
b. between current row and unbounded following
c. between unbounded preceding and unbounded following -- 相当于没有写 order by,表示是组的首行到组的末行
在排名函数, FIRST/LAST(原文此处不完整)
6. window 子句定义的范围必须从上到下。比如:rows 1 following、range 1 following、rows between 1 preceding and 2 preceding 都是错误的。默认的窗口是 range 到 current ROW。
7. 0 following 和 0 preceding 都等同于 current row。分析函数里使用 DISTINCT 有限制,不能带 order by。
8. 分析子句顺序是 partition 子句、order by 子句、window 子句。其中有的分析函数必须有 order by 子句,另外有 window 子句必须要有 order by 子句。分析函数是在 from、where、group by、having 之后才开始工作的,出现在最后的 order by 和 select 之前。分析函数也可以用于子查询,用于过滤父查询的查询结果。分析函数只允许出现在 order by 和 select 中,只是针对同一级查询,不可嵌套。如果最后查询结果需要一定的顺序,则在最后显式指定 order by,因为分区子句的 order by 只保证组内有序,特别是有多个分析函数的时候,会覆盖排序结果(函数优先级、顺序等),所以最后要显式指定 order by。
9.
分析函数的缺点:经常需要有排序操作,很多就算无 order by 也需要内部排序;如果写多个分析函数,会产生很多排序,依赖于内存。当然也可以优化排序,比如通过索引消除排序。 分析函数的优点:代替复杂的子查询、join 等,减少表的扫描次数,提高效率。 需要了解逻辑窗口与物理窗口、逻辑当前行与物理当前行,也就是 range 与 rows 的区别。
-- ROW_NUMBER / RANK / DENSE_RANK side by side, followed by correlated-subquery
-- emulations of each of them.
DROP TABLE emp;
CREATE TABLE emp AS
SELECT empno, sal, deptno
FROM scott.emp;

SELECT * FROM emp;

-- Two extra department-30 rows with sal 1500, inserted so that ties exist.
INSERT INTO emp (empno, sal, deptno) VALUES (7845, 1500, 30);
INSERT INTO emp (empno, sal, deptno) VALUES (7846, 1500, 30);

SELECT empno, sal, deptno,
       ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY sal) rn
FROM emp;

SELECT empno, sal, deptno,
       RANK() OVER (PARTITION BY deptno ORDER BY sal) rn
FROM emp;

SELECT empno, sal, deptno,
       DENSE_RANK() OVER (PARTITION BY deptno ORDER BY sal) rn
FROM emp;

-- Emulate DENSE_RANK: count the DISTINCT lower salaries in the department.
SELECT t1.empno, t1.sal, t1.deptno,
       (SELECT COUNT(DISTINCT t2.sal) + 1
        FROM emp t2
        WHERE t1.deptno = t2.deptno
          AND t1.sal > t2.sal) rn
FROM emp t1
ORDER BY t1.deptno, rn;

-- Emulate RANK: count every lower salary, duplicates included.
SELECT t1.empno, t1.sal, t1.deptno,
       (SELECT COUNT(t2.sal) + 1
        FROM emp t2
        WHERE t1.deptno = t2.deptno
          AND t1.sal > t2.sal) rn
FROM emp t1
ORDER BY t1.deptno, rn;

-- Emulate ROW_NUMBER: break salary ties with ROWID (any unique value would do).
SELECT t1.empno, t1.sal, t1.deptno,
       (SELECT COUNT(t2.sal) + 1
        FROM emp t2
        WHERE t1.deptno = t2.deptno
          AND (t1.sal > t2.sal
               OR (t1.sal = t2.sal AND t1.ROWID > t2.ROWID))) rn
FROM emp t1
ORDER BY t1.deptno, rn;
FIRST/LAST 可以作为组函数和分析函数:组函数不带 OVER,分析函数带 OVER。它必须结合 KEEP 关键字使用,KEEP 关键字起到一个语义的作用:说明按照指定的排序规则找到 TOP 1 或 BOTTOM 1;因为是 DENSE_RANK,所以 TOP 1 和 BOTTOM 1 可能会有多行,然后外层用组函数对 TOP 1 或 BOTTOM 1 的行再处理。 FIRST/LAST 的计算顺序是:如果有 OVER,则按照指定的分区规则,对每个分区内的行按照 KEEP 中的 ORDER BY 排序,计算 TOP 1 或 BOTTOM 1,然后外层用组函数求值;没有 OVER 就是对所有行(可能是分组后的行)计算一次聚合值,返回一行。 FIRST 和 LAST 允许我们对 A 列排名,而组函数操作可以对 B 列进行,避免了子查询和自连接,从而提高效率。 FIRST 和 LAST 可以用于组函数和分析函数,如果没有 over() 那么就是作为组函数使用。其中语法图中的 aggregate_function 可以使用 MIN、MAX、SUM、AVG、COUNT、VARIANCE 或 STDDEV 函数。 KEEP 与 FIRST、LAST 通常不用作分析函数,而是直接用作组函数:对一列排序,求另一列的聚合函数值。
-- demo5.sql
-- Fill NULL num values from the nearest non-NULL neighbour within the same ID.
DROP TABLE demo5;
CREATE TABLE demo5 (ID NUMBER, mm NUMBER, num NUMBER);

INSERT INTO demo5 (ID, mm, num) VALUES (1, 201001, 3);
INSERT INTO demo5 (ID, mm, num) VALUES (1, 201002, 2);
INSERT INTO demo5 (ID, mm, num) VALUES (1, 201003, NULL);
INSERT INTO demo5 (ID, mm, num) VALUES (1, 201004, NULL);
INSERT INTO demo5 (ID, mm, num) VALUES (1, 201005, 1);
INSERT INTO demo5 (ID, mm, num) VALUES (2, 201001, NULL);
INSERT INTO demo5 (ID, mm, num) VALUES (2, 201002, 2);
INSERT INTO demo5 (ID, mm, num) VALUES (2, 201003, 3);
INSERT INTO demo5 (ID, mm, num) VALUES (2, 201004, NULL);
COMMIT;

SELECT * FROM demo5;

-- 10g: the nearest earlier non-NULL value, else the nearest later one (the
-- second LAST_VALUE walks the partition in descending mm order).
SELECT ID,
       mm,
       num,
       NVL(LAST_VALUE(num IGNORE NULLS) OVER (PARTITION BY ID ORDER BY mm),
           LAST_VALUE(num IGNORE NULLS) OVER (PARTITION BY ID ORDER BY mm DESC)) new_num
FROM demo5
ORDER BY ID, mm;

-- 11g: LAG / LEAD accept IGNORE NULLS, so the same rule reads more directly.
-- Ordered like the 10g query so the two listings can be compared row by row.
SELECT ID,
       mm,
       num,
       COALESCE(num,
                LAG(num IGNORE NULLS) OVER (PARTITION BY ID ORDER BY mm),
                LEAD(num IGNORE NULLS) OVER (PARTITION BY ID ORDER BY mm)) new_num
FROM demo5
ORDER BY ID, mm;
-- Two ways to delete duplicate rows (same ext), keeping the lowest ROWID.
CREATE TABLE duprows AS
SELECT LEVEL ID,
       MOD(LEVEL, 3) ext
FROM dual
CONNECT BY LEVEL < 20;

-- Variant 1: correlated MIN(ROWID) per ext value.
DELETE FROM duprows a
WHERE a.ROWID <> (SELECT MIN(b.ROWID)
                  FROM duprows b
                  WHERE a.ext = b.ext);

-- Undo variant 1 so that variant 2 still has duplicates to remove; without
-- this the second DELETE finds nothing. The CREATE TABLE above has already
-- committed, so only the DELETE is rolled back.
ROLLBACK;

-- Variant 2: number the rows of each ext value by ROWID and delete all but the
-- first. The ROWID is projected under an explicit alias instead of being
-- selected from the inline view.
DELETE FROM duprows a
WHERE a.ROWID IN (SELECT c.rid
                  FROM (SELECT b.ROWID rid,
                               ROW_NUMBER() OVER (PARTITION BY b.ext ORDER BY b.ROWID) rn
                        FROM duprows b) c
                  WHERE c.rn > 1);
-- Sample of the primary-key index columns, one row per (index, column).
SELECT INDEX_NAME, COLUMN_NAME
FROM user_ind_columns
WHERE INDEX_NAME LIKE '%PK'
  AND rownum < 10;

-- Pivot the columns of each index onto one row: number the columns of an index
-- alphabetically, then pick the n-th one into column c<n>.
-- c5 must decode rn = 5 (it used to repeat rn = 4 and therefore duplicated c4).
SELECT INDEX_NAME,
       MAX(DECODE(rn, 1, COLUMN_NAME)) c1,
       MAX(DECODE(rn, 2, COLUMN_NAME)) c2,
       MAX(DECODE(rn, 3, COLUMN_NAME)) c3,
       MAX(DECODE(rn, 4, COLUMN_NAME)) c4,
       MAX(DECODE(rn, 5, COLUMN_NAME)) c5
FROM (SELECT INDEX_NAME,
             TABLE_NAME,
             COLUMN_NAME,
             ROW_NUMBER() OVER (PARTITION BY INDEX_NAME ORDER BY COLUMN_NAME) rn
      FROM user_ind_columns
      WHERE INDEX_NAME LIKE '%PK'
        AND rownum < 10) t1
GROUP BY INDEX_NAME;
-- demo7_1 is the amount table; its id matches demo7_2.
-- demo7_2.persons is the number of people the matching demo7_1 amount is
-- split among.
-- Idea: always truncate, never round up; any money left over is handed out one
-- cent per row, in descending order of share, until it is used up. This is not
-- necessarily fair - check that the business accepts it.
CREATE TABLE demo7_1 (ID NUMBER, amount NUMBER);
CREATE TABLE demo7_2 (ID NUMBER, persons NUMBER);

INSERT INTO demo7_1 (ID, amount) VALUES (1, 100);
INSERT INTO demo7_1 (ID, amount) VALUES (2, 50);
INSERT INTO demo7_2 (ID, persons) VALUES (1, 3);
INSERT INTO demo7_2 (ID, persons) VALUES (2, 2);
COMMIT;

SELECT ID,
       persons,
       -- Rows are ranked per id; the first (amount - amount2) * 100 of them
       -- receive one extra cent to absorb the truncation remainder.
       (CASE WHEN rn <= (amount - amount2) * 100 THEN 0.01 ELSE 0 END) + je AS je,
       amount
FROM (SELECT t.*,
             SUM(je) OVER (PARTITION BY id) AS amount2,
             ROW_NUMBER() OVER (PARTITION BY id ORDER BY je DESC) rn
      FROM (-- Expand to one row per person; TRUNC only ever rounds down.
            SELECT tt.*
            FROM (SELECT t2.id,
                         t2.persons,
                         TRUNC(t1.amount / t2.persons, 2) je,
                         t1.amount amount
                  FROM demo7_1 t1, demo7_2 t2
                  WHERE t1.id = t2.id) tt,
                 -- Number sequence 1 .. largest head count
                 (SELECT LEVEL rn
                  FROM dual
                  CONNECT BY LEVEL <= (SELECT MAX(persons) max_num FROM demo7_2)) tm
            WHERE tt.persons >= tm.rn) t)
-- Explicit ordering so the listing is deterministic.
ORDER BY ID, rn;