1. Oracle 中比对 2 张表之间
数据是否一致的几种方法
by Maclean.liu
liu.maclean@gmail.com
www.oracledatabase12g.com
2. About Me
l Email & Gtalk:liu.maclean@gmail.com
l Blog:www.oracledatabase12g.com
l QQ:47079569 QQ Group:23549328
l Oracle Certified Database Administrator Master 10g
and 11g
l Over 6 years experience with Oracle DBA technology
l Over 7 years experience with Linux technology
l Member Independent Oracle Users Group
l Member All China Oracle Users Group
l Presents for advanced Oracle topics: RAC,
DataGuard, Performance Tuning and Oracle Internal.
4. 大约是 2 个星期前做一个夜班的时候,开发人员需要比对 shareplex 数据同步复制软件在 源
端和目标端的 2 张表上的数据是否一致,实际上后来想了下 shareplex 本身应当具有这种数
据校验功能, 但是还是希望从数据库的角度得出几种可用的同表结构下的数据比对方法。
注意以下几种数据比对方式适用的前提条件:
1. 所要比对的表的结构是一致的
2. 比对过程中源端和 目标端 表上的数据都是静态的,没有任何 DML 修改
方式 1:
假设你所要进行数据比对的数据库其中有一个 版本为 11g 且该表上有相应的主键索引
(primary key index)或者唯一非空索引(unique key & not null)的话,那么恭喜你! 你可以
借助 11g 新引入的专门做数据对比的 PL/SQL Package dbms_comparison 来实现数据校验的
目的,如以下演示:
Source 源端版本为 11gR2 :
conn maclean/maclean
SQL> select * from v$version;
BANNER
--------------------------------------------------------------------------------
Oracle Database 11g Enterprise Edition Release 11.2.0.3.0 - 64bit Production
PL/SQL Release 11.2.0.3.0 - Production
CORE 11.2.0.3.0 Production
TNS for Linux: Version 11.2.0.3.0 - Production
NLSRTL Version 11.2.0.3.0 - Production
SQL> select * from global_name;
GLOBAL_NAME
--------------------------------------------------------------------------------
www.oracledatabase12g.com & www.askmaclean.com
-- 5. (numbering marker carried over from the source document, kept here as a comment
--     so that it no longer breaks the first statement)
-- Source-side setup: rebuild TEST1 from dba_objects, add a primary key on T1,
-- gather optimizer statistics, and create the database link named MACLEAN.
drop table test1;
create table test1 tablespace users as
  select object_id t1, object_name t2
  from dba_objects
  where object_id is not null;
alter table test1 add primary key(t1);
exec dbms_stats.gather_table_stats('MACLEAN','TEST1',cascade=>TRUE);
-- NOTE(review): 'G10R21' is presumably the connect identifier of the 10.2.0.1 target database - confirm
create database link maclean connect to maclean identified by maclean using
'G10R21';
Database link created.
以上源端数据库版本为 11.2.0.3 , 源表结构为 test1(t1 number primary key,t2 varchar2(128)),
透过 dblink 链接到版本为 10.2.0.1 的目标端
conn maclean/maclean
SQL> select * from v$version
BANNER
----------------------------------------------------------------
Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 - 64bi
PL/SQL Release 10.2.0.1.0 - Production
CORE 10.2.0.1.0 Production
TNS for Linux: Version 10.2.0.1.0 - Production
NLSRTL Version 10.2.0.1.0 - Production
-- Target-side setup: build TEST2 from dba_objects with the same two columns as TEST1,
-- add a primary key on T1, then gather optimizer statistics for the table and its index.
CREATE TABLE test2
  TABLESPACE users
AS
  SELECT object_id   t1,
         object_name t2
    FROM dba_objects
   WHERE object_id IS NOT NULL;
ALTER TABLE test2 ADD PRIMARY KEY (t1);
BEGIN
  dbms_stats.gather_table_stats('MACLEAN', 'TEST2', cascade => TRUE);
END;
/
目标端版本为 10.2.0.1 , 表结构为 test2(t1 number primary key,t2 varchar2(128))。
注意这里 2 张表上均必须有相同的主键索引或者伪主键索引(pseudoprimary key 伪主键要求是
唯一键且所有的成员列均是非空 NOT NULL)。
7. 利用 dbms_comparison.create_comparison 创建 comparison 后,新建的 comparison 会出现在
user_comparison 视图中;
以上我们完成了 comparison 的创建,但实际的校验仍未发生。下面我们利用 10046 事件监控这个数
据对比过程:
conn maclean/maclean
set timing on;
alter system flush shared_pool;
alter session set events '10046 trace name context forever,level 8';
set serveroutput on
DECLARE
  -- Receives the scan details that compare() hands back through its second argument.
  scan_details  dbms_comparison.comparison_type;
  -- Holds the return value of compare(); TRUE is reported below as 'No Differences'.
  tables_match  BOOLEAN;
BEGIN
  -- Run the comparison named MACLEAN_TEST_COM with row-level difference checking switched on.
  tables_match := dbms_comparison.compare('MACLEAN_TEST_COM',
                                          scan_details,
                                          perform_row_dif => TRUE);
  IF tables_match THEN
    dbms_output.put_line('No Differences');
  ELSE
    dbms_output.put_line('Differences Found');
  END IF;
END;
/
Differences Found =====> 返回结果为 Differences Found,说明数据存在差异并不一
致
PL/SQL procedure successfully completed.
Elapsed: 00:00:10.87
===========================10046 tkprof result =========================
SELECT MIN("T1"), MAX("T1")
FROM
"MACLEAN"."TEST1"
SELECT MIN("T1"), MAX("T1")
FROM
"MACLEAN"."TEST2"@MACLEAN
SELECT COUNT(1)
FROM
"MACLEAN"."TEST1" s WHERE ("T1" >= :scan_min AND "T1" <= :scan_max )
SELECT COUNT(1)
FROM
"MACLEAN"."TEST2"@MACLEAN s WHERE ("T1" >= :scan_min AND "T1" <= :scan_max )
SELECT q.wb1, min(q."T1") min_range1, max(q."T1") max_range1, count(*)
num_rows, sum(q.s_hash) sum_range_hash
FROM
8. (SELECT /*+ FULL(s) */ width_bucket(s."T1", :scan_min1, :scan_max_inc1,
:num_buckets) wb1, s."T1", ora_hash(NVL(to_char(s."T1"), 'ORA$STREAMS$NV'),
4294967295, ora_hash(NVL((s."T2"), 'ORA$STREAMS$NV'), 4294967295, 0))
s_hash FROM "MACLEAN"."TEST1" s WHERE (s."T1">=:scan_min1 AND s."T1"<=
:scan_max1) ) q GROUP BY q.wb1 ORDER BY q.wb1
SELECT /*+ REMOTE_MAPPED */ q.wb1, min(q."T1") min_range1, max(q."T1")
max_range1, count(*) num_rows, sum(q.s_hash) sum_range_hash
FROM
(SELECT /*+ FULL(s) REMOTE_MAPPED */ width_bucket(s."T1", :scan_min1,
:scan_max_inc1, :num_buckets) wb1, s."T1", ora_hash(NVL(to_char(s."T1"),
'ORA$STREAMS$NV'), 4294967295, ora_hash(NVL((s."T2"), 'ORA$STREAMS$NV'),
4294967295, 0)) s_hash FROM "MACLEAN"."TEST2"@MACLEAN s WHERE (s."T1">=
:scan_min1 AND s."T1"<=:scan_max1) ) q GROUP BY q.wb1 ORDER BY q.wb1
SELECT /*+ FULL(P) +*/ * FROM "MACLEAN"."TEST2" P
SELECT /*+ FULL ("A1") */
WIDTH_BUCKET("A1"."T1", :SCAN_MIN1, :SCAN_MAX_INC1, :NUM_BUCKETS),
MIN("A1"."T1"),
MAX("A1"."T1"),
COUNT(*),
SUM(ORA_HASH(NVL(TO_CHAR("A1"."T1"), 'ORA$STREAMS$NV'),
4294967295,
ORA_HASH(NVL("A1"."T2", 'ORA$STREAMS$NV'), 4294967295, 0)))
FROM "MACLEAN"."TEST2" "A1"
WHERE "A1"."T1" >= :SCAN_MIN1
AND "A1"."T1" <= :SCAN_MAX1
GROUP BY WIDTH_BUCKET("A1"."T1", :SCAN_MIN1, :SCAN_MAX_INC1, :NUM_BUCKETS)
ORDER BY WIDTH_BUCKET("A1"."T1", :SCAN_MIN1, :SCAN_MAX_INC1, :NUM_BUCKETS)
SELECT ROWID, "T1", "T2"
FROM "MACLEAN"."TEST2" "R"
WHERE "T1" >= :1
AND "T1" <= :2
--------------------------------------------------------------------------------
------------
| Id | Operation | Name | Rows | Bytes | Cost
(%CPU)| Time |
--------------------------------------------------------------------------------
------------
| 0 | SELECT STATEMENT | | 126 | 3528 | 4
(0)| 00:00:01 |
|* 1 | FILTER | | | |
| |
| 2 | TABLE ACCESS BY INDEX ROWID| TEST2 | 126 | 3528 | 4
(0)| 00:00:01 |
|* 3 | INDEX RANGE SCAN | SYS_C006255 | 227 | | 2
(0)| 00:00:01 |
--------------------------------------------------------------------------------
------------
Predicate Information (identified by operation id):
---------------------------------------------------
9. 1 - filter(TO_NUMBER(:1)<=TO_NUMBER(:2))
3 - access("T1">=TO_NUMBER(:1) AND "T1"<=TO_NUMBER(:2))
SELECT ll.l_rowid, rr.r_rowid, NVL(ll."T1", rr."T1") idx_val
FROM
(SELECT l.rowid l_rowid, l."T1", ora_hash(NVL(to_char(l."T1"),
'ORA$STREAMS$NV'), 4294967295, ora_hash(NVL((l."T2"), 'ORA$STREAMS$NV'),
4294967295, 0)) l_hash FROM "MACLEAN"."TEST1" l WHERE l."T1">=:scan_min1
AND l."T1"<=:scan_max1 ) ll FULL OUTER JOIN (SELECT /*+ NO_MERGE
REMOTE_MAPPED */ r.rowid r_rowid, r."T1", ora_hash(NVL(to_char(r."T1"),
'ORA$STREAMS$NV'), 4294967295, ora_hash(NVL((r."T2"), 'ORA$STREAMS$NV'),
4294967295, 0)) r_hash FROM "MACLEAN"."TEST2"@MACLEAN r WHERE r."T1">=
:scan_min1 AND r."T1"<=:scan_max1 ) rr ON ll."T1"=rr."T1" WHERE ll.l_hash
IS NULL OR rr.r_hash IS NULL OR ll.l_hash <> rr.r_hash
--------------------------------------------------------------------------------
--------------------------------
| Id | Operation | Name | Rows | Bytes | Cost
(%CPU)| Time | Inst |IN-OUT|
--------------------------------------------------------------------------------
--------------------------------
| 0 | SELECT STATEMENT | | 190 | 754K| 9
(12)| 00:00:01 | | |
|* 1 | VIEW | VW_FOJ_0 | 190 | 754K| 9
(12)| 00:00:01 | | |
|* 2 | HASH JOIN FULL OUTER | | 190 | 754K| 9
(12)| 00:00:01 | | |
| 3 | VIEW | | 190 | 7220 | 4
(0)| 00:00:01 | | |
|* 4 | FILTER | | |
| | | | |
| 5 | TABLE ACCESS BY INDEX ROWID| TEST1 | 190 | 5510 | 4
(0)| 00:00:01 | | |
|* 6 | INDEX RANGE SCAN | SYS_C0013098 | 341 | | 2
(0)| 00:00:01 | | |
| 7 | VIEW | | 126 | 495K| 4
(0)| 00:00:01 | | |
| 8 | REMOTE | TEST2 | 126 | 3528 | 4
(0)| 00:00:01 | MACLE~ | R->S |
--------------------------------------------------------------------------------
--------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("LL"."L_HASH" IS NULL OR "RR"."R_HASH" IS NULL OR
"LL"."L_HASH"<>"RR"."R_HASH")
2 - access("LL"."T1"="RR"."T1")
4 - filter(TO_NUMBER(:SCAN_MIN1)<=TO_NUMBER(:SCAN_MAX1))
6 - access("L"."T1">=TO_NUMBER(:SCAN_MIN1) AND
"L"."T1"<=TO_NUMBER(:SCAN_MAX1))
Remote SQL Information (identified by operation id):
----------------------------------------------------
8 - SELECT ROWID,"T1","T2" FROM "MACLEAN"."TEST2" "R" WHERE "T1">=:1 AND
"T1"<=:2 (accessing
'MACLEAN' )
10. 可以看到以上过程中虽然没有避免对 TEST1、TEST2 表的全表扫描(FULL TABLE SCAN),
但是好在实际参与 HASH JOIN FULL OUTER 的仅是访问索引后获得的少量数据,所以效率
还是挺高的。
此外可以通过 user_comparison_row_dif 了解实际哪些 row 存在差异,如:
SQL> set linesize 80 pagesize 1400
SQL> select *
2 from user_comparison_row_dif
3 where comparison_name = 'MACLEAN_TEST_COM'
4 and rownum < 2;
COMPARISON_NAME SCAN_ID LOCAL_ROWID REMOTE_ROWID
------------------------------ ---------- ------------------ ------------------
INDEX_VALUE
--------------------------------------------------------------------------------
STA LAST_UPDATE_TIME
--- ---------------------------------------------------------------------------
MACLEAN_TEST_COM 42 AAATWGAAEAAANBrAAB AAANJrAAEAAB8AMAAd
46
DIF 20-DEC-11 01.18.08.917257 PM
以上利用 dbms_comparison 包完成了一次简单的数据比对,该方法适用于 11g 以上版本且要
求表上有主键索引或非空唯一索引, 且不支持以下数据类型字段的比对
• LONG
• LONG RAW
• ROWID
• UROWID
• CLOB
• NCLOB
• BLOB
• BFILE
• User-defined types (including object types, REFs, varrays, and nested tables)
• Oracle-supplied types (including any types, XML types, spatial types, and media types)
-- 12. (numbering marker carried over from the source document, kept here as a comment
--      so that it no longer breaks the statement)
-- Symmetric difference between MACLEAN.TEST1 (local) and MACLEAN.TEST2 (through db link MACLEAN).
-- First branch: rows present in TEST1 but not in TEST2. Second branch: the reverse.
-- The "Row Source" column tells which side each reported row came from.
-- MINUS compares distinct rows, so a row that is merely duplicated a different number of
-- times on the two sides is not reported.
Select *
  from (select 'MACLEAN.TEST1' "Row Source", a.*
          from (select /*+ FULL(Tbl1) */
                 T1, T2
                  from MACLEAN.TEST1 Tbl1
                minus
                select /*+ FULL(Tbl2) */
                 T1, T2
                  from MACLEAN.TEST2@"MACLEAN" Tbl2) A
        union all
        select 'MACLEAN.TEST2@"MACLEAN"', b.*
          from (select /*+ FULL(Tbl2) */
                 T1, T2
                  from MACLEAN.TEST2@"MACLEAN" Tbl2
                minus
                select /*+ FULL(Tbl1) */
                 T1, T2
                  from MACLEAN.TEST1 Tbl1) B)
 -- Sort by name instead of position, with T1 as tie-breaker so the output order is deterministic.
 Order by "Row Source", T1;
MINUS Clause 会导致 2 张表均在本地被全表扫描(FULL TABLE SCAN),且要求发生 SORT
排序。 若所对比的表上有大量的数据,那么排序的代价将会是非常大的, 因此这种方法的
效率不高。
方式 2 MINUS 的优点在于操作简便,特别适合于小表之间的数据检验。
缺点在于 由于 SORT 排序可能导致在大数据量的情况下效率很低, 且同样不支持 LOB 和
LONG 这样的大对象。
方式 3:
使用 not exists 子句,如:
-- Rows of TEST1 that have no identical counterpart in TEST2.
-- T2 is compared NULL-safely: a plain a.t2 = b.t2 is never true when T2 is NULL, which
-- would report a row as different even when both tables hold exactly the same row.
-- Only one direction is checked here. Swap test1 and test2 to list rows that exist only in TEST2.
select *
  from test1 a
 where not exists (select 1
          from test2 b
         where a.t1 = b.t1
           and (a.t2 = b.t2
                or (a.t2 is null and b.t2 is null)));
no rows selected
21. Toad 的 compare data 功能是基于 MINUS 实现的,所以效率上并没有优势。但是通过图形界
面省去了写 SQL 语句的麻烦。这种方法同样不支持 LOB、LONG 等对象。
22. 方式 5:
这是一种别出心裁的做法。 将一行数据上的所有字段合并起来,并使用
dbms_utility.get_hash_value 对合并后的中间值取 hash value,再将所有这些从各行所获得的
hash 值 sum 累加, 若 2 表的 hash 累加值相等则判定 2 表的数据一致。
简单来说,如下面这样:
-- Demo table: a two-column copy (object_id as t1, object_name as t2) of dba_objects.
create table hash_one as select object_id t1,object_name t2 from dba_objects;
-- Show the per-row hash of the concatenated columns for two rows (rownum < 3).
-- NOTE(review): t1||t2 puts no delimiter between the columns, so different (t1, t2) pairs
-- that concatenate to the same string produce the same hash value.
select dbms_utility.get_hash_value(t1||t2,0,power(2,30)) from hash_one where
rownum <3;
DBMS_UTILITY.GET_HASH_VALUE(T1||T2,0,POWER(2,30))
-------------------------------------------------
89209477
757190129
select sum(dbms_utility.get_hash_value(t1||t2,0,power(2,30))) from hash_one;
SUM(DBMS_UTILITY.GET_HASH_VALU
------------------------------
40683165992756
select sum(dbms_utility.get_hash_value(object_id||object_name,0,power(2,30)))
from dba_objects;
SUM(DBMS_UTILITY.GET_HASH_VALU
------------------------------
40683165992756
23. 对于列较多的表,手动去构造所有字段合并可能会比较麻烦,利用以下 SQL 可以快速构造
出我们所需要的语句:
放到 PL/SQL Developer 等工具中运行,在 sqlplus 中可能因 ORA-00923: FROM keyword not
found where expected 出错
-- Generates the text of a "select sum(dbms_utility.get_hash_value(...)) from owner.table"
-- statement for the table given by the OWNER and TABNAME substitution variables.
-- All column names of the table, in alphabetical order, are chained together with the
-- literal text ||'|'|| so that the generated statement hashes col1||'|'||col2||'|'||...
--   innermost query : numbers the columns of the table (col_pos = 1, 2, 3, ...)
--   middle query    : walks the columns in col_pos order and accumulates the chained
--                     expression with sys_connect_by_path. START WITH col_pos = 1 builds
--                     only the chain rooted at the first column, not one chain per column
--   outer queries   : keep the deepest (complete) chain and wrap it in the final statement
select 'select sum(dbms_utility.get_hash_value('||
       column_name_path||',0,power(2,30)) ) from '||owner||'.'||table_name||';'
  from (select owner,
               table_name,
               column_name_path,
               row_number() over(partition by owner, table_name
                                 order by curr_level desc) column_name_path_rank
          from (select owner,
                       table_name,
                       level as curr_level,
                       ltrim(sys_connect_by_path(column_name, '||''|''||'),
                             '||''|''||') column_name_path
                  from (select owner,
                               table_name,
                               column_name,
                               row_number() over(partition by owner, table_name
                                                 order by column_name) col_pos
                          from dba_tab_columns
                         where owner = UPPER('&OWNER')
                           and table_name = UPPER('&TABNAME'))
                 start with col_pos = 1
               connect by owner = prior owner
                      and table_name = prior table_name
                      and col_pos - 1 = prior col_pos))
 where column_name_path_rank = 1;
使用示范:
SQL> @get_hash_col
Enter value for owner: SYS
Enter value for tabname: TAB$
'SELECTSUM(DBMS_UTILITY.GET_HASH_VALUE('||
COLUMN_NAME_PATH||',0,POWER(2,30)))FROM
--------------------------------------------------------------------------------
select sum(dbms_utility.get_hash_value(ANALYZETIME||'|'||AUDIT$||'|'||AVGRLN||'|
'||AVGSPC||'|'||AVGSPC_FLB||'|'||BLKCNT||'|'||BLOCK#||'|'||BOBJ#||'|'||CHNCNT||'
|'||CLUCOLS||'|'||COLS||'|'||DATAOBJ#||'|'||DEGREE||'|'||EMPCNT||'|'||FILE#||'|'
||FLAGS||'|'||FLBCNT||'|'||INITRANS||'|'||INSTANCES||'|'||INTCOLS||'|'||KERNELCO
LS||'|'||MAXTRANS||'|'||OBJ#||'|'||PCTFREE$||'|'||PCTUSED$||'|'||PROPERTY||'|'||
ROWCNT||'|'||SAMPLESIZE||'|'||SPARE1||'|'||SPARE2||'|'||SPARE3||'|'||SPARE4||'|'
||SPARE5||'|'||SPARE6||'|'||TAB#||'|'||TRIGFLAG||'|'||TS#,0,1073741824) ) from S
YS.TAB$;