Storing tree structures in a bi-dimensional table has always been problematic. The simplest tree models are usually quite inefficient, while more complex ones aren't necessarily better. In this talk I briefly go through the most used models (adjacency list, materialized path, nested sets) and introduce some more advanced ones belonging to the nested intervals family (Farey algorithm, Continued Fractions, and other encodings). I describe the advantages and pitfalls of each model, some proprietary solutions (e.g. Oracle's CONNECT BY) and one of the SQL Standard's upcoming features, Common Table Expressions.
11. Adjacency List Model
CREATE TABLE emp_orgchart (
emp VARCHAR(10) NOT NULL PRIMARY KEY,
boss VARCHAR(10),
salary DECIMAL(6,2) NOT NULL
);
A
emp boss salary
A NULL 1000.00
B C
B A 900.00
C A 900.00
D C 800.00 D E F
E C 700.00
F C 600.00
5
12. Adjacency List Model
Finding the root node
SELECT *
FROM emp_orgchart
WHERE boss IS NULL;
Finding the leaf nodes
SELECT e1.*
FROM emp_orgchart AS e1
LEFT JOIN emp_orgchart AS e2 ON e1.emp = e2.boss
WHERE e2.emp IS NULL;
6
13. Adjacency List Model Anomalies
UPDATE anomalies
START TRANSACTION;
emp boss salary
UPDATE emp_orgchart A NULL 1000.00
SET emp = ‘C1’ B A 900.00
WHERE emp = ‘C’; C1 A 900.00
D C 800.00
UPDATE emp_orgchart E C 700.00
SET boss = ‘C1’ F C 600.00
WHERE boss = ‘C’;
COMMIT;
7
14. Adjacency List Model Anomalies
UPDATE anomalies
START TRANSACTION;
emp boss salary
UPDATE emp_orgchart A NULL 1000.00
SET emp = ‘C1’ B A 900.00
WHERE emp = ‘C’; C1 A 900.00
D C 800.00
UPDATE emp_orgchart E C 700.00
SET boss = ‘C1’ F C 600.00
WHERE boss = ‘C’;
COMMIT;
7
15. Adjacency List Model Anomalies
UPDATE anomalies
START TRANSACTION;
UPDATE emp_orgchart
SET emp = CASE emp
UPDATE emp_orgchart
WHEN ‘C’ THEN ‘C1’
SET emp = ‘C1’
ELSE emp END,
WHERE emp = ‘C’;
boss = CASE boss
WHEN ‘C’ THEN ‘C1’
UPDATE emp_orgchart
ELSE boss END
SET boss = ‘C1’
WHERE ‘C’ IN (emp, boss);
WHERE boss = ‘C’;
COMMIT;
7
17. Adjacency List Model Anomalies
INSERT anomalies
INSERT INTO emp_orgchart (emp, boss) VALUES
(‘G’, ‘H’),
(‘H’, ‘G’);
G H
8
18. Adjacency List Model Anomalies
INSERT anomalies
INSERT INTO emp_orgchart (emp, boss) VALUES
(‘G’, ‘H’),
(‘H’, ‘G’);
G H
INSERT INTO emp_orgchart VALUES (‘M’, ‘M’);
M
8
19. Adjacency List Model Anomalies
DELETE anomalies
DELETE FROM emp_orgchart WHERE emp = ‘C’;
A
B C
D E F
9
20. Adjacency List Model Anomalies
DELETE anomalies
DELETE FROM emp_orgchart WHERE emp = ‘C’;
A
B C
D E F
9
21. Fixing the Adjacency List Model
CREATE TABLE emp (
id INTEGER DEFAULT 0 PRIMARY KEY,
name VARCHAR(10) NOT NULL,
salary DECIMAL (6,2) NOT NULL
CHECK (salary >= 0.00),
...
);
CREATE TABLE orgchart (
emp_id INTEGER NOT NULL DEFAULT 0 -- vacant
REFERENCES emp(id)
ON DELETE SET DEFAULT ON UPDATE CASCADE,
boss_id INTEGER -- NULL means root node
REFERENCES emp(id),
...
);
10
22. Fixing the Adjacency List Model
-- cannot be your own boss
CHECK ((boss_id <> emp_id) OR
(boss_id = 0 AND emp_id = 0)),
11
23. Fixing the Adjacency List Model
-- cannot be your own boss
CHECK ((boss_id <> emp_id) OR
(boss_id = 0 AND emp_id = 0)),
-- prevent longer cycles
UNIQUE (emp_id, boss_id),
11
24. Fixing the Adjacency List Model
-- cannot be your own boss
CHECK ((boss_id <> emp_id) OR
(boss_id = 0 AND emp_id = 0)),
-- prevent longer cycles
UNIQUE (emp_id, boss_id),
-- connected graph (edges = nodes - 1)
CHECK ((SELECT COUNT(*) FROM orgchart) - 1
= (SELECT COUNT(boss_id) FROM orgchart)),
11
25. Fixing the Adjacency List Model
-- cannot be your own boss
CHECK ((boss_id <> emp_id) OR
(boss_id = 0 AND emp_id = 0)),
-- prevent longer cycles
UNIQUE (emp_id, boss_id),
-- connected graph (edges = nodes - 1)
CHECK ((SELECT COUNT(*) FROM orgchart) - 1
= (SELECT COUNT(boss_id) FROM orgchart)),
-- one root only
CHECK ((SELECT COUNT(*) FROM orgchart WHERE
boss_id IS NULL) = 1),
11
26. Adjacency List Model - Navigation
Loops / Self-Joins
procedural code
SELECT o1.emp AS e1,
o2.emp AS e2,
CREATE PROCEDURE
o3.emp AS e3
TreeTraversal (IN
FROM emp_orgchart AS o1
current_emp_id INT)
LEFT OUTER JOIN
BEGIN
emp_orgchart AS o2
...
ON o1.emp = o2.boss
LEFT OUTER JOIN
emp_orgchart AS o3
ON o2.emp = o3.boss
WHERE o1.emp = ‘A’;
12
36. Path Enumeration Model
CREATE TABLE emp_orgchart (
emp_id CHAR(1) NOT NULL PRIMARY KEY
-- if using a separator, don’t use it in the ID
CHECK (REPLACE(emp_id, ‘/’, ‘’) = emp_id),
emp_name VARCHAR(10) NOT NULL,
path_string VARCHAR(512) NOT NULL
);
emp_id emp_name path_string A
A Anthony A
B Beryl AB
B C
C Charles AC
D Denise ACD
E Ellen ACE D E F
F Frank ACF
15
37. Path Enumeration Model
-- prevent cycles (nodes appearing twice: ‘ABCA’)
CHECK (NOT EXISTS(
SELECT *
FROM emp_orgchart AS D1,
emp_orgchart AS P1
WHERE LENGTH(REPLACE(P1.path_string, D1.emp_id,
‘’)) <
LENGTH(P1.path_string) - LENGTH(D1.emp_id))
))
-- No path can be longer than the number of nodes
CHECK((SELECT MAX(LENGTH(path_string))
FROM emp_orgchart AS E1) <=
(SELECT COUNT(emp_id) * LENGTH(emp_id)
FROM emp_orgchart AS E2))
16
41. Path Enumeration Model
Depth of the node
• LENGTH(path_string) / LENGTH(emp_id)
• LENGTH(path_string) -
LENGTH(REPLACE(path_string, ‘/’, ‘’)) + 1
17
42. Path Enumeration Model
Depth of the node
• LENGTH(path_string) / LENGTH(emp_id)
• LENGTH(path_string) -
LENGTH(REPLACE(path_string, ‘/’, ‘’)) + 1
17
43. Path Enumeration Model
Depth of the node
• LENGTH(path_string) / LENGTH(emp_id)
• LENGTH(path_string) -
LENGTH(REPLACE(path_string, ‘/’, ‘’)) + 1
Searching for subordinates
17
44. Path Enumeration Model
Depth of the node
• LENGTH(path_string) / LENGTH(emp_id)
• LENGTH(path_string) -
LENGTH(REPLACE(path_string, ‘/’, ‘’)) + 1
Searching for subordinates
• SELECT * FROM emp_orgchart
WHERE path_string LIKE ‘%’ || :parent_emp_id ||
‘%’;
17
45. Path Enumeration Model
Depth of the node
• LENGTH(path_string) / LENGTH(emp_id)
• LENGTH(path_string) -
LENGTH(REPLACE(path_string, ‘/’, ‘’)) + 1
Searching for subordinates
• SELECT * FROM emp_orgchart
WHERE path_string LIKE ‘%’ || :parent_emp_id ||
‘%’;
• SELECT * FROM emp_orgchart
WHERE path_string LIKE
(SELECT path_string FROM emp_orgchart
WHERE emp_id = :parent_emp_id) || ‘%’;
17
46. Path Enumeration Model
Searching for superiors
SELECT SUBSTRING(P1.path_string
FROM seq * LENGTH(P1.emp_id)
FOR LENGTH(P1.emp_id)) AS emp_id
FROM emp_orgchart AS P1,
sequence AS S1
WHERE P1.emp_id = :search_emp_id
AND S1.seq <= LENGTH(path_string)/
LENGTH(emp_id);
18
47. Path Enumeration Model
Deleting a subtree:
DELETE FROM emp_orgchart
WHERE path_string LIKE (
SELECT path_string
FROM emp_orgchart
WHERE emp_id = :deleted_node) || ‘%’;
Deleting a single node:
START TRANSACTION;
DELETE FROM emp_orgchart
WHERE emp_id = :deleted_node;
UPDATE emp_orgchart SET path_string =
REPLACE(path_string, :deleted_node, ‘’);
COMMIT;
19
48. Path Enumeration Model
Insertion of a new node
INSERT INTO emp_orgchart VALUES (
:new_emp_id, -- ‘M’
:new_name, -- ‘Maggie’
(SELECT path_string
FROM emp_orgchart
WHERE emp_id = :boss_of_new_emp) -- ‘D’
|| :new_emp_id -- ‘M’
);
emp_id emp_name path_string
D Denise ACD
M Maggie ACDM
20
49. Path Enumeration Model
Insertion of a new node
INSERT INTO emp_orgchart VALUES (
:new_emp_id, -- ‘M’
:new_name, -- ‘Maggie’
(SELECT path_string
FROM emp_orgchart
WHERE emp_id = :boss_of_new_emp) -- ‘D’
|| :new_emp_id -- ‘M’
);
emp_id emp_name path_string
D Denise ACD
M Maggie ACDM
20
50. Path Enumeration Model
Insertion of a new node
INSERT INTO emp_orgchart VALUES (
:new_emp_id, -- ‘M’
:new_name, -- ‘Maggie’
(SELECT path_string
FROM emp_orgchart
WHERE emp_id = :boss_of_new_emp) -- ‘D’
|| :new_emp_id -- ‘M’
);
emp_id emp_name path_string
D Denise ACD
M Maggie ACDM
20
51. Path Enumeration Model
Splitting the path string into nodes
SELECT CASE
WHEN SUBSTRING(‘/’ || P1.path_string || ‘/’
FROM S1.seq FOR 1) = ‘/’
THEN SUBSTRING(‘/’ || P1.path_string || ‘/’
FROM (S1.seq + 1)
FOR CharIndex(‘/’ ,
‘/’ || P1.path_string || ‘/’,
S1.seq + 1) - S1.seq - 1
ELSE NULL END AS emp_id
FROM sequence AS S1,
emp_orgchart AS P1
WHERE S1.seq BETWEEN 1
AND LENGTH(P1.path_string) + 1
AND SUBSTRING(‘/’ || P1.path_string || ‘/’
FROM S1.seq FOR 1) = ‘/’
21
52. Path Enumeration Model
emp_name edge_path Edge
Anthony 1.
Beryl 1.1.
Enumeration
Charles 1.2. Model
Denise 1.2.1.
Ellen 1.2.2.
Frank 1.2.3.
Anthony
Beryl Charles
Denise Ellen Frank
22
55. Nested Set Model
A
C
B D E F
A
1 12
B C
2 3 4 11
D E F
5 6 7 8 9 10
24
56. Nested Set Model
CREATE TABLE orgchart (
emp VARCHAR(10) NOT NULL PRIMARY KEY,
lft INTEGER NOT NULL,
rgt INTEGER NOT NULL
); A
1 12
B C
2 3 4 11
D E F
5 6 7 8 9 10
25
57. Nested Set Model
CREATE TABLE orgchart (
emp VARCHAR(10) NOT NULL PRIMARY KEY,
lft INTEGER NOT NULL,
rgt INTEGER NOT NULL
); A
1 12
B C
1 A 12 2 3 4 11
D E F
5 6 7 8 9 10
2 B 3 4 C 11
D E F
5 6 7 8 9 10
25
58. Nested Set Model
Finding root node
1 A 12
2 B 3 4 C 11
D E F
5 6 7 8 9 10
26
59. Nested Set Model
Finding root node
SELECT *
FROM orgchart
WHERE lft = 1; 1 A 12
2 B 3 4 C 11
D E F
5 6 7 8 9 10
26
60. Nested Set Model
Finding root node
SELECT *
FROM orgchart
WHERE lft = 1; 1 A 12
Finding leaf nodes
2 B 3 4 C 11
D E F
5 6 7 8 9 10
26
61. Nested Set Model
Finding root node
SELECT *
FROM orgchart
WHERE lft = 1; 1 A 12
Finding leaf nodes
SELECT *
2 B 3 4 C 11
FROM orgchart
WHERE lft = (rgt - 1);
D E F
5 6 7 8 9 10
26
62. Nested Set Model
Finding root node
SELECT *
FROM orgchart
WHERE lft = 1; 1 A 12
Finding leaf nodes
SELECT *
2 B 3 4 C 11
FROM orgchart
WHERE lft = (rgt - 1);
D E F
Finding subtrees 5 6 7 8 9 10
26
63. Nested Set Model
Finding root node
SELECT *
FROM orgchart
WHERE lft = 1; 1 A 12
Finding leaf nodes
SELECT *
2 B 3 4 C 11
FROM orgchart
WHERE lft = (rgt - 1);
D E F
Finding subtrees 5 6 7 8 9 10
SELECT Mgrs.emp AS boss
Workers.emp AS worker
FROM orgchart AS Mgrs,
orgchart AS Workers
WHERE Workers.lft > Mgrs.lft
AND Workers.lft < Mgrs.rgt
26
64. Nested Set Model
Finding root node
SELECT *
FROM orgchart
WHERE lft = 1; 1 A 12
Finding leaf nodes
SELECT *
2 B 3 4 C 11
FROM orgchart
WHERE lft = (rgt - 1);
D E F
Finding subtrees 5 6 7 8 9 10
SELECT Mgrs.emp AS boss
Workers.emp AS worker
FROM orgchart AS Mgrs,
orgchart AS Workers
WHERE Workers.lft > Mgrs.lft
BETWEEN Mgrs.lft AND Mgrs.rgt
AND Workers.lft < Mgrs.rgt
26
66. Nested Set Model
Finding the level of a node
SELECT o2.emp,
COUNT(o1.emp) AS lvl
FROM orgchart AS o1,
orgchart AS o2
WHERE o2.lft BETWEEN o1.lft AND o1.rgt
GROUP BY o2.emp;
27
67. Nested Set Model
Finding the level of a node
SELECT o2.emp,
COUNT(o1.emp) AS lvl
FROM orgchart AS o1,
orgchart AS o2
WHERE o2.lft BETWEEN o1.lft AND o1.rgt
GROUP BY o2.emp;
Finding the height of the tree
27
68. Nested Set Model
Finding the level of a node
SELECT o2.emp,
COUNT(o1.emp) AS lvl
FROM orgchart AS o1,
orgchart AS o2
WHERE o2.lft BETWEEN o1.lft AND o1.rgt
GROUP BY o2.emp;
Finding the height of the tree
SELECT MAX(lvl) AS height FROM (
SELECT o2.emp,
COUNT(o1.emp) - 1
FROM orgchart AS o1,
orgchart AS o2
WHERE o2.lft BETWEEN o1.lft AND o1.rgt
GROUP BY o2.emp) AS L1(emp, lvl);
27
69. Nested Set Model
Finding relative position
SELECT CASE
WHEN :emp1 = :emp2
THEN :emp1 || ‘ is ’ || :emp2
WHEN o1.lft BETWEEN o2.lft AND o2.rgt
THEN :emp1 || ‘ subordinate to ’ || :emp2
WHEN o2.lft BETWEEN o1.lft AND o1.rgt
THEN :emp2 || ‘ subordinate to ’ || :emp1
ELSE :emp1 || ‘ no relation to ’ || :emp2
END
FROM orgchart AS o1,
orgchart AS o2
WHERE o1.emp = :emp1 AND o2.emp = :emp2;
28
70. Nested Set Model
Deleting subtrees
DELETE FROM orgchart
WHERE lft BETWEEN (SELECT lft
FROM orgchart
WHERE emp = :start_node)
AND (SELECT rgt
FROM orgchart
Filling gaps WHERE emp = :start_node);
CREATE VIEW LftRgt (seq) AS
SELECT lft FROM orgchart
UNION ALL
SELECT rgt FROM orgchart;
UPDATE orgchart SET
lft = (SELECT COUNT(*) FROM LftRgt WHERE seq <= lft),
rgt = (SELECT COUNT(*) FROM LftRgt WHERE seq <= rgt)
29
71. Nested Set Model
Deleting a single node
DELETE FROM orgchart
WHERE emp = ‘C’;
A
C
B D E F
30
72. Nested Set Model
Deleting a single node
DELETE FROM orgchart
WHERE emp = ‘C’;
A
B D E F
30
73. Nested Set Model
Deleting a single node
CREATE PROCEDURE downsize(IN removed VARCHAR(10))
LANGUAGE SQL DETERMINISTIC
UPDATE orgchart SET emp = CASE
WHEN orgchart.emp = removed
AND orgchart.lft + 1 = orgchart.rgt --leaf
THEN ‘{vacant}’
WHEN orgchart.emp = removed
AND orgchart.lft + 1 <> orgchart.rgt
-- promote subordinate and vacate
THEN (SELECT o1.emp
FROM orgchart AS o1
WHERE orgchart.lft + 1 = o1.lft)
WHEN orgchart.emp = (SELECT o1.emp
FROM orgchart AS o1
WHERE orgchart.lft + 1 = o1.lft)
THEN ‘{vacant}’ ELSE emp END;
31
74. Nested Set Model
Deleting a single node
CREATE PROCEDURE downsize(IN removed VARCHAR(10))
LANGUAGE SQL DETERMINISTIC
UPDATE orgchart SET emp = CASE
WHEN orgchart.emp = removed
AND orgchart.lft + 1 = orgchart.rgt --leaf
THEN ‘{vacant}’
WHEN orgchart.emp = removed
AND orgchart.lft + 1 <> orgchart.rgt
-- promote subordinate and vacate
THEN (SELECT o1.emp
FROM orgchart AS o1
WHERE orgchart.lft + 1 = o1.lft)
WHEN orgchart.emp = (SELECT o1.emp
FROM orgchart AS o1
WHERE orgchart.lft + 1 = o1.lft)
THEN ‘{vacant}’ ELSE emp END;
31
75. Nested Set to Adjacency List
SELECT B.id AS boss,
P.emp, P.lft, P.rgt
FROM orgchart as P
LEFT OUTER JOIN emp AS B ON B.lft =
(SELECT MAX(S.lft)
FROM orgchart AS S
WHERE P.lft > S.lft
AND P.lft < S.rgt);
boss emp lft rgt
NULL Anthony 1 12
Anthony Beryl 2 3
Anthony Charles 4 11
Charles Denise 5 6
Charles Ellen 7 8
Charles Frank 9 10
32
76. Frequent Insertion Trees
Avoid overhead (locking + updating) with larger spreads
and bigger gaps
emp lft rgt
Anthony 100 1200
Beryl 200 300
Charles 400 1100
Denise 500 600
Ellen 700 800
Frank 900 1000
33
77. Frequent Insertion Trees
Avoid overhead (locking + updating) with larger spreads
and bigger gaps
emp lft rgt
Anthony 100 1200
Beryl 200 300
Charles 400 1100
Denise 500 600
Ellen 700 800
Frank 900 1000
Datatype of (lft, rgt)?
- INTEGER (full range)
- FLOAT / REAL / DOUBLE
- NUMERIC(p,s) / DECIMAL(p,s)
33
78. Frequent Insertion Trees
Avoid overhead (locking + updating) with larger spreads
and bigger gaps
emp lft rgt
Anthony 100 1200
Beryl 200 300
Charles 400 1100
Denise 500 600
Ellen 700 800
Frank 900 1000
Datatype of (lft, rgt)?
- INTEGER (full range)
- FLOAT / REAL / DOUBLE
- NUMERIC(p,s) / DECIMAL(p,s)
33
79. Frequent Insertion Trees
Avoid overhead (locking + updating) with larger spreads
and bigger gaps
emp lft rgt
Anthony 100 1200
Beryl 200 300
Charles 400 1100
Denise 500 600
Ellen 700 800
Frank 900 1000
Datatype of (lft, rgt)?
- INTEGER (full range)
- FLOAT / REAL / DOUBLE
- NUMERIC(p,s) / DECIMAL(p,s)
33
80. Frequent Insertion Trees
Avoid overhead (locking + updating) with larger spreads
and bigger gaps
emp lft rgt
Anthony 100 1200
Beryl 200 300
Charles 400 1100
Denise 500 600
Ellen 700 800
Frank 900 1000
Datatype of (lft, rgt)?
- INTEGER (full range)
- FLOAT / REAL / DOUBLE
- NUMERIC(p,s) / DECIMAL(p,s)
33
81. Hybrid Models
Adjacency List with Nested Sets (node, parent, lft, rgt)
Nested Sets with depth
Adjacency List with depth
...
Denormalisation to save
subselects or joins
34
86. Nested Intervals Model
Finding GCD (greatest common divisor)
CREATE FUNCTION gcd(IN x INTEGER, IN y INTEGER)
RETURNS INTEGER
LANGUAGE SQL DETERMINISTIC
BEGIN
WHILE x <> y DO
IF x > y THEN SET x = x - y; END IF;
IF y > x THEN SET y = y - x; END IF;
END WHILE;
RETURN x;
END;
38
89. Nested Intervals Model
Binary Node (x, y)
Fractions
First node (1, 1/2)
depth-first
convergence point
39
90. Nested Intervals Model
Binary Node (x, y)
Fractions
First child of first node (1, 3/4)
First node (1, 1/2)
depth-first
convergence point
39
91. Nested Intervals Model
Binary Node (x, y)
Fractions
First child of first node (1, 3/4)
First node (1, 1/2)
depth-first
convergence point
breadth-first
convergence point
39
92. Nested Intervals Model
Binary Node (x, y)
Fractions
First child of first node (1, 3/4)
First node (1, 1/2)
depth-first
convergence point
breadth-first
convergence point
39
93. Nested Intervals Model
Binary Node (x, y)
Fractions
First child of first node (1, 3/4)
First node (1, 1/2)
depth-first
convergence point
breadth-first
convergence point
39
95. Nested Intervals Model
CREATE FUNCTION x_numer(
IN numer INT, IN denom INT
) RETURNS INTEGER
BEGIN
DECLARE ret_num INTEGER;
DECLARE ret_den INTEGER;
SET ret_num = numer + 1;
SET ret_den = 2 * denom;
WHILE FLOOR(ret_num/2)
= ret_num/2
DO SET ret_num = ret_num/2;
SET red_den = ret_den/2;
END WHILE;
RETURN ret_num; -- ret_den
-- for x_denom()
END;
40
96. Nested Intervals Model
CREATE FUNCTION parent_numer(IN numer INTEGER,
IN denom INTEGER) RETURNS INTEGER
LANGUAGE SQL DETERMINISTIC
BEGIN
DECLARE ret_num INTEGER;
DECLARE ret_den INTEGER;
IF numer = 3
THEN RETURN CAST(NULL AS INTEGER)
END IF;
SET ret_num = (numer - 1)/2;
SET ret_den = denom/2;
WHILE FLOOR((ret_num - 1)/4) = (ret_num - 1)/4
DO SET ret_num = (ret_num + 1)/2;
SET red_den = ret_den/2;
END WHILE;
RETURN ret_num; -- ret_den for parent_denom()
END;
41
97. Nested Intervals Model
Similar functions for:
sibling_numer(), sibling_denom()
child_numer(), child_denom()
enumerated_path(), distance()
path_numer(), path_denom()
42
103. Oracle: CONNECT BY PRIOR
Traversing the tree
SELECT emp, boss
FROM emp_orgchart
START WITH boss IS NULL
CONNECT BY [NOCYCLE] PRIOR emp = boss;
emp boss
A NULL
B A
G B
C A
D C
E C
F C
46
104. Oracle: CONNECT BY PRIOR
Traversing the tree
SELECT emp, boss
FROM emp_orgchart
START WITH boss = ‘C’
IS NULL
CONNECT BY [NOCYCLE] PRIOR emp = boss;
emp boss
D C
E C
F C
46
105. Oracle: CONNECT BY PRIOR
Traversing the tree
SELECT emp, boss, salary, LEVEL AS lvl,
LPAD(' ', 2*(LEVEL-1)) || emp AS tree,
CONNECT_BY_IS_LEAF AS is_leaf
FROM emp_orgchart
CONNECT BY PRIOR emp = boss
START WITH boss IS NULL
ORDER SIBLINGS BY salary DESC;
emp boss salary lvl tree is_leaf
A NULL 1000.00 1 A 0
B A 900.00 2 B 0
G B 800.00 3 G 1
C A 900.00 2 C 0
D C 800.00 3 D 1
E C 700.00 3 E 1
F C 600.00 3 F 1
47
106. Oracle: CONNECT BY PRIOR
Determining the relationship between two nodes
SELECT COUNT(*) AS ‘is_subordinate’
FROM emp_orgchart
WHERE emp = :subordinate
AND LEVEL > 1
START WITH emp = :superior
CONNECT BY PRIOR emp = boss;
48
107. Oracle: CONNECT BY PRIOR
Determining the relationship between two nodes
SELECT COUNT(*) AS ‘is_subordinate’
FROM emp_orgchart
WHERE emp = :subordinate
AND LEVEL > 1
START WITH emp = :superior
CONNECT BY PRIOR emp = boss;
A subordinate of C? 0
F subordinate of A? 1
48
108. Oracle: CONNECT_BY_ROOT
Determining the ancestor of a node
SELECT emp_id,
CONNECT_BY_ROOT emp_id AS root
FROM orgchart
START WITH boss = ‘A’
CONNECT BY PRIOR emp_id = boss;
emp_id root
B B
G B
C C
D C
E C
F C
49
109. Oracle: SYS_CONNECT_BY_PATH
Materialized Path View
SELECT emp_id,
SYS_CONNECT_BY_PATH (emp_id,'.')
AS path_string
FROM orgchart
START WITH boss IS NULL
CONNECT BY PRIOR emp_id = boss
ORDER BY 2;
emp_id path_string
A .A
B .A.B
C .A.C
D .A.C.D
E .A.C.E
F .A.C.F
50
111. Common Table Expressions
CREATE TABLE emp_orgchart (
emp VARCHAR(10) PRIMARY KEY,
boss VARCHAR(10) REFERENCES emp_orgchart(emp_id),
salary TEXT
);
INSERT INTO emp_orgchart (emp, boss, salary) VALUES
(‘A’, NULL, 1000.00),
(‘B’, ‘A’, 900.00),
(‘C’, ‘A’, 900.00), A
(‘D’, ‘C’, 800.00),
(‘E’, ‘C’, 700.00),
(‘F’, ‘C’, 600.00), B C
(‘G’, ‘B’, 800.00);
G D E F
52
112. Common Table Expressions
WITH RECURSIVE hierarchy AS (
-- non recursive term
SELECT *
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
-- recursive term
SELECT E1.*
FROM emp_orgchart AS E1
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
53
113. Common Table Expressions
WITH RECURSIVE hierarchy AS (
-- non recursive term
SELECT *
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
-- recursive term
SELECT E1.*
FROM emp_orgchart AS E1
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
53
114. Common Table Expressions
WITH RECURSIVE hierarchy AS (
-- non recursive term
SELECT *
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
-- recursive term
SELECT E1.*
FROM emp_orgchart AS E1
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY emp;
53
115. Common Table Expressions
WITH RECURSIVE hierarchy AS (
-- non recursive term
SELECT *
FROM emp_orgchart
WHERE boss IS NULL emp boss salary
UNION ALL A NULL 1000.00
-- recursive term B A 900.00
SELECT E1.* C A 900.00
FROM emp_orgchart AS E1 D C 800.00
JOIN hierarchy AS E2
E C 700.00
ON E1.boss = E2.emp
F C 600.00
)
SELECT * G B 800.00
FROM hierarchy
ORDER BY emp;
53
116. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree,
emp || ‘.’ AS path
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, lvl * 2, ‘ ’) AS tree,
E2.path || E1.emp || ‘.’ AS path
FROM emp_orgchart
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
54
117. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree,
emp || ‘.’ AS path
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, lvl * 2, ‘ ’) AS tree,
E2.path || E1.emp || ‘.’ AS path
FROM emp_orgchart
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
54
118. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree,
emp || ‘.’ AS path
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, lvl * 2, ‘ ’) AS tree,
E2.path || E1.emp || ‘.’ AS path
FROM emp_orgchart
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
54
119. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree,
emp || ‘.’ AS path
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, lvl * 2, ‘ ’) AS tree,
E2.path || E1.emp || ‘.’ AS path
FROM emp_orgchart
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
54
120. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree,
emp || ‘.’ AS path
FROM emp_orgchart
WHERE boss IS NULL
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, lvl * 2, ‘ ’) AS tree,
E2.path || E1.emp || ‘.’ AS path
FROM emp_orgchart
JOIN hierarchy AS E2
ON E1.boss = E2.emp
)
SELECT *
FROM hierarchy
ORDER BY path;
54
121. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
emp CAST(emp salary
boss AS TEXT) AS tree, tree
lvl path
emp || ‘.’ AS path
A NULL 1000.00 1 A A.
FROM emp_orgchart
WHERE boss IS NULL
B A 900.00 2 B A.B.
UNION ALL
SELECT E1.*,
G B 800.00 3 G A.B.G.
E2.lvl + 1 AS lvl,
C LPAD(E1.emp, lvl * 2, ‘ ’)CAS tree,
A 900.00 2 A.C.
E2.path || E1.emp || ‘.’ AS path
D C 800.00
FROM emp_orgchart 3 D A.C.D.
JOIN hierarchy AS E2
E C 700.00 3 E A.C.E.
ON E1.boss = E2.emp
) F C 600.00 3 F A.C.F.
SELECT *
FROM hierarchy
ORDER BY path;
54
122. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree
FROM emp_orgchart
WHERE boss = ‘A’
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, depth * 2, ‘ ’) AS tree
FROM emp_orgchart
JOIN hierarchy AS E2 ON E1.boss = E2.emp
WHERE E2.lvl < 3 -- termination condition
)
SELECT *
FROM hierarchy
ORDER BY path;
55
123. Common Table Expressions
WITH RECURSIVE hierarchy AS (
SELECT *,
1 AS lvl,
CAST(emp AS TEXT) AS tree
FROM emp_orgchart
WHERE boss = ‘A’
UNION ALL
SELECT E1.*,
E2.lvl + 1 AS lvl,
LPAD(E1.emp, depth * 2, ‘ ’) AS tree
FROM emp_orgchart
JOIN hierarchy AS E2 ON E1.boss = E2.emp
)
SELECT *,
COALESCE((SELECT 0 FROM emp_orgchart E3
WHERE hierarchy.emp = E3.boss
FETCH FIRST ROW ONLY), 1) AS is_leaf
FROM hierarchy;
56
127. Resources
Joe Celko, “Trees and Hierarchies in SQL for
Smarties”, Morgan Kaufmann:
http://www.amazon.com/dp/1558609202/
Vadim Tropashko, various articles and papers:
http://www.dbazine.com/top-auth/top-auth-trop/
http://www.sigmod.org/sigmod/record/issues/
0506/p47-article-tropashko.pdf
http://arxiv.org/pdf/cs/0402051
59
128. Thank you!
Contact details:
Lorenzo Alberton
lorenzo@ibuildings.com
http://www.alberton.info
http://joind.in/talk/view/585