The document contains code to test the performance of a balanced binary search tree (BST) implementation. It defines classes for a BST set, node, and data generator. Data sizes from 100 to 10,000 are tested with random, sorted, and reversed data distributions. Insertion and search times are measured over multiple runs and printed along with the data distribution and size. Edge cases like empty set search and duplicate insertion are also tested.
Hi can I see an example for the output of this code to make sure my .pdf
1. Hi, can I see an example of the output of this code, to make sure my output is correct?
import random
import time
import string
class TestDataGenerator:
    """Generate synthetic string data for exercising set implementations."""

    def __init__(self):
        pass

    def generateData(self, size):
        """Return a list of ``size`` random alphabetic strings.

        Each string has a random length between 1 and 1000 characters
        (inclusive) and is drawn from the ASCII upper- and lower-case letters.

        size : int -- number of strings to produce; 0 or less yields ``[]``
        returns : list of str
        """
        # The alphabet never changes, so build it once rather than per string.
        chars = string.ascii_uppercase + string.ascii_lowercase
        # The length is drawn before the characters of each string, which keeps
        # the sequence of random calls (and thus seeded output) unchanged.
        return [
            ''.join(random.choice(chars) for _ in range(random.randint(1, 1000)))
            for _ in range(size)
        ]
from abc import ABC, abstractmethod
class AbstractSet(ABC):
    """Abstract interface for a set of strings with insert and search."""

    @abstractmethod
    def __init__(self):
        """Construct the set."""
        pass

    @abstractmethod
    def insertElement(self, element):
        """Insert ``element`` into the set.

        element : str
        returns : bool -- True after a successful insertion, False if the
            element is already in the set. This placeholder body always
            returns False.
        """
        return False

    @abstractmethod
    def searchElement(self, element):
        """Check whether ``element`` is in the set.

        element : str
        returns : bool -- True if it is, False otherwise. This placeholder
            body always returns False.
        """
        return False
class AbstractTestDataGenerator(ABC):
    """Abstract interface for a synthetic test-data generator."""

    @abstractmethod
    def __init__(self):
        """Construct the generator."""
        pass

    @abstractmethod
    def generateData(self, size):
        """Create and return a list of ``size`` strings.

        size : int
        returns : list -- this placeholder body yields ``size`` empty strings
        """
        return [""] * size
import sys
# Raise the interpreter's recursion limit to 10000 frames. The tree's _insert
# and _search methods further down recurse once per tree level, so this is
# presumably here to give deep trees enough headroom.
sys.setrecursionlimit(10000)
class Node:
    """Tree node holding a value, two child links and a colour tag.

    NOTE: an identical ``Node`` class is defined again further down this
    file; that later definition is the one in effect once the module loads.
    """

    def __init__(self, value):
        self.value = value
        # A freshly created node has no children yet.
        self.left = self.right = None
        # Nodes start out "BLACK"; the colour is a plain string tag.
        self.color = "BLACK"
import timeit
import string
import random
import sys
# Sets the recursion limit to 10000 again (the same call already appears
# earlier in this file); presumably kept so the recursive tree methods below
# have enough stack headroom.
sys.setrecursionlimit(10000)
class Node:
    """Tree node: a stored value, left/right child links and a colour tag."""

    def __init__(self, value):
        # Children are absent until the tree attaches them.
        self.left = None
        self.right = None
        self.value = value
        # Colour is a plain string, "BLACK" by default; "RED" is the other
        # value the tree code assigns.
        self.color = "BLACK"
class BalancedSearchTreeSet(AbstractSet):
    """Set of strings stored in a left-leaning red-black search tree.

    Elements are ordered with the ordinary (case-sensitive) comparison
    operators, and the same ordering is used for insertion and for lookup.
    """

    def __init__(self):
        # Root node of the tree; None while the set is empty.
        self.root = None

    def __contains__(self, value):
        """Support ``value in tree`` by delegating to searchElement."""
        return self.searchElement(value)

    def rotateLeft(self, n):
        """Rotate the subtree rooted at ``n`` to the left.

        ``n.right`` becomes the subtree root and takes over ``n``'s colour;
        ``n`` becomes its left child and is coloured RED.

        returns : Node -- the new subtree root
        """
        x = n.right
        n.right = x.left
        x.left = n
        x.color = n.color
        n.color = "RED"
        return x

    def rotateRight(self, n):
        """Rotate the subtree rooted at ``n`` to the right.

        Mirror image of rotateLeft: ``n.left`` becomes the subtree root and
        takes over ``n``'s colour; ``n`` becomes its right child, coloured RED.

        returns : Node -- the new subtree root
        """
        x = n.left
        n.left = x.right
        x.right = n
        x.color = n.color
        n.color = "RED"
        return x

    def flipColor(self, n):
        """Colour ``n`` RED and both of its children BLACK."""
        n.color = "RED"
        n.left.color = "BLACK"
        n.right.color = "BLACK"

    def isRed(self, node):
        """Return True if ``node`` exists and is RED; a missing node is not red."""
        if node is None:
            return False
        return node.color == "RED"

    def insertElement(self, value):
        """Insert ``value`` into the set.

        value : str
        returns : bool -- True if the value was added, False if it was
            already present (the tree is left untouched in that case)
        """
        if self.searchElement(value):
            return False
        self.root = self._insert(self.root, value)
        # The root is always BLACK, whatever colour rebalancing left on it.
        self.root.color = "BLACK"
        return True

    def _insert(self, node, value):
        """Insert ``value`` below ``node`` and return the rebalanced subtree root."""
        if node is None:
            # A new node must join the tree RED: every rebalancing step below
            # is triggered by red links, and Node() starts out BLACK. Without
            # this the tree never rotates and degrades to an unbalanced BST.
            leaf = Node(value)
            leaf.color = "RED"
            return leaf
        if value < node.value:
            node.left = self._insert(node.left, value)
        elif value > node.value:
            node.right = self._insert(node.right, value)
        else:
            # Already stored; a set keeps a single copy.
            return node
        # Restore the left-leaning red-black shape on the way back up.
        if self.isRed(node.right) and not self.isRed(node.left):
            node = self.rotateLeft(node)
        if self.isRed(node.left) and self.isRed(node.left.left):
            node = self.rotateRight(node)
        if self.isRed(node.left) and self.isRed(node.right):
            self.flipColor(node)
        return node

    def searchElement(self, value):
        """Check whether ``value`` is in the set.

        value : str
        returns : bool -- True if it is, False otherwise
        """
        return self._search(self.root, value) is not None

    def _search(self, node, value):
        """Return the node under ``node`` holding ``value``, or None if absent.

        Uses the same case-sensitive ordering as _insert so the descent
        follows the path the element was stored on.
        """
        while node is not None:
            if value == node.value:
                return node
            node = node.left if value < node.value else node.right
        return None
bst_set = BalancedSearchTreeSet()
data_generator = TestDataGenerator()

# Test parameters
data_sizes = [100, 1000, 10000]
num_runs = 5


def _build_set(elements):
    """Insert every element into a brand-new tree and return that tree."""
    tree = BalancedSearchTreeSet()
    for element in elements:
        tree.insertElement(element)
    return tree


def _search_all(tree, elements):
    """Look up every element in ``tree``; results are discarded (timing only)."""
    for element in elements:
        tree.searchElement(element)


# Test with varying data sizes and distributions
for size in data_sizes:
    print(f"Data size: {size}")
    data = data_generator.generateData(size)
    sorted_data = sorted(data)
    reversed_data = sorted_data[::-1]
    datasets = {"Random": data, "Sorted": sorted_data, "Reversed": reversed_data}
    for dist, dataset in datasets.items():
        print(f" Data distribution: {dist}")
        # insertion time
        # Every timed run fills a fresh tree; reusing one tree would turn all
        # runs after the first into duplicate insertions into a larger tree.
        insert_time = timeit.timeit(
            lambda: _build_set(dataset), number=num_runs
        ) / num_runs
        # search time
        # Searches run against a tree holding exactly this dataset. The lookup
        # goes through searchElement rather than the `in` operator, so it does
        # not rely on the set class defining __contains__.
        bst_set = _build_set(dataset)
        search_time = timeit.timeit(
            lambda: _search_all(bst_set, dataset), number=num_runs
        ) / num_runs
        # Print the results
        print(f" Insertion time: {insert_time:.6f} seconds")
        print(f" Search time: {search_time:.6f} seconds")
        print()
# Edge cases
# Each entry pairs a human-readable label with the list of elements for that
# case; the loop that follows unpacks them as (test_name, edge_data).
# `data` is the list produced by generateData in the benchmark loop above.
edge_cases = [
("Empty set search", []),
("Duplicate insertion", data),
("Nonexistent element search", ["nonexistent_element"]),
]
for test_name, edge_data in edge_cases:
print(f"Edge case: {test_name}")