
说明
如果需要用到这些知识却没有掌握,则会让人感到沮丧,也可能导致面试被拒。无论是花几天时间“突击”,还是利用零碎的时间持续学习,在数据结构上下点功夫都是值得的。那么Python 中有哪些数据结构呢?列表、字典、集合,还有……栈?Python 有栈吗?本系列文章将给出详细拼图。
13章: Binary Tree
The binary Tree: 二叉树,每个节点最多只有两个子节点
class _BinTreeNode:
    """Storage node of a binary tree: a payload plus left/right child links."""

    def __init__(self, data):
        # A freshly created node is a leaf: both child links start out empty.
        self.left = self.right = None
        self.data = data
# 三种depth-first遍历
def preorderTrav(subtree):
    """Pre-order (root first) depth-first traversal.

    Prints the data of the root of ``subtree``, then walks the left subtree
    and finally the right subtree. A ``None`` subtree prints nothing.
    """
    if subtree is None:
        return
    print(subtree.data)
    for child in (subtree.left, subtree.right):
        preorderTrav(child)
def inorderTrav(subtree):
    """In-order (root in the middle) depth-first traversal.

    Walks the left subtree, prints the data of the root of ``subtree``, then
    walks the right subtree. A ``None`` subtree prints nothing.
    """
    if subtree is not None:
        # Recurse into this same function so that every level, not just the
        # top one, is visited left -> root -> right.
        inorderTrav(subtree.left)
        print(subtree.data)
        inorderTrav(subtree.right)
def postorderTrav(subtree):
    """Post-order (root last) depth-first traversal.

    Walks the left subtree, then the right subtree, and only then prints the
    data of the root of ``subtree``. A ``None`` subtree prints nothing.
    """
    if subtree is not None:
        # Recurse into this same function so that every level, not just the
        # top one, is visited left -> right -> root.
        postorderTrav(subtree.left)
        postorderTrav(subtree.right)
        print(subtree.data)
# 宽度优先遍历(Breadth-First Traversal): 一层一层遍历, 使用queue
def breadthFirstTrav(bintree):
    """Breadth-first (level by level) traversal of a binary tree.

    Prints the data of every node, top level first and left to right within
    a level. A FIFO queue holds the nodes whose children still have to be
    visited.

    Args:
        bintree: root node of the tree (an object with ``data``, ``left`` and
            ``right`` attributes), or ``None`` for an empty tree, in which
            case nothing is printed.
    """
    from collections import deque  # plain FIFO; no thread synchronisation needed

    if bintree is None:
        # Empty tree: behave like the depth-first traversals and print nothing.
        return

    pending = deque([bintree])
    while pending:
        node = pending.popleft()
        print(node.data)
        # Children are queued left before right so each level prints in order.
        if node.left is not None:
            pending.append(node.left)
        if node.right is not None:
            pending.append(node.right)
class _ExpTreeNode:
    """Node of an expression tree: one token plus links to two subtrees."""

    __slots__ = ('element', 'left', 'right')

    def __init__(self, data):
        # New nodes start without children; the tree builder attaches them.
        self.left = self.right = None
        self.element = data

    def __repr__(self):
        # Child nodes are rendered recursively as part of the formatting.
        parts = (self.element, self.left, self.right)
        return '<_ExpTreeNode: {} {} {}>'.format(*parts)
from queue import Queue
class ExpressionTree:
    r"""Binary expression tree for fully parenthesised infix expressions.

    Operators are stored in interior nodes and operands in leaf nodes, e.g.
    ``((9+3)*(8-4))`` is stored as::

              *
            /   \
           +     -
          / \   / \
         9   3 8   4

    Every token is a single character: ``(``, ``)``, one of the binary
    operators ``+ - * / %``, a single digit, or a one-character variable
    name. Whitespace between tokens is ignored.

    Operations:
        ExpressionTree(expStr): build the tree from the expression string.
        evaluate(varDict): evaluate the expression and return the result.
        str(tree): return a fully parenthesised string of the expression.

    Usage:
        variables = {'a': 5, 'b': 12}
        expTree = ExpressionTree("(a/(b-3))")
        print('The result = ', expTree.evaluate(variables))
    """

    def __init__(self, expStr):
        """Parse ``expStr`` into a tree.

        Raises:
            ValueError: if ``expStr`` ends before the expression is complete
                (this includes the empty string).
        """
        self._expTree = None
        self._buildTree(expStr)

    def evaluate(self, varDict):
        """Evaluate the expression.

        Args:
            varDict: mapping from variable name to value, used for every
                operand that is not a digit.

        Returns:
            The numeric result of the expression.

        Raises:
            AssertionError: if a variable operand is missing from ``varDict``.
            KeyError: if an interior node holds an unsupported operator.
        """
        return self._evalTree(self._expTree, varDict)

    def __str__(self):
        return self._buildString(self._expTree)

    def _buildString(self, treeNode):
        """Render a subtree, wrapping every operator node in parentheses."""
        if treeNode.left is None and treeNode.right is None:
            # A leaf is an operand and is emitted as is.
            return str(treeNode.element)
        return '({}{}{})'.format(
            self._buildString(treeNode.left),
            str(treeNode.element),
            self._buildString(treeNode.right))

    def _evalTree(self, subtree, varDict):
        """Recursively evaluate ``subtree`` and return its value."""
        if subtree.left is None and subtree.right is None:
            # Leaf node: either a single digit or a variable name.
            if '0' <= subtree.element <= '9':
                return int(subtree.element)
            assert subtree.element in varDict, 'invalid variable.'
            return varDict[subtree.element]
        # Interior node: evaluate both operands, then apply the operator.
        lvalue = self._evalTree(subtree.left, varDict)
        rvalue = self._evalTree(subtree.right, varDict)
        return self._computeOp(lvalue, subtree.element, rvalue)

    def _computeOp(self, left, op, right):
        """Apply the binary operator ``op`` to ``left`` and ``right``."""
        assert op
        op_func = {
            '+': lambda left, right: left + right,
            '-': lambda left, right: left - right,
            '*': lambda left, right: left * right,
            '/': lambda left, right: left / right,
            '%': lambda left, right: left % right,
        }
        return op_func[op](left, right)

    def _buildTree(self, expStr):
        """Tokenise ``expStr`` character by character and build the tree."""
        expQ = Queue()
        for token in expStr:
            if not token.isspace():  # blanks carry no meaning in the grammar
                expQ.put(token)
        self._expTree = _ExpTreeNode(None)  # root node, filled in below
        self._recBuildTree(self._expTree, expQ)

    def _nextToken(self, expQ):
        """Return the next token from ``expQ``.

        ``Queue.get()`` blocks forever on an empty queue, so a truncated
        expression would hang; the emptiness check turns that into an error.
        """
        if expQ.empty():
            raise ValueError('malformed expression: unexpected end of input')
        return expQ.get()

    def _recBuildTree(self, curNode, expQ):
        """Fill ``curNode`` (and, recursively, its children) from ``expQ``."""
        token = self._nextToken(expQ)
        if token == '(':
            # "(" starts a sub-expression: left operand, operator, right
            # operand, closing ")".
            curNode.left = _ExpTreeNode(None)
            self._recBuildTree(curNode.left, expQ)
            # The next token is the operator: + - * / %
            curNode.element = self._nextToken(expQ)
            curNode.right = _ExpTreeNode(None)
            self._recBuildTree(curNode.right, expQ)
            # The next token is the closing ")"; discard it.
            self._nextToken(expQ)
        else:
            # Operand (digit or variable name); stored as the raw character
            # and converted to a number only during evaluation.
            curNode.element = token
# Demo: parse a fully parenthesised expression, print it back, then evaluate it.
vars = dict(a=5, b=12)  # variable bindings; the expression below uses none of them
expTree = ExpressionTree("((2*7)+8)")
print(str(expTree))
print('The result = ', expTree.evaluate(vars))
Heap(堆):二叉树最直接的一个应用就是实现堆。堆就是一棵完全二叉树,最大堆的非叶子节点的值都比孩子大,最小堆的非叶子节点的值都比孩子小。 python内置了heapq模块帮助我们实现堆操作,比如用内置的heapq模块实现一个堆排序:
# 使用python内置的heapq实现heap sort
def heapsort(iterable):
    """Return the items of ``iterable`` as a new list in ascending order.

    Every item is pushed onto a ``heapq`` min-heap, then the heap is drained
    smallest-first.
    """
    from heapq import heappop, heappush

    heap = []
    for item in iterable:
        heappush(heap, item)

    ordered = []
    while heap:
        ordered.append(heappop(heap))
    return ordered
但是一般实现堆的时候实际上并不是用树节点来实现的,而是使用数组实现,效率比较高。为什么可以用数组实现呢?因为完全二叉树的性质, 可以用下标之间的关系表示节点之间的关系,MaxHeap的docstring中已经说明了。
class MaxHeap:
    """Fixed-capacity max-heap stored in an array.

    A heap is a complete binary tree. In a max-heap every non-leaf node is
    at least as large as its children (in a min-heap, at most as large).
    Two properties must hold after every operation: the order property and
    the shape property (the tree stays complete).

    add:     append the value at the next free slot, then sift it up.
    extract: take the root, move the last live element to the root, then
             sift it down.

    Numbering the nodes top to bottom, left to right, starting at 0, the
    relatives of node ``i`` are:

        parent = (i - 1) // 2
        left   = 2 * i + 1
        right  = 2 * i + 2

    so no node objects or child links are needed.
    """

    def __init__(self, maxSize):
        """Create an empty heap that can hold up to ``maxSize`` values."""
        self._elements = Array(maxSize)  # the Array ADT from chapter 2
        self._count = 0                  # number of live elements

    def __len__(self):
        return self._count

    def capacity(self):
        """Return the maximum number of values the heap can hold."""
        return len(self._elements)

    def add(self, value):
        """Insert ``value``.

        Raises:
            AssertionError: if the heap is already full.
        """
        assert self._count < self.capacity(), 'can not add to full heap'
        self._elements[self._count] = value
        self._count += 1
        self._siftUp(self._count - 1)
        self.assert_keep_heap()  # self-check after every insertion

    def extract(self):
        """Remove and return the largest value.

        Raises:
            AssertionError: if the heap is empty.
        """
        assert self._count > 0, 'can not extract from an empty heap'
        value = self._elements[0]  # save the root value
        self._count -= 1
        # Move the bottom-right element to the root and restore the order.
        self._elements[0] = self._elements[self._count]
        self._siftDown(0)
        self.assert_keep_heap()  # self-check after every extraction
        return value

    def _siftUp(self, ndx):
        """Move the element at ``ndx`` up while it is larger than its parent."""
        while ndx > 0:
            parent = (ndx - 1) // 2
            if not self._elements[ndx] > self._elements[parent]:
                break
            self._elements[ndx], self._elements[parent] = (
                self._elements[parent], self._elements[ndx])
            ndx = parent

    def _siftDown(self, ndx):
        """Move the element at ``ndx`` down below any larger child.

        Only indices below ``self._count`` are read, so slots that no longer
        belong to the heap never influence the result.
        """
        while True:
            left = 2 * ndx + 1
            right = left + 1
            # Pick the largest of the node and its live children.
            largest = ndx
            if (left < self._count and
                    self._elements[left] > self._elements[largest]):
                largest = left
            if (right < self._count and
                    self._elements[right] > self._elements[largest]):
                largest = right
            if largest == ndx:
                return
            self._elements[ndx], self._elements[largest] = (
                self._elements[largest], self._elements[ndx])
            ndx = largest

    def __repr__(self):
        # Show only the live part of the array, not the unused slots.
        return ' '.join(str(self._elements[i]) for i in range(self._count))

    def assert_keep_heap(self):
        """Assert that the max-heap order property holds for every node.

        Nodes ``0 .. len // 2 - 1`` are exactly those with at least a left
        child; the right child is checked only when it exists.
        """
        size = len(self)
        for i in range(size // 2):
            left = 2 * i + 1
            right = left + 1
            assert self._elements[i] >= self._elements[left]
            if right < size:
                assert self._elements[i] >= self._elements[right]
def test_MaxHeap():
    """Unit test for the max-heap implementation."""
    size = 10
    heap = MaxHeap(size)
    for value in range(size):
        heap.add(value)
    heap.assert_keep_heap()
    # Values were added in ascending order, so they must come back out
    # largest first.
    for expected in range(size - 1, -1, -1):
        assert heap.extract() == expected


test_MaxHeap()
def simpleHeapSort(theSeq):
    """Heap sort built on MaxHeap; sorts ``theSeq`` in place, ascending.

    The same sequence object is returned. An empty sequence is returned
    untouched.
    """
    if not theSeq:
        return theSeq

    size = len(theSeq)
    heap = MaxHeap(size)
    for item in theSeq:
        heap.add(item)

    # The heap yields the largest value first, so fill from the back.
    pos = size
    while pos > 0:
        pos -= 1
        theSeq[pos] = heap.extract()
    return theSeq
def test_simpleHeapSort():
    """Check simpleHeapSort on the empty list and on random inputs."""
    from random import randint

    def _is_sorted(seq):
        return all(seq[k] <= seq[k + 1] for k in range(len(seq) - 1))

    assert simpleHeapSort([]) == []
    for _round in range(1000):
        # Random length in 1..100, random values in 0..100.
        to_sort = [randint(0, 100) for _ in range(randint(1, 100))]
        simpleHeapSort(to_sort)  # sorts in place, mutating the list
        assert _is_sorted(to_sort)


test_simpleHeapSort()
14章: Search Trees
二叉查找树性质:对每个内部节点V, 1. 所有key小于V.key的存储在V的左子树。 2. 所有key大于V.key的存储在V的右子树。 对BST进行中序遍历会得到升序的key序列。
class _BSTMapNode:
    """Binary-search-tree node holding one key/value entry and two child links."""

    __slots__ = ('key', 'value', 'left', 'right')

    def __init__(self, key, value):
        self.key, self.value = key, value
        # New nodes are leaves until the tree links children to them.
        self.left = self.right = None

    def __repr__(self):
        # Children are rendered recursively, so this shows the whole subtree.
        template = '<{}:{}> left:{}, right:{}'
        return template.format(self.key, self.value, self.left, self.right)

    __str__ = __repr__
class BSTMap:
    """Map ADT implemented with a binary search tree (BST).

    Every node stores a key and its payload. For every node V:
      1. all keys smaller than V.key are stored in V's left subtree;
      2. all keys larger than V.key are stored in V's right subtree.
    An in-order traversal therefore yields the keys in ascending order.
    """

    def __init__(self):
        self._root = None
        self._size = 0
        self._rval = None  # value of the entry removed by the last remove()

    def __len__(self):
        return self._size

    def __iter__(self):
        return _BSTMapIterator(self._root, self._size)

    def __contains__(self, key):
        return self._bstSearch(self._root, key) is not None

    def valueOf(self, key):
        """Return the value stored under ``key``.

        Raises:
            AssertionError: if ``key`` is not in the map.
        """
        node = self._bstSearch(self._root, key)
        assert node is not None, 'Invalid map key.'
        return node.value

    def _bstSearch(self, subtree, target):
        """Return the node holding ``target`` in ``subtree``, or ``None``."""
        node = subtree
        while node is not None:
            if target < node.key:
                node = node.left
            elif target > node.key:
                node = node.right
            else:
                return node  # the node itself, not a copy
        return None  # fell off the tree (or the tree is empty)

    def _bstMinumum(self, subtree):
        """Return the node with the smallest key in ``subtree``.

        The minimum is the left-most node. Returns ``None`` for an empty
        subtree.
        """
        if subtree is None:
            return None
        node = subtree
        while node.left is not None:
            node = node.left
        return node

    def add(self, key, value):
        """Insert ``key`` or replace the value of an existing ``key``.

        Walks a single root-to-leaf path, so the cost is proportional to the
        tree height (O(N) for a degenerate tree).

        Returns:
            ``True`` if a new entry was inserted, ``False`` if an existing
            entry was updated.
        """
        node = self._bstSearch(self._root, key)
        if node is not None:  # key already exists: update its value
            node.value = value
            return False
        self._root = self._bstInsert(self._root, key, value)
        self._size += 1
        return True

    def _bstInsert(self, subtree, key, value):
        """Insert a new entry below ``subtree``; return the subtree's root.

        New nodes always become leaves.
        """
        if subtree is None:
            subtree = _BSTMapNode(key, value)
        elif key < subtree.key:
            subtree.left = self._bstInsert(subtree.left, key, value)
        elif key > subtree.key:
            subtree.right = self._bstInsert(subtree.right, key, value)
        # No branch for an equal key: add() has already handled duplicates.
        return subtree

    def remove(self, key):
        """Remove ``key`` and return the value that was stored under it.

        Cost is proportional to the tree height (O(N) for a degenerate
        tree). The node to delete falls into one of three cases:
          1. leaf: the parent's link to it is set to ``None``;
          2. one child: the parent is linked to that child instead;
          3. two children:
             (1) find the in-order successor S of the node N,
             (2) copy S's key and value into N,
             (3) delete S from N's right subtree (S is its minimum).

        Raises:
            AssertionError: if ``key`` is not in the map.
        """
        assert key in self, 'invalid map key'
        self._root = self._bstRemove(self._root, key)
        self._size -= 1
        return self._rval

    def _bstRemove(self, subtree, target):
        """Remove ``target`` from ``subtree``; return the subtree's new root.

        The removed entry's value is left in ``self._rval``.
        """
        if subtree is None:
            return subtree
        if target < subtree.key:
            subtree.left = self._bstRemove(subtree.left, target)
            return subtree
        if target > subtree.key:
            subtree.right = self._bstRemove(subtree.right, target)
            return subtree

        # Found the node holding the target.
        removed = subtree.value
        self._rval = removed
        if subtree.left is None and subtree.right is None:
            return None                # leaf node
        if subtree.left is None:
            return subtree.right       # only a right child
        if subtree.right is None:
            return subtree.left        # only a left child

        # Two children: replace this entry with its in-order successor.
        successor = self._bstMinumum(subtree.right)
        subtree.key = successor.key
        subtree.value = successor.value
        subtree.right = self._bstRemove(subtree.right, successor.key)
        # The recursive call stored the successor's value in _rval; restore
        # the value of the entry the caller actually asked to remove.
        self._rval = removed
        return subtree

    def __repr__(self):
        return '->'.join([str(i) for i in self])

    def assert_keep_bst_property(self, subtree, low=None, high=None):
        """Assert that ``subtree`` satisfies the BST ordering on its keys.

        Args:
            subtree: root of the subtree to check (``None`` is valid).
            low: exclusive lower bound for every key in ``subtree``, or
                ``None`` for no bound.
            high: exclusive upper bound for every key in ``subtree``, or
                ``None`` for no bound.

        Raises:
            AssertionError: if any key lies outside the range allowed by
                its ancestors.
        """
        if subtree is None:
            return
        if low is not None:
            assert subtree.key > low
        if high is not None:
            assert subtree.key < high
        # Each side inherits the current bounds, tightened by this key.
        self.assert_keep_bst_property(subtree.left, low, subtree.key)
        self.assert_keep_bst_property(subtree.right, subtree.key, high)
class _BSTMapIterator:
    """Iterator over a BST's keys in in-order sequence.

    All keys are copied into an array up front; iteration then simply walks
    that array.
    """

    def __init__(self, root, size):
        self._theKeys = Array(size)
        self._curItem = 0  # used as the write position while collecting keys
        self._bstTraversal(root)
        self._curItem = 0  # reset: from here on it is the read position

    def __iter__(self):
        return self

    def __next__(self):
        if self._curItem >= len(self._theKeys):
            raise StopIteration
        key = self._theKeys[self._curItem]
        self._curItem += 1
        return key

    def _bstTraversal(self, subtree):
        """Store the keys of ``subtree`` in in-order sequence."""
        if subtree is None:
            return
        self._bstTraversal(subtree.left)
        self._theKeys[self._curItem] = subtree.key
        self._curItem += 1
        self._bstTraversal(subtree.right)
def test_BSTMap():
    """Insert a handful of keys into a BSTMap and check they are retrievable."""
    keys = [60, 25, 100, 35, 17, 80]
    bst = BSTMap()
    for key in keys:
        # add() needs both a key and a value; each key doubles as its value.
        assert bst.add(key, key)  # True: a new entry was inserted
    assert len(bst) == len(keys)
    for key in keys:
        assert key in bst
        assert bst.valueOf(key) == key
    assert 1 not in bst
    bst.assert_keep_bst_property(bst._root)
def test_HashMap():
    """Map ADT test, originally written for the hash-based map.

    It now exercises the BST-based map through the same interface.
    """
    table = BSTMap()
    assert len(table) == 0

    # Single entry: add, look up, remove.
    table.add('a', 'a')
    assert table.valueOf('a') == 'a'
    assert len(table) == 1
    removed_a = table.remove('a')
    assert removed_a == 'a'
    assert len(table) == 0

    # Two entries, removed in reverse order of insertion.
    table.add('a', 'a')
    table.add('b', 'b')
    assert len(table) == 2
    assert table.valueOf('b') == 'b'
    removed_b = table.remove('b')
    assert removed_b == 'b'
    assert len(table) == 1
    table.remove('a')
    assert len(table) == 0

    # Ten entries: string keys '0'..'9' mapped to the matching ints.
    total = 10
    for num in range(total):
        table.add(str(num), num)
    assert len(table) == total
    for num in range(total):
        assert str(num) in table
    for num in range(total):
        print(len(table))
        print('bef', table)
        removed = table.remove(str(num))
        assert removed == num
        print('aft', table)
        print(len(table))
    assert len(table) == 0


test_HashMap()



















