
*说明
如果需要用到这些知识却没有掌握,则会让人感到沮丧,也可能导致面试被拒。无论是花几天时间“突击”,还是利用零碎的时间持续学习,在数据结构上下点功夫都是值得的。那么Python 中有哪些数据结构呢?列表、字典、集合,还有……栈?Python 有栈吗?本系列文章将给出详细拼图。
第5章:Searching 和 Sorting
排序和查找是最基础和频繁的操作,python内置了in操作符和bisect二分操作模块实现查找,内置了sorted方法来实现排序操作。二分和快排也是面试中经常考到的,本章讲的是基本的排序和查找。
def binary_search(sorted_seq, val):
    """Return the leftmost insertion index of ``val`` in ``sorted_seq``.

    Behaves like ``bisect.bisect_left`` from the standard library: the
    result is the first index ``i`` such that every element before ``i`` is
    strictly less than ``val``. When ``val`` occurs (possibly several times)
    this is the index of its first occurrence; when it does not occur it is
    the position where it would have to be inserted to keep the order.

    Args:
        sorted_seq: an indexable sequence sorted in ascending order.
        val: the value to locate.

    Returns:
        An int in the range ``[0, len(sorted_seq)]``.
    """
    low, high = 0, len(sorted_seq)  # search the half-open window [low, high)
    while low < high:
        mid = (low + high) // 2
        if sorted_seq[mid] < val:
            # Everything up to and including mid is too small.
            low = mid + 1
        else:
            # mid may be the answer; keep it inside the window so that an
            # equal element further left can still be found.
            high = mid
    return low
def bubble_sort(seq):
    """Sort ``seq`` in place, in ascending order, using bubble sort.

    Every pass walks the still-unsorted prefix comparing neighbours and
    swapping those that are out of order, which carries the largest
    remaining element to the end of that prefix. In total n(n-1)/2
    comparisons are made, i.e. O(n^2). Nothing is returned.
    """
    for last in range(len(seq) - 1, 0, -1):
        # Elements after index `last` were placed by earlier passes and do
        # not need to be compared again.
        for left in range(last):
            right = left + 1
            if seq[left] > seq[right]:
                seq[left], seq[right] = seq[right], seq[left]
def select_sort(seq):
    """Sort ``seq`` in place, in ascending order, using selection sort.

    Can be seen as a refinement of bubble sort: for each position the
    smallest element of the unsorted remainder is located, and at most one
    swap per pass moves it into place. Nothing is returned.
    """
    size = len(seq)
    for target in range(size - 1):
        # Index of the first smallest element in seq[target:].
        smallest = min(range(target, size), key=lambda idx: seq[idx])
        if smallest != target:
            seq[target], seq[smallest] = seq[smallest], seq[target]
def insertion_sort(seq):
    """Sort ``seq`` in place, in ascending order, using insertion sort.

    The prefix ``seq[:boundary]`` is always sorted (initially it holds a
    single element). Each step takes the next element and inserts it into
    that prefix, shifting larger elements one slot to the right. Nothing is
    returned.
    """
    for boundary in range(1, len(seq)):
        pending = seq[boundary]  # element to be placed in the sorted prefix
        hole = boundary
        # Scan the sorted prefix from right to left, opening a gap.
        for probe in range(boundary - 1, -1, -1):
            if not pending < seq[probe]:
                break
            seq[probe + 1] = seq[probe]
            hole = probe
        seq[hole] = pending
def merge_sorted_list(listA, listB):
    """Merge two ascending sequences into one new ascending list.

    The merge is stable: when an element of ``listA`` compares equal to an
    element of ``listB``, the one from ``listA`` is emitted first. Neither
    input is modified.

    Args:
        listA: first sorted sequence.
        listB: second sorted sequence.

    Returns:
        A new list containing every element of both inputs in sorted order.
    """
    merged = []
    a = b = 0
    len_a, len_b = len(listA), len(listB)
    while a < len_a and b < len_b:
        # Take from listB only when it is strictly smaller, so that ties are
        # resolved in favour of listA and the merge stays stable.
        if listB[b] < listA[a]:
            merged.append(listB[b])
            b += 1
        else:
            merged.append(listA[a])
            a += 1
    # At most one of the two inputs still has elements left.
    merged.extend(listA[a:])
    merged.extend(listB[b:])
    return merged
第6章: Linked Structure
list是最常用的数据结构,但是list在中间增减元素的时候效率会很低,这时候linked list会更适合,缺点就是获取元素的平均时间复杂度变成了O(n)
# 单链表实现
class ListNode:
    """A node of a singly linked list.

    ``data`` holds the payload and ``next`` refers to the following node;
    a freshly created node has ``next`` set to ``None``.
    """

    def __init__(self, data):
        self.next = None
        self.data = data
def travsersal(head, callback):
    """Call ``callback`` with the ``data`` of every node, from head to tail.

    ``head`` is the first node of the list, or ``None`` for an empty list,
    in which case ``callback`` is never invoked.
    """
    cursor = head
    while True:
        if cursor is None:
            break
        callback(cursor.data)
        cursor = cursor.next
def unorderdSearch(head, target):
    """Report whether some node of the list holds ``target``.

    Walks the list linearly from ``head`` and returns ``True`` as soon as a
    node whose ``data`` is not different from ``target`` is found, or
    ``False`` when the end of the list is reached.
    """
    cursor = head
    while cursor is not None:
        if cursor.data != target:
            cursor = cursor.next
            continue
        return True
    return False
# Given the head pointer, prepend an item to an unsorted linked list.
def prepend(head, item):
    """Insert ``item`` at the front of a linked list and return the new head.

    Rebinding the ``head`` parameter inside the function is invisible to
    the caller, so the new head node is returned instead; use it as
    ``head = prepend(head, item)``.

    Args:
        head: current first node of the list, or ``None`` for an empty list.
        item: the value to store in the new first node.

    Returns:
        The newly created node, which is now the head of the list.
    """
    newNode = ListNode(item)
    newNode.next = head
    return newNode
# Given the head reference, remove a target from a linked list
def remove(head, target):
    """Unlink the first node holding ``target`` and return the list head.

    The head can change when the first node is the one removed, and a
    rebinding of the ``head`` parameter is invisible to the caller, so the
    (possibly new) head is returned; use it as
    ``head = remove(head, target)``.

    Args:
        head: first node of the list, or ``None`` for an empty list.
        target: the value to remove.

    Returns:
        The head of the list after removal. It is the original head when
        ``target`` is not found, and ``None`` when the list becomes empty.
    """
    predNode = None
    curNode = head
    # Look for the target, remembering the node just before it.
    while curNode is not None and curNode.data != target:
        predNode = curNode
        curNode = curNode.next
    if curNode is None:
        # Target not present: the list is unchanged.
        return head
    if predNode is None:
        # The target is in the head node, so the next node becomes the head.
        return curNode.next
    predNode.next = curNode.next
    return head
第7章:Stacks
栈也是计算机里用得比较多的数据结构,栈是一种后进先出的数据结构,可以理解为往一个桶里放盘子,先放进去的会被压在底下,拿盘子的时候,后放的会被先拿出来。
class Stack:
    """Stack ADT backed by a Python list; the top is the end of the list.

    Operations:
        Stack()      create an empty stack
        isEmpty()    True when the stack holds no items
        len(stack)   number of items
        push(item)   put ``item`` on top
        peek()       return the top item without removing it (asserts that
                     the stack is not empty)
        pop()        remove and return the top item (asserts that the stack
                     is not empty)
    """

    def __init__(self):
        self._items = []

    def __len__(self):
        return len(self._items)

    def isEmpty(self):
        return not len(self)

    def push(self, item):
        self._items.append(item)

    def peek(self):
        assert not self.isEmpty()
        return self._items[-1]

    def pop(self):
        assert not self.isEmpty()
        return self._items.pop()
class Stack:
    """Stack ADT backed by a singly linked list.

    A list-backed stack is simple, but a push that finds the list's
    allocated space exhausted degrades to O(n). With a linked list every
    push and pop stays O(1) even in the worst case.

    Operations:
        Stack()      create an empty stack
        isEmpty()    True when the stack holds no items
        len(stack)   number of items
        push(item)   put ``item`` on top
        peek()       return the top item without removing it (asserts that
                     the stack is not empty)
        pop()        remove and return the top item (asserts that the stack
                     is not empty)
    """

    def __init__(self):
        self._top = None  # top node: a _StackNode, or None when empty
        self._size = 0    # number of items currently stored

    def isEmpty(self):
        return self._top is None

    def __len__(self):
        return self._size

    def peek(self):
        assert not self.isEmpty()
        return self._top.item

    def pop(self):
        assert not self.isEmpty()
        node = self._top
        # Advance the real top pointer so the popped node is unlinked.
        self._top = node.next
        self._size -= 1
        return node.item

    def push(self, item):
        # The new node links to the previous top and becomes the top.
        self._top = _StackNode(item, self._top)
        self._size += 1

    # Former name of push(), kept so existing callers keep working.
    _push = push


class _StackNode:
    """Private node type of the linked Stack: an item plus a link below."""

    def __init__(self, item, link):
        self.item = item
        self.next = link
第8章:Queues
队列也是经常使用的数据结构,比如发送消息等,celery可以使用redis提供的list实现消息队列。 本章我们用list和linked list来实现队列和优先级队列。
class Queue:
    """Queue ADT backed by a Python list.

    Simple, but the worst case of enqueue and dequeue is O(n): appending
    may have to grow the list, and removing the front element shifts every
    remaining element.

    Operations:
        Queue()        create an empty queue
        isEmpty()      True when the queue holds no items
        len(queue)     number of items
        enqueue(item)  add ``item`` at the back
        dequeue()      remove and return the front item (asserts that the
                       queue is not empty)
    """

    def __init__(self):
        self._qList = list()

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        return len(self._qList)

    def enqueue(self, item):
        self._qList.append(item)

    # Former, misspelled name of enqueue(); kept so existing callers keep
    # working.
    enquue = enqueue

    def dequeue(self):
        assert not self.isEmpty()
        return self._qList.pop(0)
from array import Array # Array那一章实现的Array ADT
class Queue:
    """Queue ADT backed by a fixed-size circular array.

    Front and back indices wrap around the array, so enqueue and dequeue
    are O(1) without the cost a list-backed queue pays for growing and
    shifting. The trade-off is that the capacity is fixed at creation.

    Operations:
        Queue(maxSize)  create an empty queue holding up to maxSize items
        isEmpty()       True when the queue holds no items
        isFull()        True when the queue holds maxSize items
        len(queue)      number of items
        enqueue(item)   add ``item`` at the back (asserts not full)
        dequeue()       remove and return the front item (asserts not empty)
    """

    def __init__(self, maxSize):
        self._count = 0
        self._front = 0
        # Start one slot "before" index 0 so the first enqueue lands on 0.
        self._back = maxSize - 1
        self._qArray = Array(maxSize)

    def isEmpty(self):
        return self._count == 0

    def isFull(self):
        return self._count == len(self._qArray)

    def __len__(self):
        # _count is already an int; it must not be passed through len().
        return self._count

    def enqueue(self, item):
        assert not self.isFull(), 'Can not enqueue to a full queue'
        maxSize = len(self._qArray)
        self._back = (self._back + 1) % maxSize  # advance the back index
        self._qArray[self._back] = item
        self._count += 1

    def dequeue(self):
        # Removing is impossible when empty; a full queue is fine.
        assert not self.isEmpty(), 'Can not dequeue from an empty queue'
        item = self._qArray[self._front]
        maxSize = len(self._qArray)
        self._front = (self._front + 1) % maxSize  # advance the front index
        self._count -= 1
        return item
class _QueueNode:
    """Private node type of the linked Queue: an item plus a forward link."""

    def __init__(self, item):
        self.item = item
        # The tail node is never linked by enqueue(), so the attribute must
        # exist from the start for dequeue() to read it.
        self.next = None


class Queue:
    """Queue ADT backed by a singly linked list with head and tail references.

    Removes the capacity limit of the circular-array queue while keeping
    enqueue and dequeue at O(1).

    Operations:
        Queue()        create an empty queue
        isEmpty()      True when the queue holds no items
        len(queue)     number of items
        enqueue(item)  add ``item`` at the back
        dequeue()      remove and return the front item (asserts that the
                       queue is not empty)
    """

    def __init__(self):
        self._qhead = None
        self._qtail = None
        self._qsize = 0

    def isEmpty(self):
        return self._qhead is None

    def __len__(self):
        return self._qsize

    def enqueue(self, item):
        node = _QueueNode(item)
        if self.isEmpty():
            self._qhead = node
        else:
            # Link the current tail to the new node.
            self._qtail.next = node
        self._qtail = node
        self._qsize += 1

    def dequeue(self):
        assert not self.isEmpty(), 'Can not dequeue from an empty queue'
        node = self._qhead
        if self._qhead is self._qtail:
            # Removing the only node leaves the queue without a tail.
            self._qtail = None
        self._qhead = self._qhead.next  # move the head forward
        self._qsize -= 1
        return node.item
class UnboundedPriorityQueue:
    """Priority queue ADT with no limit on the range of priorities.

    Every item is enqueued with a priority; the smallest priority value is
    served first, and items of equal priority leave in FIFO order.

    Priority queues come in two flavours:
        - bounded: priorities are restricted to an interval [0, p)
        - unbounded: priorities are not restricted (this class)

    Operations:
        UnboundedPriorityQueue()  create an empty queue
        isEmpty()                 True when the queue holds no items
        len(queue)                number of items
        enqueue(item, priority)   add ``item`` with the given priority
        dequeue()                 remove and return the item served first
                                  (asserts that the queue is not empty);
                                  ``deque`` is kept as an alias

    Two implementation strategies exist:
        1. Always append at the back and search for the entry to serve on
           dequeue, which makes dequeue O(n).
        2. Keep the entries ordered by inserting at the right place on
           enqueue, which makes dequeue O(1).
    This class uses strategy 1 on top of the built-in list. Note that list
    append and pop can degrade because of memory reallocation.
    """

    from collections import namedtuple

    _PriorityQEntry = namedtuple('_PriorityQEntry', 'item, priority')

    def __init__(self):
        self._qlist = list()

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        return len(self._qlist)

    def enqueue(self, item, priority):
        entry = UnboundedPriorityQueue._PriorityQEntry(item, priority)
        self._qlist.append(entry)

    def dequeue(self):
        assert not self.isEmpty(), 'can not deque from an empty queue'
        # O(n) scan for the POSITION of the entry to serve. min() returns
        # the first minimum, so entries of equal priority stay in FIFO order.
        position = min(
            range(len(self._qlist)),
            key=lambda i: self._qlist[i].priority,
        )
        entry = self._qlist.pop(position)
        return entry.item

    # Former name of dequeue(), kept so existing callers keep working.
    deque = dequeue
class BoundedPriorityQueue:
    """Priority queue ADT whose priorities are limited to [0, numLevels - 1].

    With an unbounded priority queue, dequeue has to search all entries for
    the one to serve, which is O(n) on average. When the priorities are
    bounded, one FIFO queue per priority level can be kept in an array:
    enqueue goes straight to the slot of its priority and dequeue takes
    from the first non-empty slot, trading space for time.
    Smaller numbers mean higher priority and are served first.

        qlist
        [0] -> ["white"]
        [1]
        [2] -> ["black", "green"]
        [3] -> ["purple", "yellow"]

    Operations:
        BoundedPriorityQueue(numLevels)  create an empty queue
        isEmpty()                        True when the queue holds no items
        len(queue)                       number of items
        enqueue(item, priority)          add ``item``; asserts that
                                         0 <= priority < numLevels
        dequeue()                        remove and return the item served
                                         first (asserts not empty);
                                         ``deque`` is kept as an alias
    """

    # Implementation of the bounded priority queue ADT using an array of
    # queues in which the queues are implemented using a linked list.
    from array import Array  # project-local Array ADT defined in an earlier chapter

    def __init__(self, numLevels):
        self._qSize = 0
        self._qLevels = Array(numLevels)
        for i in range(numLevels):
            # One linked-list Queue per priority level.
            self._qLevels[i] = Queue()

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        # _qSize is already an int; it must not be passed through len().
        return self._qSize

    def enqueue(self, item, priority):
        assert priority >= 0 and priority < len(self._qLevels), 'invalid priority'
        # Go straight to the slot of this priority.
        self._qLevels[priority].enqueue(item)
        self._qSize += 1

    def dequeue(self):
        assert not self.isEmpty(), 'can not deque from an empty queue'
        i = 0
        p = len(self._qLevels)
        # Skip empty levels to reach the first non-empty queue.
        while i < p and self._qLevels[i].isEmpty():
            i += 1
        self._qSize -= 1
        return self._qLevels[i].dequeue()

    # Former name of dequeue(), kept so existing callers keep working.
    deque = dequeue



















