数据结构和算法的Python编程手册

发表时间: 2016-07-24 17:20

PythonCookbook学习笔记

第一章 数据结构和算法

1.1 将序列分解为单独的变量

p = (4, 5)x, y = pprint x print y data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]name, shares, price, date = dataprint nameprint shares print price print date name, shares, price, (year, mon, day ) = dataprint year p = (4, 5)#x, y, z = p 错误!!!s = 'hello!'a, b, c, d, e, f = sprint aprint fdata = [ 'ACME', 50, 91.1, (2012, 12, 21) ]_, shares, price, _ = data print sharesprint price#其他数据可以丢弃了

1.2 从任意长度的可迭代对象中分解元素

from audioop import avgdef drop_first_last(grades):    first, *middle, last = grades    return avg(middle)record = ('Dave', 'dave@example.com', '777-333-2323', '234-234-2345')name, email, *phone_numbers = recordprint name print emailprint phone_numbers*trailing, current = [10, 8, 7, 2, 5]print trailing  #[10, 8, 7, 2, ]print current #5records = [ ('foo', 1, 2), ('bar', 'hello'), ('foo', 5, 3) ]def do_foo(x, y):    print ('foo', x, y)def do_bar(s):    print ('bar', s)for tag, *args in records:    if tag == 'foo':        do_foo(*args)    elif tag == 'bar':        do_bar(*args)        line = 'asdf:fedfr234://wef:678d:asdf'uname, *fields, homedir, sh = line.split(':')print uname print homedirrecord = ('ACME', 50, 123.45, (12, 18, 2012))name, *_, (*_, year) = recordprint nameprint yearitems = [1, 10, 7, 4, 5, 9]head, *tail = itemsprint head #1print tail #[10, 7, 4, 5, 9]def sum(items):    head, *tail = items    return head + sum(tail) if tail else headsum(items)

1.3 保存最后N个元素

from _collections import dequedef search(lines, pattern, history=5):    previous_lines = deque(maxlen = history)    for line in lines:        if pattern in line: yield line, previous_lines        previous_lines.append(line)# Example use on a fileif __name__ == '__main__':    with open('somefile.txt') as f:        for line, prevlines in search(f, 'python', 5): for pline in prevlines: print (pline) #print (pline, end='') print (line) #print (pline, end='') print ('-'*20) q = deque(maxlen=3)q.append(1)q.append(2)q.append(3)print qq.append(4)print qq = dequeq.append(1)q.append(2)q.append(3)print qq.appendleft(4)print qq_pop = q.popprint q_popprint qq_popleft = q.popleftprint q_popleftprint q

1.4 找到最大或最小的N个元素

import heapqnums = [1,30,6,2,36,33,46,3,23,43]print (heapq.nlargest(3, nums))print (heapq.nsmallest(3, nums))portfolio = [ {'name':'IBM', 'shares':100, 'price':2.4}, {'name':'A', 'shares':1040, 'price':12.4}, {'name':'S', 'shares':40, 'price':23.4}, {'name':'D', 'shares':1, 'price':2.49}, {'name':'F', 'shares':9, 'price':24} ]cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])print cheapprint expensivenums = [1,8,2,23,7,-4,18,23,42,37,2]heap = list(nums)print heapheapq.heapify(heap)print heapprint heapq.heappop(heap)print heapq.heappop(heap)print heapq.heappop(heap)

1.5 实现优先级队列

import heapqclass PriorityQueue:    def __init__(self):        self._queue =         self._index = 0    def push(self, item, priority):        heapq.heappush(self._queue, (-priority, self._index, item))        self._index += 1    def pop(self):        return heapq.heappop(self._queue)[-1]#Exampleclass Item:    def __init__(self, name):        self.name = name    def __repr__(self):        return 'Item({!r})'.format(self.name)q = PriorityQueueq.push(Item('foo'), 1)q.push(Item('spam'), 4)q.push(Item('bar'), 5)q.push(Item('grok'), 1)print q.popprint q.popprint q.popa = Item('foo')b = Item('bar')#a < b    errora = (1, Item('foo'))b = (5, Item('bar'))print a < bc = (1, Item('grok'))#a < c  errora = (1, 0, Item('foo'))b = (5, 1, Item('bar'))c = (1, 2, Item('grok'))print a < bprint a < c

1.6 在字典中将建映射到多个值上

d = {        'a' : [1, 2, 3],        'b' : [4, 5]     }e = {        'a' : {1, 2, 3},        'b' : {4, 5}     }from collections import defaultdictd = defaultdict(list)d['a'].append(1)d['a'].append(2)d['a'].append(3)print dd = defaultdict(set)d['a'].add(1)d['a'].add(2)d['a'].add(3)print dd = {}d.setdefault('a', []).append(1)d.setdefault('a', []).append(2)d.setdefault('b', []).append(3)print d d = {}for key, value in d:#pairs:    if key not in d:        d[key] =     d[key].append(value)d = defaultdict(list)for key, value in d:#pairs:    d[key].append(value)

1.7 让字典保持有序

from collections import OrderedDictd = OrderedDictd['foo'] = 1d['bar'] = 2d['spam'] = 3d['grol'] = 4for key in d:    print (key, d[key])    import jsonjson.dumps(d)

1.8 与字典有关的计算问题

price = { 'ACME':23.45, 'IBM':25.45, 'FB':13.45, 'IO':4.45, 'JAVA':45.45, 'AV':38.38,         }min_price = min( zip( price.values, price.keys ) )print min_pricemax_price = max( zip( price.values, price.keys ) )print max_priceprice_sorted = sorted( zip( price.values, price.keys ) )print price_sorted   price_and_names = zip( price.values, price.keys )print (min(price_and_names))#print (max(price_and_names))  error  zip创建了迭代器,内容只能被消费一次print min(price)print max(price)print min(price.values)print max(price.values)print min(price, key = lambda k : price[k])print max(price, key = lambda k : price[k])min_value = price[ min(price, key = lambda k : price[k]) ]print min_valueprice = { 'AAA': 23, 'ZZZ': 23,         }print min( zip( price.values, price.keys ) )print max( zip( price.values, price.keys ) )

1.9 在两个字典中寻找相同点

a = {        'x':1,        'y':2,        'z':3     }b = {        'x':11,        'y':2,        'w':10     }print a.keys & b.keys #{'x','y'}print a.keys - b.keys #{'z'}print a.items & b.items #{('y', 2)}c = {key: a[key] for key in a.keys - {'z', 'w'} }print c #{'x':1, 'y':2}

1.10 从序列中移除重复项且保持元素间顺序不变

def dedupe(items):    seen = set    for item in items:        if item not in seen: yield item seen.add(item)#examplea = [1,5,2,1,9,1,5,10]print list(dedupe(a))def dedupe2(items, key = None):    seen = set    for item in items:        val = item if key is None else key(item)        if val not in seen: yield item seen.add(val) #examplea = [         {'x':1, 'y':2},         {'x':1, 'y':3},         {'x':1, 'y':2},         {'x':2, 'y':4},      ]print list( dedupe2(a, key=lambda d : (d['x'], d['y']) ) )print list( dedupe2(a, key=lambda d : (d['x']) ) )a = [1,5,2,1,9,1,5,10]print set(a)   

1.11 对切片命名

items = [0,1,2,3,4,5,6]a = slice(2,4)print items[2:4]print items[a]items[a] = [10,11]print itemsprint a.startprint a.stopprint a.step

1.12 找出序列中出现次数最多的元素

words = [ 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes', 'the', 'look'         ]from collections import Counterword_counts = Counter(words)top_three = word_counts.most_common(3)print top_threeprint word_counts['look']print word_counts['the']morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']for word in morewords:    word_counts[word] += 1print word_counts['eyes']print word_counts['why']word_counts.update(morewords)print word_counts['eyes']print word_counts['why']a = Counter(words)b = Counter(morewords)print aprint bc = a + bprint cd = a - bprint b

1.13 通过公共键对字典列表排序

rows = [ {'fname':'Brian', 'lname':'Jones', 'uid':1003}, {'fname':'David', 'lname':'Beazley', 'uid':1002}, {'fname':'John', 'lname':'Cleese', 'uid':1001}, {'fname':'Big', 'lname':'Jones', 'uid':1004}        ]from operator import itemgetterrows_by_fname = sorted(rows, key=itemgetter('fname'))rows_by_uid = sorted(rows, key=itemgetter('uid'))print rows_by_fnameprint rows_by_uidrows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname'))print rows_by_lfnamerows_by_fname = sorted(rows, key=lambda r: r['fname'])rows_by_lfname = sorted(rows, key=lambda r: (r['fname'], r['lname']))print rows_by_fnameprint rows_by_lfnameprint min(rows, key=itemgetter('uid'))print max(rows, key=itemgetter('uid'))

1.14 对不原生支持比较操作的对象排序

class User:    def __init__(self, user_id):        self.user_id = user_id    def __repr__(self):        return 'User({})'.format(self.user_id)users = [User(23), User(3), User(99)]print usersprint sorted(users, key = lambda u: u.user_id)from operator import attrgetterprint sorted(users, key=attrgetter('user_id'))print min(users, key=attrgetter('user_id'))print max(users, key=attrgetter('user_id'))

1.15 根据字段将记录分组

rows = [ {'address':'5412 N CLARK', 'data':'07/01/2012'}, {'address':'5232 N CLARK', 'data':'07/04/2012'}, {'address':'5542 E 58ARK', 'data':'07/02/2012'}, {'address':'5152 N CLARK', 'data':'07/03/2012'}, {'address':'7412 N CLARK', 'data':'07/02/2012'}, {'address':'6789 w CLARK', 'data':'07/03/2012'}, {'address':'9008 N CLARK', 'data':'07/01/2012'}, {'address':'2227 W CLARK', 'data':'07/04/2012'}        ]from operator import itemgetterfrom itertools import groupbyrows.sort(key=itemgetter('data'))for data, items in groupby(rows, key=itemgetter('data')):    print (data)    for i in items:        print (' ', i)        from collections import defaultdictrows_by_date = defaultdict(list)for row in rows:    rows_by_date[row['data']].append(row)for r in rows_by_date['07/04/2012']:    print(r)

1.16 筛选序列中的元素

mylist = [1,4,-5,10,-7,2,3,-1]print [n for n in mylist if n > 0]#列表推导式print [n for n in mylist if n < 0]pos = (n for n in mylist if n > 0)#生成器表达式print posfor x in pos:    print(x)values = ['1', '2', '-3', '-', '4', 'N/A', '5']def is_int(val):    try:        x = int(val)        return True    except ValueError:        return Falseivals = list(filter(is_int, values))print(ivals)mylist = [1,4,-5,10,-7,2,3,-1]import mathprint [math.sqrt(n) for n in mylist if n > 0]clip_neg = [n if n > 0 else 0 for n in mylist]print clip_negclip_pos = [n if n < 0 else 0 for n in mylist]print clip_posaddresses = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']counts = [0, 3, 10, 4, 1, 7, 6, 1]from itertools import compressmore5 = [n > 5 for n in counts]print more5print list(compress(addresses, more5))

1.17 从字典中提取子集

prices = {'ACNE':45.23, 'AAPL':612.78, 'IBM':205.55, 'HPQ':37.20, 'FB':10.75}p1 = { key:value for key, value in prices.items if value > 200 }print p1tech_names = {'AAPL', 'IBM', 'HPQ'}p2 = { key:value for key, value in prices.items if key in tech_names }print p2p3 = dict( (key, value) for key, value in prices.items if value > 200 ) #慢print p3tech_names = {'AAPL', 'IBM', 'HPQ'}p4 = { key:prices[key] for key in prices.keys if key in tech_names } #慢print p4

1.18 将名称映射到序列的元素中

from collections import namedtupleSubscriber = namedtuple('Subscriber', ['addr', 'joined'])sub = Subscriber('wang@qq.com', '2020-10-10')print subprint sub.joinedprint sub.addrprint len(sub)addr, joined = subprint addrprint joineddef compute_cost(records):    total = 0.0    for rec in records:        total += rec[1]*rec[2]    return totalStock = namedtuple('Stock', ['name', 'shares', 'price'])def compute_cost2(records):    total = 0.0    for rec in records:        s = Stock(*rec)        total += s.shares * s.price    return totals = Stock('ACME', 100, 123.45)print s#s.shares = 75    #errors = s._replace(shares=75)print sStock = namedtuple('Stock', ['name', 'shares', 'price', 'date', 'time'])stock_prototype = Stock('',0, 0.0, None, None)def dict_to_stock(s):    return stock_prototype._replace(**s)a = {'name':'ACME', 'shares':100, 'price':123.45}print dict_to_stock(a)b = {'name':'ACME', 'shares':100, 'price':123.45, 'date':'12/12/2012'}print dict_to_stock(b)

1.19 同时对数据做转换和换算

nums = [1, 2, 3, 4, 5]s = sum( x*x for x in nums )print simport osfiles = os.listdir('dirname')if any(name.endswith('.py') for name in files):    print('There be Python!')else:    print('sorry, no Python!')    s = ('ACME', 50, 123.45)print(','.join(str(x) for x in s))portfolio = [ {'name':'GOOG', 'shares':50}, {'name':'YHOO', 'shares':75}, {'name':'AOL', 'shares':20}, {'name':'SCOX', 'shares':65} ]min_shares = min(s['shares'] for s in portfolio)print min_shares    min_shares = min(portfolio, key=lambda s: s['shares'])print min_shares

1.20 将多个映射合并为单个映射

a = {'x':1, 'z':3}b = {'y':2, 'z':4}#from collections import ChainMapfrom pip._vendor.distlib.compat import ChainMapc = ChainMap(a, b)print(c['x'])print(c['y'])print(c['z']) #from a    第一个映射中的值print len(c)print list(c.values)c['z'] = 10c['w'] = 40del c['x']print a#del c['y']    #error    修改映射的操作总是会作用在列表的第一个映射结构上values = ChainMapvalues['x'] = 1values = values.new_child#add a new mapvalues['x'] = 2values = values.new_childvalues['x'] = 3#print valuesprint values['x']values = values.parentsprint values['x']values = values.parentsprint values['x']a = {'x':1, 'z':3}b = {'y':2, 'z':4}merged = dict(b)merged.update(a)print merged['x']print merged['y']print merged['z']a['x'] = 13print merged['x']   #不会反应到合并后的字典中a = {'x':1, 'z':3}b = {'y':2, 'z':4}merged = ChainMap(a, b)print merged['x']a['x'] = 42print merged['x']   #会反应到合并后的字典中