Text Processing Services

string

import string

classes:

functions:

data:

ascii_letters
ascii_lowercase
ascii_uppercase
digits
...

re

import re

classes:

functions:

compile(pattern, flags=0)
escape(pattern)
findall(pattern, string, flags=0)
finditer(pattern, string, flags=0)
match(pattern, string, flags=0) # matches only at the start of string; returns a match object, or None if there is no match.
search(pattern, string, flags=0)
purge()
split(pattern, string, maxsplit=0, flags=0) # 根据pattern分割string,返回分割后的列表．
sub(pattern, repl, string, count=0, flags=0)
subn(pattern, repl, string, count=0, flags=0)
template(pattern, flags=0)

data:

# flags:
IGNORECASE # 大小写不敏感
LOCALE
MULTILINE
DOTALL
VERBOSE
UNICODE

modules:

# sre_parse module
# functions:
sre_parse.parse(str, flags=0, pattern=None)
sre_parse.expand_template(template, match)
sre_parse.parse_template(source, pattern)

# sre_compile module
# functions:
sre_compile.compile(p, flags=0)
sre_compile.isstring(obj)

stringprep

difflib

textwrap

import textwrap

unicodedata

readline

rlcompleter

Binary Data Services

codecs

python的编码解码器coder/decoder。

import codecs

# 使用注册名为encoding的编码器编码obj，encoding默认为ascii。
# encode将unicode编码的obj编码成encoding编码对应的字节序列．
codecs.encode(obj[, encoding[, errors]])
codecs.encode(u'hello world', 'utf-8') # 编码成utf-8字节序．

# 使用注册名为encoding的解码器解码obj，encoding默认是ascii。
# decode将原来按照encoding编码的obj解码成unicode字符串.
codecs.decode(obj[, encoding[, errors]])
codecs.decode(obj, 'utf-8') # 将utf-8编码的obj解码成unicode.

# encoding取下面值:
# ascii是默认值,gb2312, gbk, gb18030, utf-8, utf-16
# ascii利用一个字节把字符转换成数字．
# unicode利用多字节转换，支持多种编码方式，utf-8, utf-16.

# errors取下面值：
# strict, 默认值，抛出UnicodeError异常。
# ignore
# replace
# xmlcharrefreplace
# backslashreplace

functions:

open(filename, mode='rb', encoding=None, errors='strict', buffering=1)

data:

struct

import struct

# struct模块提供了将C语言的struct转换成python的bytes对象的功能．
# 也可以将bytes对象转换成C语言的struct.

Data Types

datetime

import datetime

classes:

# datetime.date
date(year, month, day)
# methods:
ctime(...)
...
# data descriptors:
day
month
year

# datetime.datetime(datetime.date)
datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
# methods:
datetime.datetime.strptime(string, format)
format_datetime = datetime.datetime.strptime('20160824161431', '%Y%m%d%H%M%S') # return: datetime.datetime(2016, 8, 24, 16, 14, 31)
format_datetime = datetime.datetime.strptime('24 August 2016 16:14:31', '%d %B %Y %H:%M:%S') # return: datetime.datetime(2016, 8, 24, 16, 14, 31)

datetime.datetime.strftime(format)
string_datetime = format_datetime.strftime("%d %B %Y %H:%M:%S") # return: '24 August 2016 16:14:31'
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') # return: '2017-02-07 23:07:32'

str(datetime.datetime.now())
datetime.datetime.now().strftime('%Y%m%d%H%M%S') # 当前时间戳

# datetime.time

# datetime.timedelta

# datetime.tzinfo

data:

MAXYEAR = 9999
MINYEAR = 1

calendar

zoneinfo

collections

import collections

classes:

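Callable
# 用于检查对象是否可调用 (定义在collections.abc中; Python 3.10起不能再从collections直接导入)

Container
# 用于检查对象是否是容器类型 (定义在collections.abc中; Python 3.10起不能再从collections直接导入)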

Counter(iterable_or_mapping=None, **kwds)
# 返回一个Counter类型的实例，类似于dict，统计元素出现的次数

HashableDict(mapping=None, **kwargs)

Iterable
# 抽象基类(定义在collections.abc中)，不能直接实例化，用于检查对象是否可迭代

Iterator
# 抽象基类(定义在collections.abc中)，不能直接实例化，用于检查对象是否是迭代器

functions:

namedtuple(typename, field_names, *, rename=False, defaults=None, module=None) # 返回一个新的tuple子类(而不是实例)，类似于C语言的struct
OrderedDict([items]) # 返回一个OrderedDict类型的实例，类似于dict，但是有序
ChainMap(*maps) # 返回一个ChainMap类型的实例，类似于dict，但是可以合并多个字典
UserDict([mapping]) # 返回一个UserDict类型的实例，类似于dict，但是可以继承
UserList([sequence]) # 返回一个UserList类型的实例，类似于list，但是可以继承
UserString([string]) # 返回一个UserString类型的实例，类似于str，但是可以继承

heapq

heapq模块提供了堆队列算法，也称为优先队列算法．

import heapq

functions:

heappush(heap, item) # 把item放到堆中
heappop(heap) # 弹出堆顶元素, 也就是最小的元素

bisect

array

weakref

types

copy

copy和deepcopy都只拷贝对象的类型和数值，不拷贝对象的ID.也就是==运算为True, is(id())运算为False.

import copy

functions:

copy.copy(x) # 浅复制, 只拷贝父对象，不拷贝内部的子对象.
copy.deepcopy(x, memo=None, _nil=[]) # 深复制, 拷贝父对象和内部的子对象

pprint

reprlib

enum

import enum

graphlib

Data Persistence

pickle/cPickle/marshal提供对象的序列化操作．

dbm相关的模块anydbm/whichdb/dbm/gdbm/dumbdbm提供类似字典和文件的对象．

shelve集合了以上两者的功能．

pickle

pickle模块提供了对象的序列化和反序列化操作．

import pickle

functions:

dump(obj, file[, protocol, *, fix_imports, buffer_callback]) # 将对象obj序列化到文件file中
dumps(obj[, protocol, *, fix_imports, buffer_callback]) # 将对象obj序列化为字节串
load(file[, *, encoding, errors, buffers]) # 从文件file中反序列化对象
loads(data[, *, encoding, errors, buffers]) # 从字节串data中反序列化对象

data:

HIGHEST_PROTOCOL = 5 # Python 3.8+; Python 2中为2

marshal

shelve

import shelve

with shelve.open(filename, flag='n', writeback=True) as db:
r: read-only, 不存在报错
w: read-write to existing, 不存在报错
c: read-write to new/existing，不存在创建 (默认值)
n: read-write to new. 总是创建新的

copyreg

dbm

sqlite3

python访问数据库两种方式：

ORM
DB-API

ORM是对象-关系映射(Object-Relational Mapping)，相关模块有SQLAlchemy, SQLObject.

DB-API参考PEP248/249定义了Database的API。

https://www.python.org/dev/peps/pep-0249/

import sqlite3

classes:

# sqlite3.Connection
# methods:
close(...)
commit(...)
cursor(...)
execute(...)
...

# sqlite3.Cursor
# methods:
close(...)
...

functions:

adapt(obj, protocol, alternate)
connect(database[, timeout, isolation_level, detect_types, factory])
...

Text Processing Services

string

re

stringprep

difflib

textwrap

unicodedata

readline

rlcompleter

Binary Data Services

codecs

struct

Data Types

datetime

calendar

zoneinfo

collections

heapq

bisect

array

weakref

types

copy

pprint

reprlib

enum

graphlib

Data Persistence

pickle

marshal

shelve

copyreg

dbm

sqlite3

Data Compression and Archiving

gzip

bz2

zlib

zipfile

tarfile

lzma