Text Processing Services
string
import string
classes:
functions:
data:
ascii_letters
ascii_lowercase
ascii_uppercase
digits
...
re
import re
classes:
functions:
compile(pattern, flags=0)
escape(pattern)
findall(pattern, string, flags=0)
finditer(pattern, string, flags=0)
match(pattern, string, flags=0) # returns None if the pattern does not match at the start of string, otherwise a match object.
search(pattern, string, flags=0)
purge()
split(pattern, string, maxsplit=0, flags=0) # 根据pattern分割string,返回分割后的列表.
sub(pattern, repl, string, count=0, flags=0)
subn(pattern, repl, string, count=0, flags=0)
template(pattern, flags=0)
data:
# flags:
IGNORECASE # 大小写不敏感
LOCALE
MULTILINE
DOTALL
VERBOSE
UNICODE
modules:
# sre_parse module
# functions:
sre_parse.parse(str, flags=0, pattern=None)
sre_parse.expand_template(template, match)
sre_parse.parse_template(source, pattern)
# sre_compile module
# functions:
sre_compile.compile(p, flags=0)
sre_compile.isstring(obj)
stringprep
difflib
textwrap
import textwrap
unicodedata
readline
rlcompleter
Binary Data Services
codecs
python的编码解码器coder/decoder。
import codecs
# 使用注册名为encoding的编码器编码obj,encoding默认为ascii。
# encode将unicode编码的obj编码成encoding编码对应的字节序列.
codecs.encode(obj[, encoding[, errors]])
codecs.encode(u'hello world', 'utf-8') # 编码成utf-8字节序.
# 使用注册名为encoding的解码器解码obj,encoding默认是ascii。
# decode将原来按照encoding编码的obj解码成unicode字符串.
codecs.decode(obj[, encoding[, errors]])
codecs.decode(obj, 'utf-8') # 将utf-8编码的obj解码成unicode.
# encoding取下面值:
# ascii是默认值,gb2312, gbk, gb18030, utf-8, utf-16
# ascii利用一个字节把字符转换成数字.
# unicode利用多字节转换,支持多种编码方式,utf-8, utf-16.
# errors取下面值:
# strict, 默认值,抛出UnicodeError异常。
# ignore
# replace
# xmlcharrefreplace
# backslashreplace
functions:
open(filename, mode='rb', encoding=None, errors='strict', buffering=1)
data:
struct
import struct
# struct模块提供了将C语言的struct转换成python的bytes对象的功能.
# 也可以将bytes对象转换成C语言的struct.
Data Types
datetime
import datetime
classes:
# datetime.date
date(year, month, day)
# methods:
ctime(...)
...
# data descriptors:
day
month
year
# datetime.datetime(datetime.date)
datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
# methods:
datetime.datetime.strptime(string, format)
format_datetime = datetime.datetime.strptime('20160824161431', '%Y%m%d%H%M%S') # return: datetime.datetime(2016, 8, 24, 16, 14, 31)
format_datetime = datetime.datetime.strptime('24 August 2016 16:14:31', '%d %B %Y %H:%M:%S') # return: datetime.datetime(2016, 8, 24, 16, 14, 31)
datetime.datetime.strftime(format)
string_datetime = format_datetime.strftime("%d %B %Y %H:%M:%S") # return: '24 August 2016 16:14:31'
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') # return: '2017-02-07 23:07:32'
str(datetime.datetime.now())
datetime.datetime.now().strftime('%Y%m%d%H%M%S') # 当前时间戳
# datetime.time
# datetime.timedelta
# datetime.tzinfo
data:
MAXYEAR = 9999
MINYEAR = 1
calendar
zoneinfo
collections
import collections
classes:
Callable
# 用于检查对象是否可调用
Container
# 用于检查对象是否是容器类型
Counter(iterable_or_mapping=None, **kwds)
# 返回一个Counter类型的实例,类似于dict,统计元素出现的次数
Hashable
# 用于检查对象是否可哈希
Iterable
# 用于检查对象是否可迭代
Iterator
# 用于检查对象是否是迭代器
functions:
namedtuple(typename, field_names[, verbose[, rename]]) # 返回一个带命名字段的新tuple子类(不是实例),类似于C语言的struct
OrderedDict([items]) # 返回一个OrderedDict类型的实例,类似于dict,但是有序
ChainMap(*maps) # 返回一个ChainMap类型的实例,类似于dict,但是可以合并多个字典
UserDict([mapping]) # 返回一个UserDict类型的实例,类似于dict,但是可以继承
UserList([sequence]) # 返回一个UserList类型的实例,类似于list,但是可以继承
UserString([string]) # 返回一个UserString类型的实例,类似于str,但是可以继承
heapq
heapq模块提供了堆队列算法,也称为优先队列算法.
import heapq
functions:
heappush(heap, item) # 把item放到堆中
heappop(heap) # 弹出堆顶元素, 也就是最小的元素
bisect
array
weakref
types
copy
copy和deepcopy都只拷贝对象的类型和数值,不拷贝对象的ID.也就是==运算为True, is(id())运算为False.
import copy
functions:
copy.copy(x) # 浅复制, 只拷贝父对象,不拷贝内部的子对象.
copy.deepcopy(x, memo=None, _nil=[]) # 深复制, 拷贝父对象和内部的子对象
pprint
reprlib
enum
import enum
graphlib
Data Persistence
pickle/cPickle/marshal提供对象的序列化操作.
dbm相关的模块anydbm/whichdb/dbm/gdbm/dumbdbm提供类似字典和文件的对象.
shelve集合了以上两者的功能.
pickle
pickle模块提供了对象的序列化和反序列化操作.
import pickle
functions:
dump(obj, file[, protocol, *, fix_imports, buffer_callback]) # 将对象obj序列化到文件file中
dumps(obj[, protocol, *, fix_imports, buffer_callback]) # 将对象obj序列化为字节串
load(file[, *, encoding, errors, buffers]) # 从文件file中反序列化对象
loads(data[, *, encoding, errors, buffers]) # 从字节串data中反序列化对象
data:
HIGHEST_PROTOCOL = 5 # Python 3.8+为5; Python 2中为2
marshal
shelve
import shelve
with shelve.open(f, flag='n', writeback=True) as f:
r/w: 不存在报错
c: read-write to new/existing,不存在创建
n: read-write to new. 总是创建新的
copyreg
dbm
sqlite3
python访问数据库两种方式:
- ORM
- DB-API
ORM是对象关系映射(Object-Relational Mapping),相关模块有SQLAlchemy, SQLObject.
DB-API参考PEP248/249定义了Database的API。
https://www.python.org/dev/peps/pep-0249/
import sqlite3
classes:
# sqlite3.Connection
# methods:
close(...)
commit(...)
cursor(...)
execute(...)
...
# sqlite3.Cursor
# methods:
close(...)
...
functions:
adapt(obj, protocol, alternate)
connect(database[, timeout, isolation_level, detect_types, factory])
...
data: