PSL_Data

Text Processing Services

string

import string

classes:

functions:

data:

ascii_letters
ascii_lowercase
ascii_uppercase
digits
...

re

import re

classes:

functions:

compile(pattern, flags=0)
escape(pattern)
findall(pattern, string, flags=0)
finditer(pattern, string, flags=0)
match(pattern, string, flags=0) # returns None if there is no match, otherwise returns a match object.
search(pattern, string, flags=0)
purge()
split(pattern, string, maxsplit=0, flags=0) # 根据pattern分割string,返回分割后的列表.
sub(pattern, repl, string, count=0, flags=0)
subn(pattern, repl, string, count=0, flags=0)
template(pattern, flags=0)

data:

# flags:
IGNORECASE # 大小写不敏感
LOCALE
MULTILINE
DOTALL
VERBOSE
UNICODE

modules:

# sre_parse module
# functions:
sre_parse.parse(str, flags=0, pattern=None)
sre_parse.expand_template(template, match)
sre_parse.parse_template(source, pattern)

# sre_compile module
# functions:
sre_compile.compile(p, flags=0)
sre_compile.isstring(obj)

stringprep

difflib

textwrap

import textwrap

unicodedata

readline

rlcompleter


Binary Data Services

codecs

python的编码解码器coder/decoder。

import codecs

# 使用注册名为encoding的编码器编码obj,encoding默认为ascii。
# encode将unicode编码的obj编码成encoding编码对应的字节序列.
codecs.encode(obj, [encoding[,errors]])
codecs.encode(u'hello world', 'utf-8') # 编码成utf-8字节序.

# 使用注册名为encoding的解码器解码obj,encoding默认是ascii。
# decode将原来按照encoding编码的obj解码成unicode字符串.
codecs.decode(obj, [encoding[,errors]])
codecs.decode(obj, 'utf-8') # 将utf-8编码的obj解码成unicode.

# encoding取下面值:
# ascii是默认值,gb2312, gbk, gb18030, utf-8, utf-16
# ascii利用一个字节把字符转换成数字.
# unicode利用多字节转换,支持多种编码方式,utf-8, utf-16.

# errors取下面值:
# strict, 默认值,抛出UnicodeError异常。
# ignore
# replace
# xmlcharrefreplace
# backslashreplace

functions:

open(filename, mode='rb', encoding=None, errors='strict', buffering=1)

data:

struct

import struct

# struct模块提供了将C语言的struct转换成python的bytes对象的功能.
# 也可以将bytes对象转换成C语言的struct.

Data Types

datetime

import datetime

classes:

# datetime.date
date(year, month, day)
# methods:
ctime(...)
...
# data descriptors:
day
month
year

# datetime.datetime(datetime.date)
datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
# methods:
datetime.datetime.strptime(string, format)
format_datetime = datetime.datetime.strptime('20160824161431', '%Y%m%d%H%M%S') # return: datetime.datetime(2016, 8, 24, 16, 14, 31)
format_datetime = datetime.datetime.strptime('24 August 2016 16:14:31', '%d %B %Y %H:%M:%S') # return: datetime.datetime(2016, 8, 24, 16, 14, 31)

datetime.datetime.strftime(format)
string_datetime = format_datetime.strftime("%d %B %Y %H:%M:%S") # return: '24 August 2016 16:14:31'
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') # return: '2017-02-07 23:07:32'

str(datetime.datetime.now())
datetime.datetime.now().strftime('%Y%m%d%H%M%S') # 当前时间戳

# datetime.time

# datetime.timedelta

# datetime.tzinfo

data:

MAXYEAR = 9999
MINYEAR = 1

calendar

zoneinfo

collections

import collections

classes:

Callable 
# 用于检查对象是否可调用

Container 
# 用于检查对象是否是容器类型

Counter(iterable_or_mapping=None, **kwds)
# 返回一个Counter类型的实例,类似于dict,统计元素出现的次数

Hashable
# 用于检查对象是否可哈希

Iterable
# 用于检查对象是否可迭代

Iterator
# 用于检查对象是否是迭代器

functions:

namedtuple(typename[, field_names[, verbose[, rename]]]) # 返回一个带命名字段的tuple子类(不是实例),类似于C语言的struct
OrderedDict([items]) # 返回一个OrderedDict类型的实例,类似于dict,但是有序
ChainMap(*maps) # 返回一个ChainMap类型的实例,类似于dict,但是可以合并多个字典
UserDict([mapping]) # 返回一个UserDict类型的实例,类似于dict,但是可以继承
UserList([sequence]) # 返回一个UserList类型的实例,类似于list,但是可以继承
UserString([string]) # 返回一个UserString类型的实例,类似于str,但是可以继承

heapq

heapq模块提供了堆队列算法,也称为优先队列算法.

import heapq

functions:

heappush(heap, item) # 把item放到堆中
heappop(heap) # 弹出堆顶元素, 也就是最小的元素

bisect

array

weakref

types

copy

copy和deepcopy都只拷贝对象的类型和数值,不拷贝对象的ID.也就是==运算为True, is(id())运算为False.

import copy

functions:

copy.copy(x) # 浅复制, 只拷贝父对象,不拷贝内部的子对象.
copy.deepcopy(x, memo=None, _nil=[]) # 深复制, 拷贝父对象和内部的子对象

pprint

reprlib

enum

import enum

graphlib


Data Persistence

pickle/cPickle/marshal提供对象的序列化操作.

dbm相关的模块anydbm/whichdb/dbm/gdbm/dumbdbm提供类似字典和文件的对象.

shelve集合了以上两者的功能.

pickle

pickle模块提供了对象的序列化和反序列化操作.

import pickle

functions:

dump(obj, file[, protocol, *, fix_imports, buffer_callback]) # 将对象obj序列化到文件file中
dumps(obj[, protocol, *, fix_imports, buffer_callback]) # 将对象obj序列化为字节串
load(file[, *, encoding, errors, buffers]) # 从文件file中反序列化对象
loads(data[, *, encoding, errors, buffers]) # 从字节串data中反序列化对象

data:

HIGHEST_PROTOCOL = 5 # Python 3.8+的取值; Python 2中为2

marshal

shelve

import shelve

with shelve.open(f, flag='n', writeback=True) as f:
r/w: 不存在报错
c: read-write to new/existing,不存在创建
n: read-write to new. 总是创建新的

copyreg

dbm

sqlite3

python访问数据库两种方式:

  1. ORM
  2. DB-API

ORM是对象关系映射(Object-Relational Mapping),相关模块有SQLAlchemy, SQLObject.

DB-API参考PEP248/249定义了Database的API。

https://www.python.org/dev/peps/pep-0249/

import sqlite3

classes:

# sqlite3.Connection
# methods:
close(...)
commit(...)
cursor(...)
execute(...)
...

# sqlite3.Cursor
# methods:
close(...)
...

functions:

adapt(obj, protocol, alternate)
connect(database[, timeout, isolation_level, detect_types, factory])
...

data:


Data Compression and Archiving

gzip

bz2

zlib

zipfile

tarfile

lzma


Designed by Canux