
Writing Idiomatic Python 3.3(Kindle版)を読んで参考になった点~その2

第1弾に引き続き最近読んだ書籍「Writing Idiomatic Python 3.3」で参考になった点をBlogにまとめておこうと思います。

本書籍の特徴は、タイトルの通り様々な側面からまず悪いサンプル(Harmful)を示した上でPythonicなサンプル(Idiomatic)を示し、どこが悪いか、どこを直すべきかを分かりやすく解説している点です。今回は5章「Working with Data」で参考になったTipsをメモしておこうと思います。


5.2 Strings

5.2.1 Chain string functions to make a simple series of transformations more clear

# Harmful
# Each transformation is its own statement, rebinding the same name every time.
book_info = ' The Three Musketeers: Alexandre Dumas'
formatted_book_info = book_info.strip()
formatted_book_info = formatted_book_info.upper()
formatted_book_info = formatted_book_info.replace(':', ' by')

# Idiomatic
# The same three transformations, chained left to right in a single expression.
book_info = ' The Three Musketeers: Alexandre Dumas'
formatted_book_info = book_info.strip().upper().replace(':', ' by')

5.2.2 Use ''.join when creating a single string for list elements

# Harmful
# Builds the string by appending one list element per loop iteration.
result_list = ['True', 'False', 'File not found']
result_string = ''
for result in result_list:
    result_string += result

# Idiomatic
# str.join concatenates every element of the list in one call.
result_list = ['True', 'False', 'File not found']
result_string = ''.join(result_list)

5.2.3 Prefer the format function for formatting strings

# Harmful
def get_formatted_user_info_worst(user):
    """Format a user's name, age and sex by manual string concatenation.

    ``user`` must expose ``name``, ``age`` and ``sex`` attributes; ``age`` is
    converted with ``str`` because ``+`` cannot join a str and a number.
    """
    # The ', ' before 'Age: ' keeps the fields separated like the others.
    return 'Name: ' + user.name + ', Age: ' + str(user.age) + ', Sex: ' + user.sex

def get_formatted_user_info_slightly_better(user):
    """Format a user's name, age and sex with %-style formatting.

    ``%i`` requires ``user.age`` to be a number and ``%c`` requires
    ``user.sex`` to be a single character (or an integer code point).
    """
    # Fields are comma-separated, matching the other formatter variants.
    return 'Name: %s, Age: %i, Sex: %c' % (user.name, user.age, user.sex)

# Idiomatic
def get_formatted_user_info(user):
    """Return a one-line summary of ``user`` built with ``str.format``.

    The template reads the ``name``, ``age`` and ``sex`` attributes directly
    from the ``user`` keyword argument.
    """
    template = 'Name: {user.name}, Age: {user.age}, Sex: {user.sex}'
    return template.format(user=user)

5.3 Lists

5.3.1 Use a list comprehension to create a transformed version of an existing list

# Harmful
# The input must be bound to the same name the loop iterates over; range(10)
# matches the input used by the Idiomatic version of this example.
some_other_list = range(10)
some_list = list()
# Keep only the prime elements, storing each one shifted by 5.
for element in some_other_list:
    if is_prime(element):
        some_list.append(element + 5)

# Idiomatic
some_other_list = range(10)
# One expression does the filtering (is_prime) and the transformation (+ 5).
some_list = [element + 5
    for element in some_other_list
    if is_prime(element)]

5.4 Dictionaries

5.4.1 Use a dict as a substitute for a switch...case statement

# Harmful
# Python has no switch-case statement, so it is tempting to write this...
def apply_operation(left_operand, right_operand, operator):
    """Apply the arithmetic ``operator`` ('+', '-', '*' or '/') to the operands.

    Falls off the end of the if/elif chain, returning None implicitly, when
    ``operator`` is none of the four handled symbols.
    """
    if operator == '+':
        return left_operand + right_operand
    elif operator == '-':
        return left_operand - right_operand
    elif operator == '*':
        return left_operand * right_operand
    elif operator == '/':
        return left_operand / right_operand

# Idiomatic
def apply_operation(left_operand, right_operand, operator):
    """Apply the arithmetic ``operator`` to the two operands via a lookup table.

    Supported symbols are '+', '-', '*' and '/'.

    Raises:
        KeyError: if ``operator`` is not one of the supported symbols.
    """
    # Imported under an alias because the ``operator`` parameter shadows the
    # module name inside this function.
    import operator as op

    dispatch = {
        '+': op.add,
        '-': op.sub,
        '*': op.mul,
        '/': op.truediv,
    }
    handler = dispatch[operator]
    return handler(left_operand, right_operand)

5.4.2 Use the default parameter of dict.get to provide default values

# Harmful
log_severity = None
if 'severity' in configuration:
    log_severity = configuration['severity']
else:
    # Fall back to the default only when no severity is configured.
    log_severity = 'Info'

# Idiomatic
# get() returns the value stored under 'severity' when the key is present and
# the second argument, 'Info', otherwise.
log_severity = configuration.get('severity', 'Info')

5.4.3 Use a dict comprehension to build a dict clearly and efficiently

# Harmful
# Map each user's name to their email, skipping users whose email is falsy.
user_email = {}
for user in user_list:
    if user.email:
        user_email[user.name] = user.email

# Idiomatic
# The same name -> email mapping and the same filter, as a dict comprehension.
user_email = {user.name: user.email for user in user_list if user.email}

5.5 Sets

5.5.2 Use a set comprehension to generate sets concisely

# Harmful
users_first_names = set()
for user in users:
    # Add each first name explicitly; the set discards duplicates.
    users_first_names.add(user.first_name)

# Idiomatic
# A set comprehension collects every user's first_name in one expression.
users_first_names = {user.first_name for user in users}

5.6 Tuples

5.6.1 Use collections.namedtuple to make tuple-heavy code more clear

# Harmful
def print_employee_information(db_connection):
    """Print one summary line per row of the ``employees`` table.

    ``db_connection`` must provide ``cursor()``, and the cursor's
    ``execute(...)`` must return an object with ``fetchall()``. Every column
    is concatenated with ``+``, so each value has to be a string.
    """
    db_cursor = db_connection.cursor()
    results = db_cursor.execute('select * from employees').fetchall()
    # Positional indexes give no hint about which column is which, so it is
    # very easy to print a value in the wrong place with this approach.
    for row in results:
        print(row[1] + ', ' + row[0] + ' was hired on '
              + row[5] + ' (for $' + row[4] + ' per annum) into the '
              + row[2] + ' department and reports to ' + row[3])

# Idiomatic
# Assumes the 'employees' table has the following columns, in this order:
# first_name, last_name, department, manager, salary, hire_date
from collections import namedtuple

employee_row = namedtuple(
    'EmployeeRow',
    ['first_name', 'last_name', 'department', 'manager', 'salary', 'hire_date'])

EMPLOYEE_INFO_STRING = (
    '{last}, {first} was hired on {date} (for ${salary} per annum) '
    'into the {department} department and reports to {manager}')


def print_employee_information(db_connection):
    """Print one summary line per row of the ``employees`` table.

    Each row is converted to an ``employee_row`` namedtuple so that columns
    are referenced by name rather than by position.
    """
    db_cursor = db_connection.cursor()
    results = db_cursor.execute('select * from employees').fetchall()
    for row in results:
        employee = employee_row._make(row)
        # With named fields it is now almost impossible to print a value in
        # the wrong place.
        print(EMPLOYEE_INFO_STRING.format(
            last=employee.last_name,
            first=employee.first_name,
            date=employee.hire_date,
            salary=employee.salary,
            department=employee.department,
            manager=employee.manager))

5.6.2 Use _ as a placeholder for data in a tuple that should be ignored

# Harmful
(name, age, temp, temp2) = get_user_info(user)
# temp and temp2 are never used
if age > 21:
    output = '{name} can drink!'.format(name=name)

# Idiomatic
# '_' marks the tuple positions whose values are intentionally ignored.
(name, age, _, _) = get_user_info(user)
if age > 21:
    output = '{name} can drink!'.format(name=name)

5.6.3 Use tuples to unpack data

# Harmful
list_from_comma_separated_value_file = ['dog', 'Fido', 10]
# Each field is pulled out by its numeric index, one statement per field.
animal = list_from_comma_separated_value_file[0]
name = list_from_comma_separated_value_file[1]
age = list_from_comma_separated_value_file[2]
output = ('{name} the {animal} is {age} years old'.format(animal=animal, name=name, age=age))

# Idiomatic
list_from_comma_separated_value_file = ['dog', 'Fido', 10]
# Unpack all three fields at once; the right-hand side must be the list
# defined on the line above.
(animal, name, age) = list_from_comma_separated_value_file
output = ('{name} the {animal} is {age} years old'.format(animal=animal, name=name, age=age))

5.6.4 Use a tuple to return multiple values from a function

# Harmful
from collections import Counter

STATS_FORMAT = """Statistics:
Mean: {mean}
Median: {median}
Mode: {mode}"""

def calculate_mean(value_list):
    """Return the arithmetic mean of ``value_list`` as a float."""
    return float(sum(value_list) / len(value_list))

def calculate_median(value_list):
    """Return the element at index ``len(value_list) / 2`` (truncated).

    The list is used as given; no sorting is performed here.
    """
    return value_list[int(len(value_list) / 2)]

def calculate_mode(value_list):
    """Return the most common value in ``value_list``."""
    return Counter(value_list).most_common(1)[0][0]

# Three separate calls, one per statistic.
values = [10, 20, 20, 30]
mean = calculate_mean(values)
median = calculate_median(values)
mode = calculate_mode(values)

print(STATS_FORMAT.format(mean=mean, median=median, mode=mode))

# Idiomatic
from collections import Counter

STATS_FORMAT = """Statistics:
Mean: {mean}
Median: {median}
Mode: {mode}"""

def calculate_stastics(value_list):
    """Return ``(mean, median, mode)`` for ``value_list`` as one tuple.

    The median is the element at index ``len(value_list) / 2`` (truncated) of
    the list as given; no sorting is performed here.
    """
    mean = float(sum(value_list) / len(value_list))
    median = value_list[int(len(value_list) / 2)]
    mode = Counter(value_list).most_common(1)[0][0]
    return (mean, median, mode)

# A single call returns all three values, unpacked in one statement.
(mean, median, mode) = calculate_stastics([10, 20, 20, 30])
print(STATS_FORMAT.format(mean=mean, median=median, mode=mode))

5.7 Classes

5.7.2 Use properties to "future-proof" your class implementation

# Harmful
class Product():
    """Product that stores its name and price as plain instance attributes."""
    def __init__(self, name, price):
        self.name = name
        self.price = price

# Idiomatic
class Product():
    """Product whose price is exposed through a property.

    The raw price is kept in ``_price``; reading ``price`` returns it
    multiplied by the module-level ``TAX_RATE``.
    """

    def __init__(self, name, price):
        self.name = name
        self._price = price

    @property
    def price(self):
        """Return the stored price multiplied by ``TAX_RATE``."""
        return self._price * TAX_RATE

    @price.setter
    def price(self, value):
        # The "setter" function must have the same name as the property.
        self._price = value

5.7.3 Define __str__ in a class to show a human-readable representation

# Harmful
class Point():
    """2-D point with no custom string representation."""
    def __init__(self, x, y):
        self.x = x
        self.y = y
p = Point(1, 2)
# Without __str__, printing falls back to the default object representation.
print(p)
# Output: '<__main__.Point object at 0x91ebd0>'

# Idiomatic
class Point():
    """2-D point that prints as 'x, y'."""
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __str__(self):
        """Return the coordinates formatted as 'x, y'."""
        return '{0}, {1}'.format(self.x, self.y)

p = Point(1, 2)
print(p)

# Output: '1, 2'

5.8 Context Managers

5.8.1 Use a context manager to ensure resources are properly managed

# Harmful
# If an exception is raised, there is no way to close the opened file.
file_handle = open(path_to_file, 'r')
for line in file_handle.readlines():
    if raise_excepton(line):
        # Message spelling matches the Idiomatic version of this example.
        print('No! An Exception!')

# Idiomatic
# Opening the file through a context manager: because the object defines
# __enter__ and __exit__, the cleanup is handled easily.
with open(path_to_file, 'r') as file_handle:
    for line in file_handle:
        if raise_excepton(line):
            print('No! An Exception!')

5.9 Generators

5.9.1 Prefer a generator expression to a list comprehension for simple iteration

# Harmful
# A list comprehension immediately builds a list holding every element.
# For a huge list this consumes a very large amount of memory.
for uppercase_name in [name.upper() for name in get_all_usernames()]:
    # Placeholder body: the original per-name processing is not shown here.
    print(uppercase_name)

# Idiomatic
# A generator expression, by contrast, produces each element only on demand.
for uppercase_name in (name.upper() for name in get_all_usernames()):
    # Placeholder body: the original per-name processing is not shown here.
    print(uppercase_name)

5.9.2 Use a generator to lazily load infinite sequences

# Harmful
def get_twitter_stream_for_keyword(keyword):
    """Return the stream for ``keyword`` once, if the API says data is available.

    Returns None implicitly when ``can_get_stream_data`` is falsy. The check
    happens a single time, at the moment of the call.
    """
    imaginary_twitter_api = ImaginaryTwitterAPI()
    if imaginary_twitter_api.can_get_stream_data(keyword):
        return imaginary_twitter_api.get_stream(keyword)

current_stream = get_twitter_stream_for_keyword('#jeffknupp')
for tweet in current_stream:
    # Placeholder body: the original per-tweet processing is not shown here.
    print(tweet)

def get_list_of_incredibly_complex_calculation_results(data):
    """Run all three calculations on ``data`` up front and return a list of results."""
    return [first_incredibly_long_calculation(data),
            second_incredibly_long_calculation(data),
            third_incredibly_long_calculation(data)]

# Idiomatic
def get_twitter_stream_for_keyword(keyword):
    """Yield stream data for ``keyword`` for as long as the API reports it is available.

    Availability is re-checked before every item, so iteration ends only
    when ``can_get_stream_data`` becomes falsy.
    """
    api = ImaginaryTwitterAPI()
    while api.can_get_stream_data(keyword):
        yield api.get_stream(keyword)

# Because this iterates a generator, processing continues until the client
# decides to stop.
for tweet in get_twitter_stream_for_keyword('#jeffknupp'):
    if got_stop_signal:
        break
    # Placeholder body: the original per-tweet processing is not shown here.
    print(tweet)

def get_list_of_incredibly_complex_calculation_results(data):
    """Yield the three calculation results for ``data`` one at a time.

    Each calculation runs only when the caller requests the next item.
    """
    yield first_incredibly_long_calculation(data)
    yield second_incredibly_long_calculation(data)
    yield third_incredibly_long_calculation(data)

