2015年11月1日日曜日

Writing Idiomatic Python 3.3(Kindle版)を読んで参考になった点~その2

第1弾に引き続き最近読んだ書籍「Writing Idiomatic Python 3.3」で参考になった点をBlogにまとめておこうと思います。

本書籍の特徴は、タイトルの通り、様々な側面からまず悪いサンプル(Harmful)を示した上でPythonicなサンプル(Idiomatic)を示し、どこが悪いのか、どこを直すべきかを分かりやすく解説している点です。今回は5章「Working with Data」で参考になったTipsをメモしておこうと思います。

ここで紹介してるのはごく一部なので興味あれば一度本書籍をチェックしていただければと思います。


5.2 Strings

5.2.1 Chain string functions to make a simple series of transformations more clear

# Harmful
# Each transformation is its own statement that rebinds the same name.
book_info = ' The Three Musketeers: Alexandre Dumas'
formatted_book_info = book_info.strip()
formatted_book_info = formatted_book_info.upper()
formatted_book_info = formatted_book_info.replace(':', ' by')

# Idiomatic
# The same three transformations, chained into a single expression.
book_info = ' The Three Musketeers: Alexandre Dumas'
formatted_book_info = book_info.strip().upper().replace(':', ' by')

5.2.2 Use ''.join when creating a single string for list elements

# Harmful
# Builds the result with += inside a loop, one element at a time.
result_list = ['True', 'False', 'File not found']
result_string = ''
for result in result_list:
    result_string += result

# Idiomatic
# ''.join concatenates every element of the list in a single call.
result_list = ['True', 'False', 'File not found']
result_string = ''.join(result_list)

5.2.3 Prefer the format function for formatting strings

# Harmful
def get_formatted_user_info_worst(user):
    """Return a one-line summary of ``user`` built with ``+`` concatenation.

    Kept as the anti-pattern: the fragments are glued together by hand and
    the non-string ``age`` needs an explicit ``str()`` call.

    ``user`` must expose ``name``, ``age`` and ``sex`` attributes; ``name``
    and ``sex`` must be strings because they are concatenated directly.
    """
    # ', Age: ' carries its own separator so the name and age do not run together.
    return 'Name: ' + user.name + ', Age: ' + str(user.age) + ', Sex: ' + user.sex

def get_formatted_user_info_slightly_better(user):
    """Return a one-line summary of ``user`` built with ``%`` formatting.

    Still listed under "Harmful": the conversion codes (``%s``, ``%i``,
    ``%c``) have to be matched to the argument tuple by position.
    ``%c`` requires ``user.sex`` to be a single character (or an integer
    code point).
    """
    # Fields are comma-separated, matching the other formatting examples.
    return 'Name: %s, Age: %i, Sex: %c' % (user.name, user.age, user.sex)

# Idiomatic
def get_formatted_user_info(user):
    """Return a one-line summary of ``user`` using ``str.format``.

    The replacement fields read ``name``, ``age`` and ``sex`` straight off
    the ``user`` keyword argument, so no manual conversion is needed.
    """
    template = 'Name: {user.name}, Age: {user.age}, Sex: {user.sex}'
    return template.format(user=user)

5.3 Lists

5.3.1 Use a list comprehension to create a transformed version of an existing list

# Harmful
# Builds the transformed list with an explicit loop and append().
# The input is bound to the same name the loop iterates over
# (is_prime is assumed to be defined elsewhere).
some_other_list = range(10)
some_list = list()
for element in some_other_list:
    if is_prime(element):
        some_list.append(element + 5)

# Idiomatic
# A single list comprehension: keep the elements for which is_prime() is
# true and add 5 to each of them.
some_other_list = range(10)
some_list = [element + 5 
    for element in some_other_list
    if is_prime(element)]

5.4 Dictionaries

5.4.1 Use a dict as a substitute for a switch...case statement

# Harmful
# Python has no switch-case statement, so it is tempting to write an
# if/elif chain like the one below...
def apply_operation(left_operand, right_operand, operator):
    """Apply the arithmetic operation named by ``operator`` to the operands.

    ``operator`` is compared against '+', '-', '*' and '/'. Any other value
    matches no branch, so the function falls off the end and returns None.
    """
    if operator == '+':
        return left_operand + right_operand
    elif operator == '-':
        return left_operand - right_operand
    elif operator == '*':
        return left_operand * right_operand
    elif operator == '/':
        return left_operand / right_operand

# Idiomatic
def apply_operation(left_operand, right_operand, operator):
    """Apply the arithmetic operation named by ``operator`` to the operands.

    A dict maps each supported symbol ('+', '-', '*', '/') to the matching
    function from the ``operator`` module. An unsupported symbol raises
    ``KeyError`` from the dict lookup.
    """
    # Imported under an alias because the ``operator`` parameter shadows
    # the module name inside this function.
    import operator as op
    dispatch = {
        '+': op.add,
        '-': op.sub,
        '*': op.mul,
        '/': op.truediv,
    }
    handler = dispatch[operator]
    return handler(left_operand, right_operand)

5.4.2 Use the default parameter of dict.get to provide default values

# Harmful
# A membership test followed by a separate lookup, with the fallback value
# spelled out in an else branch (configuration is assumed to be a dict
# defined elsewhere).
log_severity = None
if 'severity' in configuration:
    log_severity = configuration['severity']
else:
    log_severity = 'Info'

# Idiomatic
# get() returns the value stored under 'severity' when the key is present,
# otherwise the default 'Info'.
log_severity = configuration.get('severity', 'Info')

5.4.3 Use a dict comprehension to build a dict clearly and efficiently

# Harmful
# Fills the dict with an explicit loop; users with a falsy email are skipped.
user_email = {}
for user in user_list:
    if user.email:
        user_email[user.name] = user.email

# Idiomatic
# The same name -> email mapping as one dict comprehension with the same filter.
user_email = {user.name: user.email for user in user_list if user.email}

5.5 Sets

5.5.2 Use a set comprehension to generate sets concisely

# Harmful
# Fills the set with an explicit loop and add().
users_first_names = set()
for user in users:
    users_first_names.add(user.first_name)

# Idiomatic
# The same set of first names as one set comprehension.
users_first_names = {user.first_name for user in users}

5.6 Tuples

5.6.1 Use collections.namedtuple to make tuple-heavy code more clear

# Harmful
def print_employee_information(db_connection):
    """Print one summary line for every row of the ``employees`` table.

    Kept as the anti-pattern: columns are read by bare numeric index and
    joined with ``+``, so every column used here must already be a string.

    ``db_connection`` must provide ``cursor()``, whose ``execute()`` returns
    an object with ``fetchall()``.
    """
    db_cursor = db_connection.cursor()
    results = db_cursor.execute('select * from employees').fetchall()
    # With bare indexes it is basically impossible to tell which field is
    # being printed where.
    for row in results:
        # Each literal carries its own surrounding spaces so that adjacent
        # column values do not run into the text.
        print(row[1] + ', ' + row[0] + ' was hired on '
              + row[5] + ' (for $' + row[4] + ' per annum) into the '
              + row[2] + ' department and reports to ' + row[3])

# Idiomatic
# The 'employees' table is assumed to have the following columns:
# first_name, last_name, department, manager, salary, hire_date
from collections import namedtuple

employee_row = namedtuple(
    'EmployeeRow',
    ['first_name', 'last_name', 'department', 'manager', 'salary', 'hire_date'],
)

# Every placeholder name matches a keyword passed to format() below.
EMPLOYEE_INFO_STRING = (
    '{last}, {first} was hired on {date} (for ${salary} per annum) '
    'into the {department} department and reports to {manager}'
)


def print_employee_information(db_connection):
    """Print one summary line for every row of the ``employees`` table.

    Each row is wrapped in an ``employee_row`` namedtuple so that fields are
    referred to by name rather than by position.

    ``db_connection`` must provide ``cursor()``, whose ``execute()`` returns
    an object with ``fetchall()``. Rows must have exactly six columns in the
    order listed above, otherwise ``_make`` raises ``TypeError``.
    """
    db_cursor = db_connection.cursor()
    results = db_cursor.execute('select * from employees').fetchall()
    for row in results:
        employee = employee_row._make(row)

        # With named fields it is hard to print a value in the wrong place.
        # The print stays inside the loop so every row is reported.
        print(EMPLOYEE_INFO_STRING.format(
            last=employee.last_name,
            first=employee.first_name,
            date=employee.hire_date,
            salary=employee.salary,
            department=employee.department,
            manager=employee.manager))

5.6.2 Use _ as a placeholder for data in a tuple that should be ignored

# Harmful
(name, age, temp, temp2) = get_user_info(user)
# temp and temp2 are never used after the unpacking
if age > 21:
    output = '{name} can drink!'.format(name=name)

# Idiomatic
# The two ignored positions are bound to _ to show they are deliberately unused.
(name, age, _, _) = get_user_info(user)
if age > 21:
    output = '{name} can drink!'.format(name=name)

5.6.3 Use tuples to unpack data

# Harmful
# Every field is pulled out of the list with its own indexed assignment.
list_from_comma_separated_value_file = ['dog', 'Fido', 10]
animal = list_from_comma_separated_value_file[0]
name = list_from_comma_separated_value_file[1]
age = list_from_comma_separated_value_file[2]
output = ('{name} the {animal} is {age} years old'.format(animal=animal, name=name, age=age))

# Idiomatic
# All three fields are unpacked from the list in a single tuple assignment.
list_from_comma_separated_value_file = ['dog', 'Fido', 10]
(animal, name, age) = list_from_comma_separated_value_file
output = ('{name} the {animal} is {age} years old'.format(animal=animal, name=name, age=age))

5.6.4 Use a tuple to return multiple values from a function

# Harmful
# One function per statistic, so the caller needs three separate calls.
from collections import Counter

STATS_FORMAT = """Statistics:
Mean: {mean}
Median: {median}
Mode: {mode}"""

def calculate_mean(value_list):
    """Return the arithmetic mean of ``value_list`` as a float."""
    return float(sum(value_list) / len(value_list))

def calculate_median(value_list):
    """Return the element at index ``len(value_list) // 2``.

    This is a median only when ``value_list`` is already sorted; for an
    even-length list it is the upper of the two middle elements.
    """
    return value_list[len(value_list) // 2]

def calculate_mode(value_list):
    """Return the most common element of ``value_list``."""
    return Counter(value_list).most_common(1)[0][0]

values = [10, 20, 20, 30]
mean = calculate_mean(values)
median = calculate_median(values)
mode = calculate_mode(values)

print(STATS_FORMAT.format(mean=mean, median=median, mode=mode))

# Idiomatic
from collections import Counter

STATS_FORMAT = """Statistics:
Mean: {mean}
Median: {median}
Mode: {mode}"""

def calculate_stastics(value_list):
    """Return ``(mean, median, mode)`` for ``value_list`` as one tuple.

    mean   -- arithmetic mean as a float
    median -- element at index ``len(value_list) // 2`` (a median only when
              the list is already sorted)
    mode   -- most common element
    """
    mean = float(sum(value_list) / len(value_list))
    median = value_list[len(value_list) // 2]
    mode = Counter(value_list).most_common(1)[0][0]
    return (mean, median, mode)

# The returned tuple is unpacked straight into three names.
(mean, median, mode) = calculate_stastics([10, 20, 20, 30])
print(STATS_FORMAT.format(mean=mean, median=median, mode=mode))

5.7 Classes

5.7.2 Use properties to "future-proof" your class implementation

# Harmful
class Product():
    """Product whose name and price are stored as plain public attributes."""

    def __init__(self, name, price):
        self.name = name
        self.price = price

# Idiomatic
class Product():
    """Product whose price is exposed through a property.

    The raw value lives in ``_price``. Reading ``price`` returns it
    multiplied by ``TAX_RATE``, which is not defined in this snippet and
    must be supplied by the surrounding module.
    """

    def __init__(self, name, price):
        self.name = name
        self._price = price

    @property
    def price(self):
        """Return the stored price multiplied by ``TAX_RATE``."""
        return self._price * TAX_RATE

    @price.setter
    def price(self, value):
        """Store ``value`` as the raw (pre-tax) price."""
        # The setter function must have the same name as the property.
        self._price = value

5.7.3 Define __str__ in a class to show a human-readable representation

# Harmful
class Point():
    """2-D point that defines no __str__ of its own."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
  
p = Point(1, 2)
print(p)
# Output: '<__main__.Point object at 0x91ebd0>'

# Idiomatic
class Point():
    """2-D point whose string form is its two coordinates, comma-separated."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __str__(self):
        """Return the coordinates as 'x, y'."""
        coordinates = (self.x, self.y)
        return '{0}, {1}'.format(*coordinates)

p = Point(1, 2)
print(p)

# Output: '1, 2'

5.8 Context Managers

5.8.1 Use a context manager to ensure resources are properly managed

# Harmful
# If an exception is raised, nothing here closes the file that was opened.
# (path_to_file and raise_exception are assumed to be defined elsewhere.)
file_handle = open(path_to_file, 'r')
for line in file_handle.readlines():
    if raise_exception(line):
        print('No! An Exception!')

# Idiomatic
# Opening the file through a context manager lets its __enter__/__exit__
# methods take care of the cleanup: the with statement calls __exit__ when
# the block is left, even if an exception is raised inside it.
# (path_to_file and raise_exception are assumed to be defined elsewhere.)
with open(path_to_file, 'r') as file_handle:
    for line in file_handle:
        if raise_exception(line):
            print('No! An Exception!')

5.9 Generators

5.9.1 Prefer a generator expression to a list comprehension for simple iteration

# Harmful
# A list comprehension immediately builds a list holding every element.
# For a huge list this consumes a great deal of memory.
for uppercase_name in [name.upper() for name in get_all_usernames()]:
    process_normalized_username(uppercase_name)

# Idiomatic
# A generator expression, by contrast, produces each element only when the
# loop asks for it.
for uppercase_name in (name.upper() for name in get_all_usernames()):
    process_normalized_username(uppercase_name)

5.9.2 Use a generator to lazily load infinite sequences

# Harmful
def get_twitter_stream_for_keyword(keyword):
    """Return the stream for ``keyword`` from a new ImaginaryTwitterAPI.

    can_get_stream_data() is checked once. When it is false the function
    falls off the end and returns None.
    """
    imaginary_twitter_api = ImaginaryTwitterAPI()
    if imaginary_twitter_api.can_get_stream_data(keyword):
        return imaginary_twitter_api.get_stream(keyword)

# The stream is fetched with a single call and then iterated.
current_stream = get_twitter_stream_for_keyword('#jeffknupp')
for tweet in current_stream:
    process_tweet(tweet)

def get_list_of_incredibly_complex_calculation_results(data):
    """Run all three calculations on ``data`` and return the results as a list.

    Every calculation finishes before the list is returned, so the caller
    receives nothing until the last one is done.
    """
    first_result = first_incredibly_long_calculation(data)
    second_result = second_incredibly_long_calculation(data)
    third_result = third_incredibly_long_calculation(data)
    return [first_result, second_result, third_result]

# Idiomatic
def get_twitter_stream_for_keyword(keyword):
    """Yield stream data for ``keyword`` for as long as it is available.

    This is a generator: it keeps producing iterable data until
    ``can_get_stream_data(keyword)`` returns False.
    """
    imaginary_twitter_api = ImaginaryTwitterAPI()
    while imaginary_twitter_api.can_get_stream_data(keyword):
        yield imaginary_twitter_api.get_stream(keyword)

# Because a generator is being consumed, processing keeps going until the
# client decides to stop (got_stop_signal is assumed to be set elsewhere).
for tweet in get_twitter_stream_for_keyword('#jeffknupp'):
    if got_stop_signal:
        break
    process_tweet(tweet)

def get_list_of_incredibly_complex_calculation_results(data):
    """Yield the results of the three calculations on ``data``, one at a time.

    As a generator, each calculation runs only when the caller asks for the
    next value.
    """
    yield first_incredibly_long_calculation(data)
    yield second_incredibly_long_calculation(data)
    yield third_incredibly_long_calculation(data)

0 件のコメント:

コメントを投稿