本書籍の特徴は、タイトルの通り、様々な側面から悪いサンプル(Harmful)をまず示した上でPythonicなサンプル(Idiomatic)を示し、どこが悪いのか、どこを直すべきかを分かりやすく解説している点です。今回は5章「Working with Data」で参考になったTipsをメモしておこうと思います。
※ここで紹介しているのはごく一部なので、興味があれば一度本書籍をチェックしていただければと思います。
5.2 Strings
5.2.1 Chain string functions to make a simple series of transformations more clear
# Harmful
# Each transformation is a separate statement that rebinds the same name.
book_info = ' The Three Musketeers: Alexandre Dumas'
formatted_book_info = book_info.strip()
formatted_book_info = formatted_book_info.upper()
formatted_book_info = formatted_book_info.replace(':', ' by')

# Idiomatic
# Every str method returns a new string, so the three steps chain directly.
book_info = ' The Three Musketeers: Alexandre Dumas'
formatted_book_info = book_info.strip().upper().replace(':', ' by')
5.2.2 Use ''.join when creating a single string for list elements
# Harmful
# Builds the string one piece at a time with repeated concatenation.
result_list = ['True', 'False', 'File not found']
result_string = ''
for result in result_list:
    result_string += result

# Idiomatic
# str.join concatenates all elements in a single call.
result_list = ['True', 'False', 'File not found']
result_string = ''.join(result_list)
5.2.3 Prefer the format function for formatting strings
# Harmful
def get_formatted_user_info_worst(user):
    """Format name, age and sex of *user* by plain string concatenation."""
    # The ', ' before 'Age' was missing, which glued the name to the label.
    return 'Name: ' + user.name + ', Age: ' + str(user.age) + ', Sex: ' + user.sex


def get_formatted_user_info_slightly_better(user):
    """Format name, age and sex of *user* with %-interpolation.

    '%c' accepts a single character (or an integer code point), so
    user.sex has to be a one-character string here.
    """
    return 'Name: %s, Age: %i, Sex: %c' % (user.name, user.age, user.sex)


# Idiomatic
def get_formatted_user_info(user):
    """Format name, age and sex of *user* with str.format.

    The replacement fields read the attributes straight off the object
    passed as the ``user`` keyword argument.
    """
    output = 'Name: {user.name}, Age: {user.age}, Sex: {user.sex}'.format(user=user)
    return output
5.3 Lists
5.3.1 Use a list comprehension to create a transformed version of an existing list
# NOTE(review): is_prime is not defined in this snippet; it is assumed to be
# a predicate taking one integer.

# Harmful
# The source list must be bound to the same name the loop iterates over.
some_other_list = range(10)
some_list = list()
for element in some_other_list:
    if is_prime(element):
        some_list.append(element + 5)

# Idiomatic
# Filter and transform in a single list comprehension.
some_other_list = range(10)
some_list = [element + 5 for element in some_other_list if is_prime(element)]
5.4 Dictionaries
5.4.1 Use a dict as a substitute for a switch...case statement
# Harmful
# Python has no switch-case statement, so it is tempting to write this...
def apply_operation(left_operand, right_operand, operator):
    """Apply the arithmetic *operator* ('+', '-', '*' or '/') to the operands.

    Falls through and returns None when *operator* matches no branch.
    """
    if operator == '+':
        return left_operand + right_operand
    elif operator == '-':
        return left_operand - right_operand
    elif operator == '*':
        return left_operand * right_operand
    elif operator == '/':
        return left_operand / right_operand


# Idiomatic
def apply_operation(left_operand, right_operand, operator):
    """Apply the arithmetic *operator* ('+', '-', '*' or '/') to the operands.

    Raises KeyError when *operator* is not a key of the dispatch table.
    """
    # Imported under an alias because the parameter is itself named `operator`.
    import operator as op
    operator_mapper = {'+': op.add, '-': op.sub, '*': op.mul, '/': op.truediv}
    return operator_mapper[operator](left_operand, right_operand)
5.4.2 Use the default parameter of dict.get to provide default values
# NOTE(review): `configuration` is not defined in this snippet; it is assumed
# to be a dict-like object.

# Harmful
log_severity = None
if 'severity' in configuration:
    log_severity = configuration['severity']
else:
    log_severity = 'Info'

# Idiomatic
# dict.get returns the second argument when the key is absent.
log_severity = configuration.get('severity', 'Info')
5.4.3 Use a dict comprehension to build a dict clearly and efficiently
# NOTE(review): `user_list` is not defined in this snippet; its items are
# assumed to expose `name` and `email` attributes.

# Harmful
user_email = {}
for user in user_list:
    if user.email:
        user_email[user.name] = user.email

# Idiomatic
# Build the name -> email mapping, skipping users with a falsy email.
user_email = {user.name: user.email for user in user_list if user.email}
5.5 Sets
5.5.2 Use a set comprehension to generate sets concisely
# NOTE(review): `users` is not defined in this snippet; its items are assumed
# to expose a `first_name` attribute.

# Harmful
users_first_names = set()
for user in users:
    users_first_names.add(user.first_name)

# Idiomatic
# A set comprehension collects the distinct first names in one expression.
users_first_names = {user.first_name for user in users}
5.6 Tuples
5.6.1 Use collections.namedtuple to make tuple-heavy code more clear
# Harmful
def print_employee_information(db_connection):
    """Print one summary line per row of the 'employees' table."""
    db_cursor = db_connection.cursor()
    results = db_cursor.execute('select * from employees').fetchall()
    for row in results:
        # It is basically impossible to follow what is getting printed
        print(row[1] + ', ' + row[0] + ' was hired on '
              + row[5] + ' (for $' + row[4] + ' per annum) into the '
              + row[2] + ' department and reports to ' + row[3])


# Idiomatic
from collections import namedtuple

# The 'employees' table is assumed to have the following columns:
# first_name, last_name, department, manager, salary, hire_date
employee_row = namedtuple(
    'EmployeeRow',
    ['first_name', 'last_name', 'department', 'manager', 'salary', 'hire_date'])

# Adjacent literals are joined by the parser, so no line-continuation
# whitespace leaks into the message.
EMPLOYEE_INFO_STRING = (
    '{last}, {first} was hired on {date} (for ${salary} per annum) '
    'into the {department} department and reports to {manager}'
)


def print_employee_information(db_connection):
    """Print one summary line per row of the 'employees' table."""
    db_cursor = db_connection.cursor()
    results = db_cursor.execute('select * from employees').fetchall()
    for row in results:
        employee = employee_row._make(row)
        # It is now almost impossible to print a field in the wrong place
        print(EMPLOYEE_INFO_STRING.format(
            last=employee.last_name,
            first=employee.first_name,
            date=employee.hire_date,
            salary=employee.salary,
            department=employee.department,
            manager=employee.manager))
5.6.2 Use _ as a placeholder for data in a tuple that should be ignored
# NOTE(review): `get_user_info` and `user` are not defined in this snippet;
# the call is assumed to return a 4-tuple.

# Harmful
# temp and temp2 are never used
(name, age, temp, temp2) = get_user_info(user)
if age > 21:
    output = '{name} can drink!'.format(name=name)

# Idiomatic
# `_` signals that the last two values are intentionally ignored.
(name, age, _, _) = get_user_info(user)
if age > 21:
    output = '{name} can drink!'.format(name=name)
5.6.3 Use tuples to unpack data
# Harmful
# Every field is pulled out by index, one statement at a time.
list_from_comma_separated_value_file = ['dog', 'Fido', 10]
animal = list_from_comma_separated_value_file[0]
name = list_from_comma_separated_value_file[1]
age = list_from_comma_separated_value_file[2]
output = ('{name} the {animal} is {age} years old'.format(animal=animal, name=name, age=age))

# Idiomatic
# Unpack all three fields in a single assignment.
list_from_comma_separated_value_file = ['dog', 'Fido', 10]
(animal, name, age) = list_from_comma_separated_value_file
output = ('{name} the {animal} is {age} years old'.format(animal=animal, name=name, age=age))
5.6.4 Use a tuple to return multiple values from a function
# Harmful
from collections import Counter

STATS_FORMAT = """Statistics:
Mean: {mean}
Median: {median}
Mode: {mode}"""


def calculate_mean(value_list):
    """Return the arithmetic mean of *value_list* as a float."""
    return float(sum(value_list) / len(value_list))


def calculate_median(value_list):
    """Return the element at the middle index of *value_list*.

    No sorting is done here, so this is the median only when the caller
    passes an already sorted list.
    """
    return value_list[int(len(value_list) / 2)]


def calculate_mode(value_list):
    """Return the most frequent element of *value_list*."""
    return Counter(value_list).most_common(1)[0][0]


values = [10, 20, 20, 30]
mean = calculate_mean(values)
median = calculate_median(values)
mode = calculate_mode(values)
print(STATS_FORMAT.format(mean=mean, median=median, mode=mode))

# Idiomatic
from collections import Counter

STATS_FORMAT = """Statistics:
Mean: {mean}
Median: {median}
Mode: {mode}"""


def calculate_stastics(value_list):
    """Return (mean, median, mode) of *value_list* as one tuple.

    The median is the element at the middle index, so *value_list* is
    expected to be sorted already.
    """
    mean = float(sum(value_list) / len(value_list))
    median = value_list[int(len(value_list) / 2)]
    mode = Counter(value_list).most_common(1)[0][0]
    return (mean, median, mode)


(mean, median, mode) = calculate_stastics([10, 20, 20, 30])
print(STATS_FORMAT.format(mean=mean, median=median, mode=mode))
5.7 Classes
5.7.2 Use properties to "future-proof" your class implementation
# Harmful
class Product:
    """Product whose price is a plain public attribute."""

    def __init__(self, name, price):
        self.name = name
        self.price = price


# Idiomatic
class Product:
    """Product whose price is exposed through a property.

    NOTE(review): TAX_RATE is not defined in this snippet; reading
    `price` requires it to exist at module level.
    """

    def __init__(self, name, price):
        # The assignment was missing, so `name` was never stored.
        self.name = name
        self._price = price

    @property
    def price(self):
        return self._price * TAX_RATE

    @price.setter
    def price(self, value):
        # The "setter" function must have the same name as the property
        self._price = value
5.7.3 Define __str__ in a class to show a human-readable representation
# Harmful
class Point:
    """2-D point without a custom string representation."""

    def __init__(self, x, y):
        self.x = x
        self.y = y


p = Point(1, 2)
print(p)
# Output: '<__main__.Point object at 0x91ebd0>'


# Idiomatic
class Point:
    """2-D point that prints as 'x, y'."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __str__(self):
        return '{0}, {1}'.format(self.x, self.y)


p = Point(1, 2)
print(p)
# Output: '1, 2'
5.8 Context Managers
5.8.1 Use a context manager to ensure resources are properly managed
# NOTE(review): `path_to_file` and `raise_excepton` are not defined in this
# snippet.

# Harmful
# If an exception occurs, there is no way to close the opened file
file_handle = open(path_to_file, 'r')
for line in file_handle.readlines():
    if raise_excepton(line):
        print('No! An Exception!')

# Idiomatic
# Opening through a context manager makes cleanup easy, because the object
# defines the __enter__ and __exit__ methods
with open(path_to_file, 'r') as file_handle:
    for line in file_handle:
        if raise_excepton(line):
            print('No! An Exception!')
5.9 Generators
5.9.1 Prefer a generator expression to a list comprehension for simple iteration
# NOTE(review): `get_all_usernames` and `process_normalized_username` are not
# defined in this snippet.

# Harmful
# A list comprehension immediately builds a list holding every element.
# For a huge list this consumes a great deal of memory.
for uppercase_name in [name.upper() for name in get_all_usernames()]:
    process_normalized_username(uppercase_name)

# Idiomatic
# A generator expression, by contrast, produces each element on demand.
for uppercase_name in (name.upper() for name in get_all_usernames()):
    process_normalized_username(uppercase_name)
5.9.2 Use a generator to lazily load infinite sequences
# NOTE(review): ImaginaryTwitterAPI, process_tweet, got_stop_signal and the
# *_incredibly_long_calculation helpers are not defined in this snippet.

# Harmful
def get_twitter_stream_for_keyword(keyword):
    """Return the stream for *keyword*, or None when no data is available."""
    imaginary_twitter_api = ImaginaryTwitterAPI()
    if imaginary_twitter_api.can_get_stream_data(keyword):
        return imaginary_twitter_api.get_stream(keyword)


current_stream = get_twitter_stream_for_keyword('#jeffknupp')
for tweet in current_stream:
    process_tweet(tweet)


def get_list_of_incredibly_complex_calculation_results(data):
    """Run all three calculations up front and return their results as a list."""
    return [
        first_incredibly_long_calculation(data),
        second_incredibly_long_calculation(data),
        third_incredibly_long_calculation(data),
    ]


# Idiomatic
def get_twitter_stream_for_keyword(keyword):
    """Yield stream data for *keyword* as a generator.

    Keeps yielding until 'can_get_stream_data(keyword)' returns False.
    """
    imaginary_twitter_api = ImaginaryTwitterAPI()
    while imaginary_twitter_api.can_get_stream_data(keyword):
        yield imaginary_twitter_api.get_stream(keyword)


# Because this consumes a generator, processing continues until the client
# decides to stop
for tweet in get_twitter_stream_for_keyword('#jeffknupp'):
    if got_stop_signal:
        break
    process_tweet(tweet)


def get_list_of_incredibly_complex_calculation_results(data):
    """Yield the three calculation results one at a time.

    Each calculation runs only when the consumer asks for the next value.
    """
    yield first_incredibly_long_calculation(data)
    yield second_incredibly_long_calculation(data)
    yield third_incredibly_long_calculation(data)
0 件のコメント:
コメントを投稿