Search     or:     and:
 LINUX 
 Language 
 Kernel 
 Package 
 Book 
 Test 
 OS 
 Forum 
iakovlev.org

Вглубь языка Python - часть 3

3.1. Вглубь

Начиная с этой главы мы будем иметь дело с объектно ориентированным программированием на языке Python. Помните, я говорил, что вам необходимо знать объектно-ориентированный язык для чтения этой книги? Так я не шутил.

Здесь приведена полноценная программа на языке Python. Прочитайте строки документации модуля, классов и функций для получения информации о том, что программа делает и как она работает. Как обычно, не беспокойтесь, если вы чего-либо не понимаете: для этого и предназначена оставшаяся часть главы.

Пример 3.1. fileinfo.py

Если вы еще этого не сделали, можете загрузить этот и другие примеры, используемые в книге.

"""Получение метаинформации, специфичной для файла данного типа.
 
 Создайте экземпляр соответствующего класса, передав конструктору имя файла.
 Возвращаемый объект ведет себя аналогично словарю с парами ключ-значение для
 каждой части метаинформации.
     import fileinfo
     info = fileinfo.MP3FileInfo("/music/ap/mahadeva.mp3")
     print "\\n".join(["%s=%s" % (k, v) for k, v in info.items()])
 
 Или используйте функцию listDirectory для получения информации обо всех файлов
 в директории
     for info in fileinfo.listDirectory("/music/ap/", [".mp3"]):
         ...
 
 Модуль может быть расширен путем доюавления классов для других типов файлов,
 например HTMLFileInfo, MPGFileInfo, DOCFileInfo.  Каждый класс полностью
 отвечает за анализ файлов соответствующего типа; используйте MP3FileInfo в
 качестве примера.
 """
 import os
 import sys
 from UserDict import UserDict
 
 def stripnulls(data):
     "Return data with NUL characters removed and surrounding whitespace stripped."
     # Splitting on NUL and re-joining drops every NUL character.
     without_nulls = "".join(data.split("\00"))
     return without_nulls.strip()
 
 class FileInfo(UserDict):
     "Store metadata about a file in a dictionary-like object."
     def __init__(self, filename=None):
         # The base-class initializer is not invoked automatically, so call
         # it explicitly before the instance is used as a mapping.
         UserDict.__init__(self)
         # Item assignment is dispatched to __setitem__, so a subclass that
         # overrides __setitem__ also sees this initial "name" assignment.
         self["name"] = filename
 
 class MP3FileInfo(FileInfo):
     "Store ID3v1.0 MP3 tags."
     # Maps each tag name to (start, end, parseFunc): tagdata[start:end] is the
     # slice of the 128-byte tag block that holds the field, and parseFunc
     # converts that raw slice into the value stored under the tag name.
     tagDataMap = {"title"   : (  3,  33, stripnulls),
                   "artist"  : ( 33,  63, stripnulls),
                   "album"   : ( 63,  93, stripnulls),
                   "year"    : ( 93,  97, stripnulls),
                   "comment" : ( 97, 126, stripnulls),
                   "genre"   : (127, 128, ord)}
 
     def __parse(self, filename):
         "Parse ID3v1.0 tags from an MP3 file."
         # Drop every previously stored key before reading the new file.
         self.clear()
         try:
             # Binary mode; the third argument (0) requests unbuffered I/O.
             fsock = open(filename, "rb", 0)
             try:
                 # whence=2 seeks relative to the end of the file, so the
                 # read below returns the file's last 128 bytes.
                 fsock.seek(-128, 2)
                 tagdata = fsock.read(128)
             finally:
                 # Close the file whether or not the seek/read succeeded.
                 fsock.close()
             # Only populate tags when the block starts with the "TAG" marker.
             if tagdata[:3] == "TAG":
                 for tag, (start, end, parseFunc) in self.tagDataMap.items():
                     self[tag] = parseFunc(tagdata[start:end])
         except IOError:
             # Best effort: an I/O failure is ignored and no tags are set.
             pass
 
     def __setitem__(self, key, item):
         # Assigning a truthy value to "name" first parses that file's tags
         # (which also clears existing keys); the assignment itself is then
         # delegated to the inherited implementation.
         if key == "name" and item:
             self.__parse(item)
         FileInfo.__setitem__(self, key, item)
 
 def listDirectory(directory, fileExtList):
     """Return a list of metadata objects for all files in directory whose
 extension is listed in fileExtList."""
     # Normalize the case of each directory entry name for the platform.
     fileList = [os.path.normcase(f) for f in os.listdir(directory)]
     # Keep only entries whose extension is in fileExtList and prefix them
     # with the directory.
     fileList = [os.path.join(directory, f) for f in fileList \
                 if os.path.splitext(f)[1] in fileExtList]
     # The default for module is evaluated once, when this def statement runs:
     # it is the module object in which FileInfo is defined.
     def getFileInfoClass(filename, module=sys.modules[FileInfo.__module__]):
         "Determine the class intended to handle the file, based on its extension."
         # ".mp3" -> "MP3FileInfo": upper-case the extension and drop the dot.
         subclass = "%sFileInfo" % os.path.splitext(filename)[1].upper()[1:]
         # and/or idiom: yields the module attribute of that name when it
         # exists (and is truthy), otherwise falls back to FileInfo.
         return hasattr(module, subclass) and getattr(module, subclass) or FileInfo
     # Instantiate the selected class with each path.
     return [getFileInfoClass(f)(f) for f in fileList]
 
 if __name__ == "__main__":
     # Demo: for each ".mp3" file in the hard-coded directory, print its
     # key=value pairs one per line, followed by a blank line.
     for info in listDirectory("/music/_singles/", [".mp3"]): 1
         print "\n".join(["%s=%s" % (k, v) for k, v in info.items()])
         print
1 Вывод этой программы зависит от файлов на вашем диске.
Чтобы получить осмысленную информацию, необходимо изменить путь на каталог, содержащий MP3 файлы на вашей машине.

Пример 3.2. Вывод программы fileinfo.py

Вот результат, полученный на моей машине. Ваш результат будет другим, если только ваши музыкальные вкусы полностью не совпадают с моими.

album=
 artist=Ghost in the Machine
 title=A Time Long Forgotten (Concept
 genre=31
 name=/music/_singles/a_time_long_forgotten_con.mp3
 year=1999
 comment=http://mp3.com/ghostmachine
 
 album=Rave Mix
 artist=***DJ MARY-JANE***
 title=HELLRAISER****Trance from Hell
 genre=31
 name=/music/_singles/hellraiser.mp3
 year=2000
 comment=http://mp3.com/DJMARYJANE
 
 album=Rave Mix
 artist=***DJ MARY-JANE***
 title=KAIRO****THE BEST GOA
 genre=31
 name=/music/_singles/kairo.mp3
 year=2000
 comment=http://mp3.com/DJMARYJANE
 
 album=Journeys
 artist=Masters of Balance
 title=Long Way Home
 genre=31
 name=/music/_singles/long_way_home1.mp3
 year=2000
 comment=http://mp3.com/MastersofBalan
 
 album=
 artist=The Cynic Project
 title=Sidewinder
 genre=18
 name=/music/_singles/sidewinder.mp3
 year=2000
 comment=http://mp3.com/cynicproject
 
 album=Digitosis@128k
 artist=VXpanded
 title=Spinning
 genre=255
 name=/music/_singles/spinning.mp3
 year=2000
 comment=http://mp3.com/artists/95/vxp
3.2. Импортирование модулей инструкцией from module import

В языке Python есть два способа импортировать модули. Оба из них полезны, и вы должны знать, когда каждый из них лучше использовать. С одним способом, import module, вы уже ознакомились в главе 1. Второй способ делает примерно то же самое, но в его работе есть несколько важных отличий.

Пример 3.3. Синтаксис from module import


 from UserDict import UserDict
 

Синтаксис очень похож на уже знакомый вам синтаксис import module, но есть одно важное отличие: атрибуты модуля types импортируются непосредственно в локальное пространство имен и поэтому становятся доступными напрямую, без указания имени модуля.

Замечание
from module import in Python is like use module in Perl; import module in Python is like require module in Perl.
Замечание
from module import in Python is like import module.* in Java; import module in Python is like import module in Java.

Пример 3.4. import module vs. from module import

>>> import types
 >>> types.FunctionType             1
 <type 'function'>
 >>> FunctionType                   2
 Traceback (innermost last):
   File "<interactive input>", line 1, in ?
 NameError: There is no variable named 'FunctionType'
 >>> from types import FunctionType 3
 >>> FunctionType                   4
 <type 'function'>
1 Модуль types содержит объекты-типы для объектов всех встроенных типов языка Python.
Обратите внимание, что имя объекта, FunctionType, должно быть указано с использованием имени модуля, types.
2 Имя FunctionType не определено в этом пространстве имен, оно доступно только в контексте types.
3 Используя такой синтаксис вы импортируете FunctionType из модуля types непосредственно в локальное пространство имен.
4 Теперь объект FunctionType доступен напрямую, без указания имени модуля.

Когда следует использовать from module import?

  • Если вы собираетесь многократно использовать атрибуты модуля и не хотите снова и снова набирать имя модуля, используйте from module import.
  • Если вы хотите импортировать только отдельные атрибуты, используйте from module import.
  • Если модуль содержит атрибуты с такими же именами, как переменные вашего модуля, во избежание конфликта имен вам придется использовать import module.

Во всех остальных случаях — это дело вкуса. Вы увидите код на языке Python с использованием обоих вариантов.

Дополнительная литература

3.3. Определение классов

Python имеет полноценную поддержку объектно-ориентированного программирования: вы можете определять собственные классы, наследоваться от встроенных и собственных классов, создавать экземпляры определенных вами классов.

Определять классы в языке Python просто. Как и для функций, для классов нет отдельного определения интерфейса. Определение класса в языке Python начинается с зарезервированного слова class и следующего за ним имени класса. Технически, это все, что требуется, так как класс совсем необязательно должен быть производным от другого класса.

Пример 3.5. Простейший класс


 class foo: 1
     pass   2 3
1 Определяем класс с именем foo, который не является производным от других классов.
2 Этот класс не определяет никаких методов и атрибутов, но синтаксис требует наличия хотя бы одной инструкции в определении,
поэтому мы использовали pass. Это зарезервированное слово, которое означает, что ничего делать не нужно.
Инструкция pass ничего не делает и полезна в качестве наполнителя в случаях, когда вы хотите поставить заглушку в определении функции или класса.
3 Как вы, наверное, уже догадались, тело класса записывается с отступом, как и тело функции или инструкций if, for и т. д.
Первая строка, записанная без отступа, уже не попадает в определение класса.
Замечание
Инструкция pass в Python ведет себя аналогично пустым фигурным скобкам ({}) в Java и C.

Конечно, в реальных программах большинство классов будут производными от других классов и будут определять собственные атрибуты и методы. Но, как вы уже увидели, нет ничего, что класс обязательно должен иметь, кроме имени. В частности, программистам на C++ может показаться странным, что классы в языке Python не имеют явных конструкторов и деструкторов. В классах языка Python есть нечто, похожее на конструктор — метод __init__.

Пример 3.6. Определение класса FileInfo


 from UserDict import UserDict
 
 class FileInfo(UserDict): 1
1 В языке Python родительские классы просто перечисляются в скобках сразу после имени класса.
В данном случае класс FileInfo наследуется от класса UserDict (который был проимпортирован из модуля UserDict).
UserDict — класс, который ведет себя аналогично словарю, позволяя от него наследоваться и изменять или дополнять его поведение.
(Существуют аналогичные классы UserList и UserString, позволяющие определить класс, производный от списка и строки.)
Здесь есть немного черной магии, которую мы раскроем позже в этой главе, когда будем подробнее исследовать класс UserDict.
Замечание
В языке Python родительские классы просто перечисляются в скобках после имени.
Для этого не нужно использовать специальное ключевое слово, такое как extends в Java.
Замечание
Хотя я не буду рассказывать об этом подробно, Python поддерживает множественное наследование.
В скобках после имени класса вы можете перечислить через запятую столько родительских классов, сколько вам нужно.

Пример 3.7. Инициализация класса FileInfo


 class FileInfo(UserDict):
     "хранит метаинформацию о файле"    1
     def __init__(self, filename=None): 2 3 4
1 Для классов можно (и желательно) определять строку документации, также как для модулей и функций.
2 Метод __init__ вызывается сразу после создания экземпляра класса.
Соблазнительно, но неправильно называть этот метод конструктором.
Соблазнительно, потому что он выглядит как конструктор (принято, чтобы __init__ был первым методом, определенным в классе),
ведет себя как конструктор (это первый кусок кода, вызываемый в созданном экземпляре класса) и даже называется как конструктор.
Неправильно, так как к тому времени, когда вызывается метод __init__, объект уже создан и вы имеете ссылку на созданный экземпляр класса.
Но метод __init__ — это самое близкое к конструктору, из того что есть в языке Python.
3 Первым аргументом каждого метода класса, включая __init__, всегда является текущий экземпляр класса.
Общепринято всегда называть этот аргумент self.
В методе __init__ self ссылается на только что созданный объект, в других методах — на экземпляр класса, для которого метод вызывается.
Хотя и необходимо явно указывать self при определении метода, вы его не указываете, когда вызываете метод;
Python добавит его автоматически.
4 Метод __init__ может иметь несколько аргументов.
Аргументы могут иметь значения по умолчанию, что сделает их необязательными.
В данном случае аргумент filename имеет значение по умолчанию None.
Замечание
Первый аргумент метода класса (ссылка на текущий экземпляр) принято называть self.
Этот аргумент играет роль зарезервированного слова this в C++ и Java,
но self не является зарезервированным словом — просто соглашение.
Несмотря на это, не стоит называть его иначе, чем self.

Пример 3.8. Тело класса FileInfo


 class FileInfo(UserDict):
     "хранит метаинформацию о файле"
     def __init__(self, filename=None):
         UserDict.__init__(self)        1
         self["name"] = filename        2
                                        3
1 Некоторыми псевдо-объектно-ориентированными языками, например Powerbuilder, поддерживается концепция
“расширения” конструкторов и других обработчиков событий: метод базового класса
автоматически вызывается перед выполнением метода производного класса.
В Python этого не происходит, необходимо явно вызывать метод в производном классе.
2 Я сказал, что наш класс ведет себя аналогично словарю, и вот первое проявление такого поведения:
мы присваиваем значение аргумента filename записи объекта с ключом name.
3 Обратите внимание, что метод __init__ никогда не возвращает значение.
Замечание
В определении методов необходимо явно указывать self в качестве первого аргумента любого метода,
включая __init__. При вызове метода базового класса также необходимо включать self в список аргументов.
Но при вызове метода извне аргумент self не указывается, а подставляется интерпретатором автоматически.
Подозреваю, что сначала это сбивает с толку и может показаться непоследовательным,
так как вы еще не узнали о разнице между связанными и несвязанными методами.

Вот так. Понимаю, что здесь многое нужно запомнить, но вы быстро наловчитесь. Все классы в языке Python работают одинаково, так что изучив один, вы узнаете обо всех. Вы можете забыть обо всем остальном, но одно правило стоит запомнить (иначе, обещаю, вы когда-нибудь запутаетесь):

Замечание
Метод __init__ не является обязательным, но если вы его определяете,
то не забудьте вызвать методы __init__ базовых классов.
Это правило верно и в более широком смысле: если производный класс хочет расширить поведение базового,
то он должен в нужное время вызвать соответствующий метод базового класса с необходимыми ему аргументами.
3.4. Создание экземпляров классов

Создать экземпляр класса очень просто: достаточно вызвать класс, как если бы он был функцией, передав аргументы, определенные в методе __init__. Возвращаемое значение и есть созданный объект.

Пример 3.9. Создание экземпляра класса FileInfo

>>> import fileinfo
 >>> f = fileinfo.FileInfo("/music/_singles/kairo.mp3") 1
 >>> f.__class__                                        2
 <class fileinfo.FileInfo at 010EC204>
 >>> f.__doc__                                          3
 'base class for file info'
 >>> f                                                  4
 {'name': '/music/_singles/kairo.mp3'}
1 Мы создаем экземпляр класса FileInfo (определенного в модуле fileinfo) и присваиваем его переменной f.
Параметр "/music/_singles/kairo.mp3" в итоге будет передан в качестве аргумента filename методу __init__
класса FileInfo.
2 У каждого экземпляра класса есть специальный атрибут __class__,
ссылающийся на его класс (обратите внимание, что его представление содержит
физический адрес объекта-класса на моей машине, в вашем случае представление
будет немного другим).
Программисты на Java могут быть знакомы с классом Class, у которого есть такие методы, как
getName и getSuperclass для получения информации о классе.
В языке Python информация такого рода доступна непосредственно через специальные атрибуты __class__, __name__ и __bases__.
3 Вы можете получить строку документации экземпляра точно так же, как для функции или модуля.
Все экземпляры одного класса имеют одну и ту же строку документации.
4 Помните, что метод __init__ сохраняет значение аргумента filename в self["name"]?
Вот и результат. Аргумент, который мы передали при создании экземпляра передается
методу __init__ (плюс ссылка на объект self, которую Python добавляет сам).
Замечание
В языке Python для создания нового экземпляра необходимо просто вызвать
класс, как если бы он был функцией.
Здесь нет оператора new, как в C++ и Java.

Если создаются экземпляры классов просто, то уничтожаются они еще проще. Как правило нет необходимости это делать явно, так как удаление происходит автоматически при выходе объектов за пределы области видимости. Утечки памяти в Python бывают редко.

Пример 3.10. Попытка реализовать утечку памяти

>>> def leakmem():
 ...     f = fileinfo.FileInfo('/music/_singles/kairo.mp3') 1
 ...     
 >>> for i in range(100):
 ...     leakmem()                                          2
1 На каждый вызов функции leakmem мы создаем экземпляр класса FileInfo и присваиваем его переменной f,
локальной для этой функции. На этом функция заканчивается без явного уничтожения f,
так что вы можете ожидать утечки памяти, но вы не правы. При выходе из функции локальная
переменная f выходит из области видимости. С этого момента
больше нет ссылок на созданный экземпляр класса FileInfo
(мы больше нигде его не сохраняли, кроме f), так что
Python уничтожает экземпляр за нас.
2 Сколько бы мы ни вызывали функцию leakmem, утечки памяти не будет,
потому что каждый раз перед выходом из leakmem Python будет удалять вновь созданный экземпляр
FileInfo.

Технический термин для этого вида сборки мусора — “подсчет ссылок”. Python всегда знает количество ссылок на каждый созданный объект. В приведенном примере была единственная ссылка — локальная переменная f. При выходе из функции переменная f выходит из области видимости, количество ссылок становится равным нулю и Python автоматически уничтожает экземпляр.

Бывают ситуации, когда подсчет ссылок не дает нужного результата. Если вы создаете объекты, ссылающиеся друг на друга (например, двусвязные списки, в которых каждый элемент имеет ссылку на следующий и предыдущий элемент в списке), ни один из объектов не может быть уничтожен, так как счетчик ссылок не достигает нуля. Начиная с версии 2.0 в языке Python есть дополнительный способ сборки мусора (“mark-and-sweep”), достаточно сообразительный, чтобы работать с такими ситуациями и корректно разрывать циклические ссылки.

Как бывшего философа, меня беспокоит мысль о том, что какие-то вещи исчезают, когда на них никто не смотрит, но это именно то, что происходит в языке Python. В общем, вы можете просто забыть об управлении памятью и предоставить интерпретатору возможность делать это за вас.

Дополнительная литература

3.5. UserDict: a wrapper class

As you've seen, FileInfo is a class that acts like a dictionary. To explore this further, let's look at the UserDict class in the UserDict module, which is the ancestor of our FileInfo class. This is nothing special; the class is written in Python and stored in a .py file, just like our code. In particular, it's stored in the lib directory in your Python installation.

Подсказка
In the Python IDE on Windows, you can quickly open any module in your library path with File->Locate... (Ctrl-L).

In Python, you can not subclass built-in datatypes like strings, lists, and dictionaries. To compensate for this, Python comes with wrapper classes that mimic the behavior of these built-in datatypes: UserString, UserList, and UserDict. Using a combination of normal and special methods, the UserDict class does an excellent imitation of a dictionary, but it's just a class like any other, so you can subclass it to provide custom dictionary-like classes like FileInfo.

Пример 3.11. Defining the UserDict class


 class UserDict:                                1
     def __init__(self, dict=None):             2
         self.data = {}                         3
         if dict is not None: self.update(dict) 4
 
1 Note that UserDict is a base class, not inherited from any other class.
2 This is the __init__ method that we overrode in the FileInfo class.
Note that the argument list in this ancestor class is different than the descendant.
That's okay; each subclass can have its own set of arguments, as long as it calls the ancestor with the correct arguments.
Here the ancestor class has a way to define initial values (by passing a dictionary in the dict argument)
which our FileInfo does not take advantage of.
3 Python supports data attributes (called “instance variables” in Java and Powerbuilder,
“member variables” in C++), which is data held by a specific instance of a class.
In this case, each instance of UserDict will have a data attribute data.
To reference this attribute from code outside the class, you would qualify it with the instance name,
instance.data, in the same way that you qualify a function with its module name.
To reference a data attribute from within the class, we use self as the qualifier.
By convention, all data attributes are initialized to reasonable values in the __init__ method.
However, this is not required, since data attributes, like local variables, spring into existence when they are first assigned a value.
4 This is a syntax you may not have seen before (I haven't used it in the examples in this book).
This is an if statement, but instead of having an indented block starting on the next
line, there is just a single statement on the same line, after the colon.
This is perfectly legal syntax, and is just a shortcut when you have only one statement in a block.
(It's like specifying a single statement without braces in C++.)
You can use this syntax, or you can have indented code on subsequent lines,
but you can't do both for the same block.
Замечание
Java and Powerbuilder support function overloading by argument list,
i.e. one class can have multiple methods with the same name but a different number of arguments,
or arguments of different types. Other languages (most notably PL/SQL) even support function overloading
by argument name; i.e. one class can have multiple methods with the same
name and the same number of arguments of the same type but different argument names.
Python supports neither of these; it has no form of function overloading whatsoever.
An __init__ method is an __init__ method is an __init__ method, regardless of its arguments.
There can be only one __init__ method per class, and if a descendant class has an __init__ method,
it always overrides the ancestor __init__ method, even if the descendant defines it
with a different argument list.
Замечание
Always assign an initial value to all of an instance's data attributes in the __init__ method.
It will save you hours of debugging later.

Пример 3.12. UserDict normal methods

    def clear(self): self.data.clear()          1
     def copy(self):                             2
         if self.__class__ is UserDict:          3
             return UserDict(self.data)
         import copy                             4
         return copy.copy(self)
     def keys(self): return self.data.keys()     5
     def items(self): return self.data.items()
     def values(self): return self.data.values()
 
1 clear is a normal class method; it is publicly available to be called by anyone at any time.
Note that clear, like all class methods, has self as its first argument.
(Remember, you don't include self when you call the method; it's something that Python adds for you.)
Also note the basic technique of this wrapper class: store a real dictionary (data) as a data attribute,
define all the methods that a real dictionary has, and have each class method redirect to the
corresponding method on the real dictionary. (In case you'd forgotten,
a dictionary's clear method deletes all of its keys and their associated values.)
2 The copy method of a real dictionary returns a new dictionary that is an exact duplicate of the original
(all the same key-value pairs). But UserDict can't simply redirect to self.data.copy,
because that method returns a real dictionary, and what we want is to return a new instance
that is the same class as self.
3 We use the __class__ attribute to see if self is a UserDict; if so, we're golden,
because we know how to copy a UserDict: just create a new UserDict and give it the real dictionary
that we've squirreled away in self.data.
4 If self.__class__ is not UserDict, then self must be some subclass of UserDict (like maybe FileInfo), in which case life gets trickier. UserDict
doesn't know how to make an exact copy of one of its descendants;
there could, for instance, be other data attributes defined in the subclass,
so we would have to iterate through them and make sure to copy all of
them. Luckily, Python comes with a module to do exactly this, and it's
called copy. I won't go into the details here (though it's a
wicked cool module, if you're ever inclined to dive into it on your own).
Suffice to say that copy can copy arbitrary Python objects, and that's how we're using it here.
5 The rest of the methods are straightforward, redirecting the calls to the built-in methods on self.data.

Further reading

3.6. Special class methods

In addition to normal class methods, there are a number of special methods which Python classes can define. Instead of being called directly by your code (like normal methods), special methods are called for you by Python in particular circumstances or when specific syntax is used.

As you saw in the previous section, normal methods went a long way towards wrapping a dictionary in a class. But normal methods alone are not enough, because there are lots of things you can do with dictionaries besides call methods on them. For starters, you can get and set items with a syntax that doesn't include explicitly invoking methods. This is where special class methods come in: they provide a way to map non-method-calling syntax into method calls.

Пример 3.13. The __getitem__ special method

    def __getitem__(self, key): return self.data[key]
>>> f = fileinfo.FileInfo("/music/_singles/kairo.mp3")
 >>> f
 {'name':'/music/_singles/kairo.mp3'}
 >>> f.__getitem__("name") 1
 '/music/_singles/kairo.mp3'
 >>> f["name"]             2
 '/music/_singles/kairo.mp3'
1 The __getitem__ special method looks simple enough.
Like the normal methods clear, keys, and values, it just redirects to the dictionary to return its value.
But how does it get called? Well, you can call __getitem__ directly, but in practice you wouldn't actually do that;
I'm just doing it here to show you how it works.
The right way to use __getitem__ is to get Python to call it for you.
2 This looks just like the syntax you would use to get a dictionary value, and in fact it returns the value you would expect.
But here's the missing link: under the covers, Python has converted this syntax to the method call f.__getitem__("name").
That's why __getitem__ is a special class method; not only can you call it yourself,
you can get Python to call it for you by using the right syntax.

Пример 3.14. The __setitem__ special method

    def __setitem__(self, key, item): self.data[key] = item
>>> f
 {'name':'/music/_singles/kairo.mp3'}
 >>> f.__setitem__("genre", 31) 1
 >>> f
 {'name':'/music/_singles/kairo.mp3', 'genre':31}
 >>> f["genre"] = 32            2
 >>> f
 {'name':'/music/_singles/kairo.mp3', 'genre':32}
1 Like the __getitem__ method, __setitem__ simply redirects to the real dictionary self.data to do its work.

And like __getitem__, you wouldn't ordinarily call it directly like this; Python calls __setitem__ for you when you use the right syntax.
2 This looks like regular dictionary syntax, except of course that f is really a class
that's trying very hard to masquerade as a dictionary, and __setitem__ is an essential part
of that masquerade. This line of code actually calls f.__setitem__("genre", 32) under the covers.


__setitem__ is a special class method because it gets called for you, but it's still a class method.
Just as easily as the __setitem__ method was defined in UserDict, we can redefine it in our descendant class to override the ancestor method. This allows us to define classes that act like dictionaries in some ways but define their own behavior above and beyond the built-in dictionary.

This concept is the basis of the entire framework we're studying in this chapter. Each file type can have a handler class which knows how to get metadata from a particular type of file. Once some attributes (like the file's name and location) are known, the handler class knows how to derive other attributes automatically. This is done by overriding the __setitem__ method, checking for particular keys, and adding additional processing when they are found.

For example, MP3FileInfo is a descendant of FileInfo.
When an MP3FileInfo's name is set, it doesn't just set the name key (like the ancestor FileInfo does);
it also looks in the file itself for MP3 tags and populates a whole set of keys.

Пример 3.15. Overriding __setitem__ in MP3FileInfo

    def __setitem__(self, key, item):         1
         if key == "name" and item:            2
             self.__parse(item)                3
         FileInfo.__setitem__(self, key, item) 4
1 Note that our __setitem__ method is defined exactly the same way as the ancestor method.
This is important, since Python will be calling the method for us, and it
expects it to be defined with a certain number of arguments.
(Technically speaking, the names of the arguments don't matter, just the number.)
2 Here's the crux of the entire MP3FileInfo class:
if we're assigning a value to the name key, we want to do something extra.
3 The extra processing we do for names is encapsulated in the __parse method.
This is another class method defined in MP3FileInfo, and when we call it,
we qualify it with self. Just calling __parse would look for a normal function defined outside the class,
which is not what we want; calling self.__parse will look for a class method defined
within the class. This isn't anything new; you reference data attributes the same way.
4 After doing our extra processing, we want to call the ancestor method.
Remember, this is never done for you in Python; you have to do it manually.
Note that we're calling the immediate ancestor, FileInfo,
even though it doesn't have a __setitem__ method.
That's okay, because Python will walk up the ancestor tree until it finds a class with the method we're calling,
so this line of code will eventually find and call the __setitem__ defined in UserDict.
Замечание
When accessing data attributes within a class, you need to qualify the attribute name:
self.attribute. When calling other methods within a class, you need to qualify
the method name: self.method.

Пример 3.16. Setting an MP3FileInfo's name

>>> import fileinfo
 >>> mp3file = fileinfo.MP3FileInfo()                   1
 >>> mp3file
 {'name':None}
 >>> mp3file["name"] = "/music/_singles/kairo.mp3"      2
 >>> mp3file
 {'album': 'Rave Mix', 'artist': '***DJ MARY-JANE***', 'genre': 31,
 'title': 'KAIRO****THE BEST GOA', 'name': '/music/_singles/kairo.mp3',
 'year': '2000', 'comment': 'http://mp3.com/DJMARYJANE'}
 >>> mp3file["name"] = "/music/_singles/sidewinder.mp3" 3
 >>> mp3file
 {'album': '', 'artist': 'The Cynic Project', 'genre': 18, 'title': 'Sidewinder',
 'name': '/music/_singles/sidewinder.mp3', 'year': '2000',
 'comment': 'http://mp3.com/cynicproject'}
1 First, we create an instance of MP3FileInfo, without passing it a filename.
(We can get away with this because the filename argument of the __init__ method is optional.)
Since MP3FileInfo has no __init__ method of its own, Python walks up the ancestor tree
and finds the __init__ method of FileInfo.
This __init__ method manually calls the __init__ method of UserDict and then sets the name key to filename, which is None,
since we didn't pass a filename.
Thus, mp3file initially looks like a dictionary with one key, name, whose value is None.
2 Now the real fun begins. Setting the name key of mp3file triggers the __setitem__ method
on MP3FileInfo (not UserDict), which notices that we're setting the name key
with a real value and calls self.__parse.
Although we haven't traced through the __parse method yet, you can see from the output
that it sets several other keys: album, artist, genre, title, year, and comment.
3 Modifying the name key will go through the same process again: Python calls __setitem__,
which calls self.__parse, which sets all the other keys.
3.7. Advanced special class methods

There are more special methods than just __getitem__ and __setitem__. Some of them let you emulate functionality that you may not even know about.

Пример 3.17. More special methods in UserDict

    def __repr__(self): return repr(self.data)     1
     def __cmp__(self, dict):                       2
         if isinstance(dict, UserDict):
             return cmp(self.data, dict.data)
         else:
             return cmp(self.data, dict)
     def __len__(self): return len(self.data)       3
     def __delitem__(self, key): del self.data[key] 4
1 __repr__ is a special method which is called when you call repr(instance).
The repr function is a built-in function that returns a string representation of an object.
It works on any object, not just class instances. You're already intimately familiar with repr and you don't even know it.
In the interactive window, when you type just a variable name and hit ENTER, Python uses repr to display the variable's value.
Go create a dictionary d with some data and then print repr(d) to see for yourself.
2 __cmp__ is called when you compare class instances. In general, you can compare any two Python objects,
not just class instances, by using ==. There are rules that define when built-in datatypes are considered equal;
for instance, dictionaries are equal when they have all the same keys and values,
and strings are equal when they are the same length and contain the same sequence of characters.
For class instances, you can define the __cmp__ method and code the comparison logic yourself,
and then you can use == to compare instances of your class and Python will call your __cmp__ special method
for you.
3 __len__ is called when you call len(instance).
The len function is a built-in function that returns the length of an object.
It works on any object that could reasonably be thought of as having a length.
The len of a string is its number of characters; the len of a dictionary is its number of keys;
the len of a list or tuple is its number of elements.
For class instances, define the __len__ method and code the length calculation yourself,
then call len(instance) and Python will call your __len__ special method for you.
4 __delitem__ is called when you call del instance[key], which you may remember as the way
to delete individual items from a dictionary.
When you use del on a class instance, Python calls the __delitem__ special method for you.
Замечание
In Java, you determine whether two string variables reference the same physical memory location
by using str1 == str2. This is called object identity, and it is written in Python as str1 is str2.
To compare string values in Java, you would use str1.equals(str2);
in Python, you would use str1 == str2.
Java programmers who have been taught to believe that the world is a better place
because == in Java compares by identity instead of by value may have a difficult time adjusting
to Python's lack of such “gotchas”.

At this point, you may be thinking, “all this work just to do something in a class that I can do with a built-in datatype”. And it's true that life would be easier (and the entire UserDict class would be unnecessary) if you could inherit from built-in datatypes like a dictionary. But even if you could, special methods would still be useful, because they can be used in any class, not just wrapper classes like UserDict.

Special methods mean that any class can store key-value pairs like a dictionary, just by defining the __setitem__ method. Any class can act like a sequence, just by defining the __getitem__ method. Any class that defines the __cmp__ method can be compared with ==. And if your class represents something that has a length, don't define a GetLength method; define the __len__ method and use len(instance).

Замечание
While other object-oriented languages only let you define the physical model of an object
(“this object has a GetLength method”), Python's special class methods like __len__ allow you
to define the logical model of an object (“this object has a length”).

There are lots of other special methods. There's a whole set of them that let classes act like numbers, allowing you to add, subtract, and do other arithmetic operations on class instances. (The canonical example of this is a class that represents complex numbers, numbers with both real and imaginary components.) The __call__ method lets a class act like a function, allowing you to call a class instance directly. And there are other special methods that allow classes to have read-only and write-only data attributes; we'll talk more about those in later chapters.

3.8. Class attributes

You already know about data attributes, which are variables owned by a specific instance of a class. Python also supports class attributes, which are variables owned by the class itself.

Пример 3.18. Introducing class attributes


 class MP3FileInfo(FileInfo):
     "хранит ID3v1.0 MP3 теги"
     tagDataMap = {"title"   : (  3,  33, stripnulls),
                   "artist"  : ( 33,  63, stripnulls),
                   "album"   : ( 63,  93, stripnulls),
                   "year"    : ( 93,  97, stripnulls),
                   "comment" : ( 97, 126, stripnulls),
                   "genre"   : (127, 128, ord)}
>>> import fileinfo
 >>> fileinfo.MP3FileInfo            1
 <class fileinfo.MP3FileInfo at 01257FDC>
 >>> fileinfo.MP3FileInfo.tagDataMap 2
 {'title': (3, 33, <function stripnulls at 0260C8D4>), 
 'genre': (127, 128, <built-in function ord>), 
 'artist': (33, 63, <function stripnulls at 0260C8D4>), 
 'year': (93, 97, <function stripnulls at 0260C8D4>), 
 'comment': (97, 126, <function stripnulls at 0260C8D4>), 
 'album': (63, 93, <function stripnulls at 0260C8D4>)}
 >>> m = fileinfo.MP3FileInfo()      3
 >>> m.tagDataMap
 {'title': (3, 33, <function stripnulls at 0260C8D4>), 
 'genre': (127, 128, <built-in function ord>),
 'artist': (33, 63, <function stripnulls at 0260C8D4>), 
 'year': (93, 97, <function stripnulls at 0260C8D4>), 
 'comment': (97, 126, <function stripnulls at 0260C8D4>), 
 'album': (63, 93, <function stripnulls at 0260C8D4>)}
1 MP3FileInfo is the class itself, not any particular instance of the class.
2
tagDataMap is a class attribute: literally, an attribute of the class.
 It is available before creating any instances of the class.
3 Class attributes are available both through direct reference to the class and through any instance of the class.
Замечание
In Java, both static variables (called class attributes in Python) and instance variables
(called data attributes in Python) are defined immediately after the class definition
(one with the static keyword, one without).
In Python, only class attributes can be defined here; data attributes are defined in the __init__ method.

Class attributes can be used as class-level constants (which is how we use them in MP3FileInfo), but they are not really constants.[4] You can also change them.

Пример 3.19. Modifying class attributes

>>> class counter:
 ...     count = 0                     1
 ...     def __init__(self):
 ...         self.__class__.count += 1 2
 ...     
 >>> counter
 <class __main__.counter at 010EAECC>
 >>> counter.count                     3
 0
 >>> c = counter()
 >>> c.count                           4
 1
 >>> counter.count
 1
 >>> d = counter()                     5
 >>> d.count
 2
 >>> c.count
 2
 >>> counter.count
 2
1 count is a class attribute of the counter class.
2 __class__ is a built-in attribute of every class instance (of every class).
It is a reference to the class that self is an instance of (in this case, the counter class).
3 Because count is a class attribute, it is available through direct reference to the class,
before we have created any instances of the class.
4 Creating an instance of the class calls the __init__ method, which increments the class attribute count by 1.
This affects the class itself, not just the newly created instance.
5 Creating a second instance will increment the class attribute count again. Notice how the class attribute is shared by the class and all instances of the class.

Footnotes

[4] There are no constants in Python. Everything can be changed if you try hard enough. This fits with one of the core principles of Python: bad behavior should be discouraged but not banned. If you really want to change the value of None, you can do it, but don't come running to me when your code is impossible to debug.

3.9. Private functions

Like most languages, Python has the concept of private functions, which can not be called from outside their module; private class methods, which can not be called from outside their class; and private attributes, which can not be accessed from outside their class. Unlike most languages, whether a Python function, method, or attribute is private or public is determined entirely by its name.

In MP3FileInfo, there are two methods: __parse and __setitem__. As we have already discussed, __setitem__ is a special method; normally, you would call it indirectly by using the dictionary syntax on a class instance, but it is public, and you could call it directly (even from outside the fileinfo module) if you had a really good reason. However, __parse is private, because it has two underscores at the beginning of its name.

Замечание
If the name of a Python function, class method, or attribute starts with (but doesn't end with) two underscores, it's private; everything else is public.
Замечание
In Python, all special methods (like __setitem__) and built-in attributes
(like __doc__)
follow a standard naming convention: they both start with and end with
two underscores. Don't name your own methods and attributes this way;
it will only confuse you (and others) later.
Замечание
Python has no concept of protected class methods (accessible only in their own class and descendant classes).
Class methods are either private (accessible only in their own class) or public (accessible from anywhere).

Пример 3.20. Trying to call a private method

>>> import fileinfo
 >>> m = fileinfo.MP3FileInfo()
 >>> m.__parse("/music/_singles/kairo.mp3") 1
 Traceback (innermost last):
   File "<interactive input>", line 1, in ?
 AttributeError: 'MP3FileInfo' instance has no attribute '__parse'
1 If you try to call a private method, Python will raise a slightly misleading exception,
saying that the method does not exist.
Of course it does exist, but it's private, so it's not accessible outside the class.[5]

Further reading

Footnotes

[5] Strictly speaking, private methods are accessible outside their class, just not easily accessible. Nothing in Python is truly private; internally, the names of private methods and attributes are mangled and unmangled on the fly to make them seem inaccessible by their given names. You can access the __parse method of the MP3FileInfo class by the name _MP3FileInfo__parse. Acknowledge that this is interesting, then promise to never, ever do it in real code. Private methods are private for a reason, but like many other things in Python, their privateness is ultimately a matter of convention, not force.

3.10. Handling exceptions

Like many object-oriented languages, Python has exception handling via try...except blocks.

Замечание
Python uses try...except to handle exceptions and raise to generate them. Java and C++ use try...catch to handle exceptions, and throw to generate them.

If you already know all about exceptions, you can skim this section. If you've been stuck programming in a lesser language that doesn't have exception handling, or you've been using a real language but not using exceptions, this section is very important.

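Exceptions are everywhere in Python; virtually every module in the standard Python library uses them, and Python itself will raise them in lots of different circumstances. You've already seen them repeatedly throughout this book: accessing a non-existent dictionary key raises a KeyError, calling a non-existent method raises an AttributeError, and referencing a non-existent variable raises a NameError.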

In each of these cases, we were simply playing around in the Python IDE: an error occurred, the exception was printed (depending on your IDE, in an intentionally jarring shade of red), and that was that. This is called an unhandled exception; when the exception was raised, there was no code to explicitly notice it and deal with it, so it bubbled its way back to the default behavior built in to Python, which is to spit out some debugging information and give up. In the IDE, that's no big deal, but if that happened while your actual Python program was running, the entire program would come to a screeching halt.[6]

An exception doesn't have to be a complete program crash, though. Exceptions, when raised, can be handled. Sometimes an exception is really because you have a bug in your code (like accessing a variable that doesn't exist), but many times, an exception is something you can plan for. If you're opening a file, it might not exist; if you're connecting to a database, it might be unavailable, or you might not have the correct security credentials to access it. If you know a line of code may raise an exception, you should handle the exception using a try...except block.

Пример 3.21. Opening a non-existent file

>>> fsock = open("/notthere", "r")      1
 Traceback (innermost last):
   File "<interactive input>", line 1, in ?
 IOError: [Errno 2] No such file or directory: '/notthere'
 >>> try:
 ...     fsock = open("/notthere")       2
 ... except IOError:                     3
 ...     print "The file does not exist, exiting gracefully"
 ... print "This line will always print" 4
 The file does not exist, exiting gracefully
 This line will always print
1 Using the built-in open function, we can try to open a file for reading
(more on open in the next section).
But the file doesn't exist, so this raises the IOError exception.
Since we haven't provided any explicit check for an IOError exception,
Python just prints out some debugging information about what happened and then gives up.
2 We're trying to open the same non-existent file, but this time we're doing it within a try...except block.
3 When the open function raises an IOError exception, we're ready for it.
The except IOError: line catches the exception and executes our own block of code,
which in this case just prints a more pleasant error message.
4 Once an exception has been handled, processing continues normally
on the first line after the try...except block.
Note that this line will always print, whether or not an exception occurs.
If you really did have a file called notthere in your root directory,
the call to open would succeed, the except clause would be ignored,
and this line would still be executed.

Exceptions may seem unfriendly (after all, if you don't catch the exception, your entire program will crash), but consider the alternative. Would you rather get back an unusable file object to a non-existent file? You'd have to check its validity somehow anyway, and if you forgot, your program would give you strange errors somewhere down the line that you would have to trace back to the source. I'm sure you've done this; it's not fun. With exceptions, errors occur immediately, and you can handle them in a standard way at the source of the problem.

There are lots of other uses for exceptions besides handling actual error conditions. A common use in the standard Python library is to try to import a module, then check whether it worked. Importing a module that does not exist will raise an ImportError exception. You can use this to define multiple levels of functionality based on which modules are available at run-time, or to support multiple platforms (where platform-specific code is separated into different modules).

Пример 3.22. Supporting platform-specific functionality

This code comes from the getpass module, a wrapper module for getting a password from the user. Getting a password is accomplished differently on UNIX, Windows, and Mac OS platforms, but this code encapsulates all of those differences.

  # Bind the name getpass to the appropriate function
   try:
       import termios, TERMIOS                     1
   except ImportError:
       try:
           import msvcrt                           2
       except ImportError:
           try:
               from EasyDialogs import AskPassword 3
           except ImportError:
               getpass = default_getpass           4
           else:                                   5
               getpass = AskPassword
       else:
           getpass = win_getpass
   else:
       getpass = unix_getpass
1
 termios is a UNIX-specific module that provides low-level control over the
 input terminal. If this module is not available (because it's not on
 your system, or your system doesn't support it), the import fails and
 Python raises an ImportError, which we catch.
2
OK, we didn't have termios, so let's try msvcrt,
 which is a Windows-specific module that provides an API to lots of
 useful functions in the Microsoft Visual C++ runtime services. If this
 import fails, Python will raise an ImportError, which we catch.
3
If the first two didn't work, we try to import a function from EasyDialogs,
 which is a Mac OS-specific module that provides functions to pop up
 dialogs of various types. Once again, if this import fails, Python will
 raise an ImportError, which we catch.
4
None of these platform-specific modules is available (which is possible,
 since Python has been ported to lots of different platforms), so we
 have to fall back on a default password input function (which is
 defined elsewhere in the getpass module).
 Notice what we're doing here: we're assigning the function default_getpass to the variable
 getpass.  If you read the official getpass documentation,
 it tells you that the getpass module defines a getpass function.
 This is how it does it: by binding getpass to the right function for your platform.
 Then when you call the getpass
 function, you're really calling a platform-specific function that this
 code has set up for you. You don't have to know or care what platform
 your code is running on; just call getpass, and it will always do the right thing.
5
A try...except block can have an else clause, like an if statement.
 If no exception is raised during the try block, the else clause is executed afterwards.
 In this case, that means that the import statement (from EasyDialogs import AskPassword) worked,
 so we should bind getpass to the AskPassword function.
 Each of the other try...except blocks has a similar else clause to bind getpass
 to the appropriate function when we find an import that works.

Further reading

Footnotes

[6] Or, as some marketroids would put it, your program would perform an illegal action. Whatever.

3.11. File objects

Python has a built-in function, open, for opening a file on disk. open returns a file object, which has methods and attributes for getting information about and manipulating the opened file.

Пример 3.23. Opening a file

>>> f = open("/music/_singles/kairo.mp3", "rb") 1
 >>> f                                           2
 <open file '/music/_singles/kairo.mp3', mode 'rb' at 010E3988>
 >>> f.mode                                      3
 'rb'
 >>> f.name                                      4
 '/music/_singles/kairo.mp3'
1
The open function can take up to three parameters: a filename, a mode, and a
 buffering parameter. Only the first one, the filename, is required; the
 other two are optional.  If not specified, the file is opened for reading in text mode.
 Here we are opening the file for reading in binary mode.  (print open.__doc__ displays a great explanation of all the possible modes.)
2 The open function returns an object (by now, this should not surprise you). A file object has several useful attributes.
3 The mode attribute of a file object tells you what mode the file was opened in.
4 The name attribute of a file object tells you the name of the file that the file object has open.

Пример 3.24. Reading a file

>>> f
 <open file '/music/_singles/kairo.mp3', mode 'rb' at 010E3988>
 >>> f.tell()              1
 0
 >>> f.seek(-128, 2)       2
 >>> f.tell()              3
 7542909
 >>> tagData = f.read(128) 4
 >>> tagData
 'TAGKAIRO****THE BEST GOA         ***DJ MARY-JANE***            Rave Mix                      2000http://mp3.com/DJMARYJANE     \037'
 >>> f.tell()              5
 7543037
1
A file object maintains state about the file it has open.  The tell
 method of a file object tells you your current position in the open
 file. Since we haven't done anything with this file yet, the current
 position is 0, which is the beginning of the file.
2
The seek method of a file object moves to another position in the open file.
 The second parameter specifies what the first one means;
 0 means move to an absolute position (counting from the start of the file),
 1 means move to a relative position (counting from the current position), and 2
 means move to a position relative to the end of the file. Since the MP3
 tags we're looking for are stored at the end of the file,
 we use 2 and tell the file object to move to a position 128 bytes from the end of the file.
3 The tell method confirms that the current file position has moved.
4
The read method reads a specified number of bytes from the open file and returns
 a string with the data which was read. The optional parameter specifies
 the maximum number of bytes to read.
 If no parameter is specified, read will read until the end of the file.
 (We could have simply said read()
 here, since we know exactly where we are in the file and we are, in
 fact, reading the last 128 bytes.) The read data is assigned to the tagData variable,
 and the current position is updated based on how many bytes were read.
5
The tell method confirms that the current position has moved. If you do the
 math, you'll see that after reading 128 bytes, the position has been
 incremented by 128.

Пример 3.25. Closing a file

>>> f
 <open file '/music/_singles/kairo.mp3', mode 'rb' at 010E3988>
 >>> f.closed  1
 0
 >>> f.close() 2
 >>> f
 <closed file '/music/_singles/kairo.mp3', mode 'rb' at 010E3988>
 >>> f.closed
 1
 >>> f.seek(0) 3
 Traceback (innermost last):
   File "<interactive input>", line 1, in ?
 ValueError: I/O operation on closed file
 >>> f.tell()
 Traceback (innermost last):
   File "<interactive input>", line 1, in ?
 ValueError: I/O operation on closed file
 >>> f.read()
 Traceback (innermost last):
   File "<interactive input>", line 1, in ?
 ValueError: I/O operation on closed file
 >>> f.close() 4
1
The closed attribute of a file object indicates whether the object has a file open or not.
 In this case, the file is still open (closed is 0).
 Open files consume system resources, and depending on the file mode,
 other programs may not be able to access them. It's important to close
 files as soon as you're done with them.
2
To close a file, call the close
 method of the file object. This frees the lock (if any) that you were
 holding on the file, flushes buffered writes (if any) that the system
 hadn't gotten around to actually writing yet, and releases the system
 resources. The closed attribute confirms that the file is closed.
3
Just because a file is closed doesn't mean that the file object ceases to exist.
 The variable f will continue to exist until it goes out of scope
 or gets manually deleted. However, none of the methods that manipulate
 an open file will work once the file has been closed; they all raise an
 exception.
4
Calling close on a file object whose file is already closed does not raise an exception;
 it fails silently.

Пример 3.26. File objects in MP3FileInfo

        try:                                1
             fsock = open(filename, "rb", 0) 2
             try:
                 fsock.seek(-128, 2)         3
                 tagdata = fsock.read(128)   4
             finally:                        5
                 fsock.close()
             .
             .
             .
         except IOError:                     6
             pass                           
1
Because opening and reading files is risky and may raise an exception,
 all of this code is wrapped in a try...except block.
 (Hey, isn't standardized indentation great?
 This is where you start to appreciate it.)
2
The open function may raise an IOError.
 (Maybe the file doesn't exist.)
3 The seek method may raise an IOError. (Maybe the file is smaller than 128 bytes.)
4
The read method may raise an IOError.
 (Maybe the disk has a bad sector, or it's on a network drive and the network just went down.)
5
This is new: a try...finally block.
 Once the file has been opened successfully by the open function,
 we want to make absolutely sure that we close it, even if an exception is raised by the seek
 or read methods.  That's what a try...finally block is for: code in the finally block
 will always be executed, even if something in the try block raises
 an exception.  Think of it as code that gets executed “on the way out”, regardless of what happened on the way.
6
At last, we handle our IOError exception.  This could be the IOError exception raised
 by the call to open, seek, or read.
 Here, we really don't care, because all we're going to do is ignore it silently and continue.
 (Remember, pass is a Python statement that does nothing.)
 That's perfectly legal; “handling” an exception can mean explicitly
 doing nothing. It still counts as handled, and processing will continue
 normally on the next line of code after the try...except block.

Further reading

3.12. for loops

Like most other languages, Python has for loops. The only reason you haven't seen them until now is that Python is good at so many other things that you don't need them as often.

Most other languages don't have a powerful list datatype like Python, so you end up doing a lot of manual work, specifying a start, end, and step to define a range of integers or characters or other iterable entities. But in Python, a for loop simply iterates over a list, the same way list comprehensions work.

Пример 3.27. Introducing the for loop

>>> li = ['a', 'b', 'e']
 >>> for s in li:         1
 ...     print s          2
 a
 b
 e
 >>> print "\n".join(li)  3
 a
 b
 e
1
The syntax for a for loop is similar to list comprehensions.
 li is a list, and s will take the value of each element in turn,
 starting from the first element.
2
Like an if statement or any other indented block,
 a for loop can have any number of lines of code in it.
3
This is the reason you haven't seen the for loop yet: we haven't needed it yet.
 It's amazing how often you use for loops in other languages when all you really want is a
 join or a list comprehension.

Пример 3.28. Simple counters

>>> for i in range(5):       1
 ...     print i
 0
 1
 2
 3
 4
 >>> li = ['a', 'b', 'c', 'd', 'e']
 >>> for i in range(len(li)): 2
 ...     print li[i]
 a
 b
 c
 d
 e
 
1
Doing a “normal” (by Visual Basic standards) counter for loop is also simple.
 As we saw in Пример 1.27. Присваивание идущих подряд значений, range produces a list of integers,
 which we then loop through.  I know it looks a bit odd,
 but it is occasionally (and I stress occasionally) useful to have a counter loop.
2
Don't ever do this. This is Visual Basic-style thinking. Break out of it.
 Just iterate through the list, as shown in the previous example.

Пример 3.29. Iterating through a dictionary

>>> for k, v in os.environ.items():      1 2
 ...     print "%s=%s" % (k, v)
 USERPROFILE=C:\Documents and Settings\mpilgrim
 OS=Windows_NT
 COMPUTERNAME=MPILGRIM
 USERNAME=mpilgrim
 
 [...snip...]
 >>> print "\n".join(["%s=%s" % (k, v) for k, v in os.environ.items()]) 3
 USERPROFILE=C:\Documents and Settings\mpilgrim
 OS=Windows_NT
 COMPUTERNAME=MPILGRIM
 USERNAME=mpilgrim
 
 [...snip...]
1
os.environ is a dictionary of the environment variables defined on your system. In
 Windows, these are your user and system variables accessible from
 MS-DOS. In UNIX, they are the variables exported in your shell's
 startup scripts. In Mac OS, there is no concept of environment
 variables, so this dictionary is empty.
2
os.environ.items() returns a list of tuples: [(key1, value1), (key2, value2), ...].
 The for loop iterates through this list.
 The first round, it assigns key1 to k and value1 to v,
 so k = USERPROFILE and v = C:\Documents and Settings\mpilgrim.
 The second round, k gets the second key, OS, and v gets the corresponding value,
 Windows_NT.
3
With multi-variable assignment and list comprehensions, you can replace the entire for
 loop with a single statement. Whether you actually do this in real code
 is a matter of personal coding style; I like it because it makes it
 clear that what we're doing is mapping a dictionary into a list, then
 joining the list into a single string. Other programmers prefer to
 write this out as a for loop.
 Note that the output is the same in either case, although this version is slightly faster,
 because there is only one print statement instead of many.

Пример 3.30. for loop in MP3FileInfo

    tagDataMap = {"title"   : (  3,  33, stripnulls),
                   "artist"  : ( 33,  63, stripnulls),
                   "album"   : ( 63,  93, stripnulls),
                   "year"    : ( 93,  97, stripnulls),
                   "comment" : ( 97, 126, stripnulls),
                   "genre"   : (127, 128, ord)} 1
     .
     .
     .
             if tagdata[:3] == "TAG":
                 for tag, (start, end, parseFunc) in self.tagDataMap.items(): 2
                     self[tag] = parseFunc(tagdata[start:end])                3
1
tagDataMap is a class attribute
 that defines the tags we're looking for in an MP3 file. Tags are stored
 in fixed-length fields; once we read the last 128 bytes of the file,
 bytes 3 through 32 of those are always the song title, 33-62 are always
 the artist name, 63-92 the album name, and so forth.
 Note that tagDataMap is a dictionary of tuples, and each tuple contains two integers
 and a function reference.
2
This looks complicated, but it's not.
 The structure of the for variables matches the structure of the elements of the list returned
 by items.  Remember, items returns a list of tuples of the form (key,
 value).
 The first element of that list is ("title", (3, 33, <function stripnulls>)),
 so the first time around the loop, tag gets "title", start gets 3,
 end gets 33, and parseFunc gets the function stripnulls.
3
Now that we've extracted all the parameters for a single MP3 tag, saving the tag data is easy.
 We slice tagdata from start to end to get the actual data for this tag,
 call parseFunc to post-process the data, and assign this as the value for the key tag
 in the pseudo-dictionary self.
 After iterating through all the elements in tagDataMap, self has the values for all the tags,
 and you know what that looks like.
3.13. More on modules

Modules, like everything else in Python, are objects. Once imported, you can always get a reference to a module through the global dictionary sys.modules.

Пример 3.31. Introducing sys.modules

>>> import sys                          1
 >>> print '\n'.join(sys.modules.keys()) 2
 win32api
 os.path
 os
 exceptions
 __main__
 ntpath
 nt
 sys
 __builtin__
 site
 signal
 UserDict
 stat
1
The sys module contains system-level information, like the version of Python you're running
 (sys.version or sys.version_info), and system-level options
 like the maximum allowed recursion depth (sys.getrecursionlimit() and sys.setrecursionlimit()).
2
sys.modules is a dictionary containing all the modules that have ever been imported
 since Python was started; the key is the module name, the value is the
 module object. Note that this is more than just the modules your program has imported.
 Python preloads some modules on startup, and if you're in a Python IDE, sys.modules contains
 all the modules imported by all the programs you've run within the IDE.

Пример 3.32. Using sys.modules

>>> import fileinfo         1
 >>> print '\n'.join(sys.modules.keys())
 win32api
 os.path
 os
 fileinfo
 exceptions
 __main__
 ntpath
 nt
 sys
 __builtin__
 site
 signal
 UserDict
 stat
 >>> fileinfo
 <module 'fileinfo' from 'fileinfo.pyc'>
 >>> sys.modules["fileinfo"] 2
 <module 'fileinfo' from 'fileinfo.pyc'>
1
As new modules are imported, they are added to sys.modules.
 This explains why importing the same module twice is very fast:
 Python has already loaded and cached the module in sys.modules,
 so importing the second time is simply a dictionary lookup.
2
Given the name (as a string) of any previously-imported module, you can get a reference to the module
 itself through the sys.modules dictionary.

Пример 3.33. The __module__ class attribute

>>> from fileinfo import MP3FileInfo
 >>> MP3FileInfo.__module__              1
 'fileinfo'
 >>> sys.modules[MP3FileInfo.__module__] 2
 <module 'fileinfo' from 'fileinfo.pyc'>
1 Every Python class has a built-in class attribute __module__, which is the name of the module in which the class is defined.
2 Combining this with the sys.modules dictionary, you can get a reference to the module in which a class is defined.

Пример 3.34. sys.modules in fileinfo.py

    def getFileInfoClass(filename, module=sys.modules[FileInfo.__module__]):       1
         "determine, from the file extension, the class meant to handle the file"
         subclass = "%sFileInfo" % os.path.splitext(filename)[1].upper()[1:]        2
         return hasattr(module, subclass) and getattr(module, subclass) or FileInfo 3
1
This is a function with two arguments; filename is required,
 but module is optional and defaults to the module which contains the FileInfo class.
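 This looks inefficient, because you might expect Python to evaluate the sys.modules
 expression every time the function is called. In fact, Python evaluates
 a default expression only once, when the def statement that defines the function
 is executed, not on every call. (For a top-level function that means once, when the
 module is first imported; getFileInfoClass is nested inside listDirectory, so its def
 statement runs once per call to listDirectory.) As we'll see later, we never call this function with a module argument, so module serves as a function-level constant.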
2
We'll plough through this line later, after we dive into the os module.
 For now, take it on faith that subclass ends up as the name of a class,
 like MP3FileInfo.
3
You already know about getattr, which gets a reference to an object by name.  hasattr
 is a complementary function that checks whether an object has a
 particular attribute; in this case, whether a module has a particular
 class (although it works for any object and any attribute, just like getattr).
 In English, this line of code says “if this module has the class named by subclass then return it,
 otherwise return the base class FileInfo”.

Further reading

3.14. The os module

The os module has lots of useful functions for manipulating files and processes,
 and os.path has functions for manipulating file and directory paths.

Пример 3.35. Constructing pathnames

>>> import os
 >>> os.path.join("c:\\music\\ap\\", "mahadeva.mp3") 1 2
 'c:\\music\\ap\\mahadeva.mp3'
 >>> os.path.join("c:\\music\\ap", "mahadeva.mp3")   3
 'c:\\music\\ap\\mahadeva.mp3'
 >>> os.path.expanduser("~")                         4
 'c:\\Documents and Settings\\mpilgrim\\My Documents'
 >>> os.path.join(os.path.expanduser("~"), "Python") 5
 'c:\\Documents and Settings\\mpilgrim\\My Documents\\Python'
1
os.path is a reference to a module; which module it is depends on
 what platform you're running on.
 Just like getpass encapsulates differences between platforms
 by setting getpass to a platform-specific function, os encapsulates differences
 between platforms by setting path to a platform-specific module.
2
The join function of os.path
 constructs a pathname out of one or more partial pathnames. In this
 simple case, it simply concatenates strings. (Note that dealing with
 pathnames on Windows is annoying because the backslash character must
 be escaped.)
3
In this slightly less trivial case, join will add an extra backslash to the pathname
 before joining it to the filename.
 I was overjoyed when I discovered this, since addSlashIfNecessary is always one of the stupid
 little functions I have to write when building up my toolbox in a new language.  
 Do not write this stupid little function in Python; smart people have already taken care of it for you.
4
expanduser will expand a pathname that uses ~
 to represent the current user's home directory. This works on any
 platform where users have a home directory, like Windows, UNIX, and Mac
 OS X; it has no effect on Mac OS.
5
Combining these techniques, you can easily construct pathnames for directories and files under
 the user's home directory.

Пример 3.36. Splitting pathnames

>>> os.path.split("c:\\music\\ap\\mahadeva.mp3")                        1
 ('c:\\music\\ap', 'mahadeva.mp3')
 >>> (filepath, filename) = os.path.split("c:\\music\\ap\\mahadeva.mp3") 2
 >>> filepath                                                            3
 'c:\\music\\ap'
 >>> filename                                                            4
 'mahadeva.mp3'
 >>> (shortname, extension) = os.path.splitext(filename)                 5
 >>> shortname
 'mahadeva'
 >>> extension
 '.mp3'
1
The split function splits a full pathname and returns a tuple containing the path and filename.
 Remember when I said you could use multi-variable assignment to return multiple values from a function?
 Well, split is such a function.
2
We assign the return value of the split function into a tuple of two variables.
 Each variable receives the value of the corresponding element of the returned tuple.
3
The first variable, filepath, receives the value of the first element of the tuple returned from
 split, the file path.
4
The second variable, filename, receives the value of the second element of the tuple returned
 from split, the filename.
5
os.path also contains a function splitext,
 which splits a filename and returns a tuple containing the filename and
 the file extension. We use the same technique to assign each of them to
 separate variables.

Пример 3.37. Listing directories

>>> os.listdir("c:\\music\\_singles\\")                                          1
 ['a_time_long_forgotten_con.mp3', 'hellraiser.mp3', 'kairo.mp3',
 'long_way_home1.mp3', 'sidewinder.mp3', 'spinning.mp3']
 >>> dirname = "c:\\"
 >>> os.listdir(dirname)                                                          2
 ['AUTOEXEC.BAT', 'boot.ini', 'CONFIG.SYS', 'cygwin', 'docbook',
 'Documents and Settings', 'Incoming', 'Inetpub', 'IO.SYS', 'MSDOS.SYS', 'Music',
 'NTDETECT.COM', 'ntldr', 'pagefile.sys', 'Program Files', 'Python20', 'RECYCLER',
 'System Volume Information', 'TEMP', 'WINNT']
 >>> [f for f in os.listdir(dirname) if os.path.isfile(os.path.join(dirname, f))] 3
 ['AUTOEXEC.BAT', 'boot.ini', 'CONFIG.SYS', 'IO.SYS', 'MSDOS.SYS',
 'NTDETECT.COM', 'ntldr', 'pagefile.sys']
 >>> [f for f in os.listdir(dirname) if os.path.isdir(os.path.join(dirname, f))]  4
 ['cygwin', 'docbook', 'Documents and Settings', 'Incoming',
 'Inetpub', 'Music', 'Program Files', 'Python20', 'RECYCLER',
 'System Volume Information', 'TEMP', 'WINNT']
1 The listdir function takes a pathname and returns a list of the contents of the directory.
2 listdir returns both files and folders, with no indication of which is which.
3
You can use list filtering
 and the isfile function of the os.path module to separate
 the files from the folders.  isfile takes a pathname and returns 1 if the path represents a file,
 and 0 otherwise.
 Here we're using os.path.join to ensure a full pathname,
 but isfile also works with a partial path, relative to the current working directory.
 You can use os.getcwd() to get the current working directory.
4
os.path also has an isdir
 function which returns 1 if the path represents a directory, and 0
 otherwise. You can use this to get a list of the subdirectories within
 a directory.

Пример 3.38. Listing directories in fileinfo.py


 def listDirectory(directory, fileExtList):
     """return a list of metadata objects for all files with the
 given extensions"""
     # normalize the case of every directory entry (result is platform-dependent)
     fileList = [os.path.normcase(f) for f in os.listdir(directory)]
     # keep only entries whose extension is in fileExtList, as full pathnames
     fileList = [os.path.join(directory, f) for f in fileList \
                 if os.path.splitext(f)[1] in fileExtList]                         

These two lines of code combine everything we've learned so far about the os module, and then some.

  1. os.listdir(directory) returns a list of all the files and folders in directory.
  2. Iterating through the list with f, we use os.path.normcase(f) to normalize
     the case according to operating system defaults.  normcase
     is a useful little function that compensates for case-insensitive operating systems
     that think that mahadeva.mp3 and mahadeva.MP3 are the same file.
     For instance, on Windows and Mac OS, normcase will convert the entire filename to lowercase;
     on UNIX-compatible systems, it will return the filename unchanged.
  3. Iterating through the normalized list with f again, we use os.path.splitext(f) to split each filename into name and extension.
  4. For each file, we see if the extension is in the list of file extensions we care about (fileExtList, which was passed to the listDirectory function).
  5. For each file we care about, we use os.path.join(directory, f) to construct the full pathname of the file, and return a list of the full pathnames.
Замечание
Whenever possible, you should use the functions in os and os.path for file,
 directory, and path manipulations.
 These modules are wrappers for platform-specific modules,
 so functions like os.path.split work on UNIX, Windows, Mac OS, and any other supported Python platform.

Further reading

3.15. Putting it all together

Once again, all the dominoes are in place. We've seen how each line of code works. Now let's step back and see how it all fits together.

Пример 3.39. listDirectory


 def listDirectory(directory, fileExtList):                                         1
     """return a list of metadata objects for all files with the
 given extensions"""
     fileList = [os.path.normcase(f) for f in os.listdir(directory)]               
     fileList = [os.path.join(directory, f) for f in fileList \
                 if os.path.splitext(f)[1] in fileExtList]                          2
     def getFileInfoClass(filename, module=sys.modules[FileInfo.__module__]):       3
         "determine, from the file extension, the class meant to handle the file"
         subclass = "%sFileInfo" % os.path.splitext(filename)[1].upper()[1:]        4
         return hasattr(module, subclass) and getattr(module, subclass) or FileInfo 5
     return [getFileInfoClass(f)(f) for f in fileList]                              6
1
listDirectory is the main attraction of this entire module.
 It takes a directory (like c:\music\_singles\ in my case) and a list of interesting file extensions
 (like ['.mp3']),
 and it returns a list of class instances that act like dictionaries
 that contain metadata about each interesting file in that directory.
 And it does it in just a few straightforward lines of code.
2
As we saw in the previous section,
 this line of code gets a list of the full pathnames of all the files in directory
 that have an interesting file extension (as specified by fileExtList).
3
Old-school Pascal programmers may be familiar with them, but most people give me a
 blank stare when I tell them that Python supports nested functions -- literally,
 a function within a function.
 The nested function getFileInfoClass can only be called from the function in which it is defined,
 listDirectory.
 As with any other function, you don't need an interface declaration or anything fancy;
 just define the function and code it.
4
Now that you've seen the os module, this line should make more sense.
 It gets the extension of the file (os.path.splitext(filename)[1]),
 forces it to uppercase (.upper()), slices off the dot ([1:]),
 and constructs a class name out of it with string formatting.
 So c:\music\ap\mahadeva.mp3 becomes .mp3 becomes .MP3 becomes MP3
 becomes MP3FileInfo.
5
Having constructed the name of the handler class that would handle this file,
 we check to see if that handler class actually exists in this module.
 If it does, we return the class, otherwise we return the base class FileInfo.
 This is a very important point: this function returns a class.
 Not an instance of a class, but the class itself.
6
For each file in our “interesting files” list (fileList),
 we call getFileInfoClass with the filename (f).  Calling getFileInfoClass(f)
 returns a class; we don't know exactly which class, but we don't care.
 We then create an instance of this class (whatever it is) and pass the
 filename (f again), to the __init__ method.
 As we saw earlier in this chapter,
 the __init__ method of FileInfo sets self["name"], which triggers __setitem__,
 which is overridden in the descendant (MP3FileInfo)
 to parse the file appropriately to pull out the file's metadata. We do
 all that for each interesting file and return a list of the resulting
 instances.

Note that listDirectory is completely generic. It doesn't know ahead of time which types of files it will be getting, or which classes are defined that could potentially handle those files. It inspects the directory for the files to process, then introspects its own module to see what special handler classes (like MP3FileInfo) are defined. You can extend this program to handle other types of files simply by defining an appropriately-named class: HTMLFileInfo for HTML files, DOCFileInfo for Word .doc files, and so forth. listDirectory will handle them all, without modification, by handing the real work off to the appropriate classes and collating the results.

3.16. Summary

The fileinfo.py program should now make perfect sense.

Пример 3.40. fileinfo.py

"""Получение метаинформации, специфичной для файла данного типа.
 
 Создайте экземпляр соответствующего класса, передав конструктору имя файла.
 Возвращаемый объект ведет себя аналогично словарю с парами ключ-значение для
 каждой части метаинформации.
     import fileinfo
     info = fileinfo.MP3FileInfo("/music/ap/mahadeva.mp3")
     print "\\n".join(["%s=%s" % (k, v) for k, v in info.items()])
 
 Или используйте функцию listDirectory для получения информации обо всех файлов
 в директории
     for info in fileinfo.listDirectory("/music/ap/", [".mp3"]):
         ...
 
 Модуль может быть расширен путем доюавления классов для других типов файлов,
 например HTMLFileInfo, MPGFileInfo, DOCFileInfo.  Каждый класс полностью
 отвечает за анализ файлов соответствующего типа; используйте MP3FileInfo в
 качестве примера.
 """
 import os
 import sys
 from UserDict import UserDict
 
 def stripnulls(data):
     """Return data with all NUL characters removed and surrounding whitespace stripped."""
     # Remove the NULs first so that whitespace hidden behind them is stripped too.
     without_nulls = data.replace("\00", "")
     return without_nulls.strip()
 
 class FileInfo(UserDict):
     """Dictionary-like container for metadata about a single file.

     The file name given to the constructor is stored under the "name" key.
     """

     def __init__(self, filename=None):
         # Set up the underlying UserDict storage before any key is assigned.
         UserDict.__init__(self)
         # Assigned with item syntax so the store goes through __setitem__,
         # which a subclass may override to react to the file name.
         self["name"] = filename
 
 class MP3FileInfo(FileInfo):
     """Store the ID3v1.0 tags of an MP3 file.

     Assigning a non-empty value to the "name" key parses that file; when a
     tag block is found, one key per entry of tagDataMap is filled in.
     """

     # tag name -> (start, end, parseFunc): slice bounds within the last
     # 128 bytes of the file, and the function applied to the sliced data
     tagDataMap = {
         "title"   : (  3,  33, stripnulls),
         "artist"  : ( 33,  63, stripnulls),
         "album"   : ( 63,  93, stripnulls),
         "year"    : ( 93,  97, stripnulls),
         "comment" : ( 97, 126, stripnulls),
         "genre"   : (127, 128, ord),
     }

     def __parse(self, filename):
         """Clear the instance, then load the ID3v1.0 tags found in filename.

         An IOError raised while opening or reading the file is ignored, so
         the instance is simply left without tag keys.
         """
         self.clear()
         try:
             mp3 = open(filename, "rb", 0)
             try:
                 # whence=2: position 128 bytes before the end of the file
                 mp3.seek(-128, 2)
                 block = mp3.read(128)
             finally:
                 # close the file whether or not the read succeeded
                 mp3.close()
             if block[:3] == "TAG":
                 for tag in self.tagDataMap.keys():
                     start, end, parseFunc = self.tagDataMap[tag]
                     self[tag] = parseFunc(block[start:end])
         except IOError:
             pass

     def __setitem__(self, key, item):
         """Store item under key, parsing the file first when a name is set."""
         isNewFile = key == "name" and item
         if isNewFile:
             self.__parse(item)
         FileInfo.__setitem__(self, key, item)
 
 def listDirectory(directory, fileExtList):
     """Return a list of metadata objects for the files in directory.

     Only entries whose extension appears in fileExtList are kept.  Each one
     is wrapped in the class named "<EXTENSION>FileInfo" if the module that
     defines FileInfo has such an attribute, and in FileInfo otherwise.
     """
     matches = []
     for entry in os.listdir(directory):
         # normalize case according to the operating system's conventions
         entry = os.path.normcase(entry)
         if os.path.splitext(entry)[1] in fileExtList:
             matches.append(os.path.join(directory, entry))

     def getFileInfoClass(filename, module=sys.modules[FileInfo.__module__]):
         """Pick the class meant to handle filename, based on its extension."""
         extension = os.path.splitext(filename)[1]
         # ".mp3" -> ".MP3" -> "MP3" -> "MP3FileInfo"
         subclass = "%sFileInfo" % extension.upper()[1:]
         handler = hasattr(module, subclass) and getattr(module, subclass)
         return handler or FileInfo

     # instantiate the chosen class with the full pathname of each match
     return [getFileInfoClass(path)(path) for path in matches]
 
 if __name__ == "__main__":
     for info in listDirectory("/music/_singles/", [".mp3"]):
         print "\n".join(["%s=%s" % (k, v) for k, v in info.items()])
         print

Before diving into the next chapter, make sure you're comfortable doing all of these things:

Оставьте свой комментарий !

Ваше имя:
Комментарий:
Оба поля являются обязательными

 Автор  Комментарий к данной статье