Package horizons :: Package ext :: Module polib
[hide private]
[frames | no frames]

Source Code for Module horizons.ext.polib

   1  # -* coding: utf-8 -*- 
   2  # 
   3  # License: MIT (see LICENSE file provided) 
   4  # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: 
   5   
   6  """ 
   7  **polib** allows you to manipulate, create, modify gettext files (pot, po and 
   8  mo files).  You can load existing files, iterate through its entries, add, 
   9  modify entries, comments or metadata, etc. or create new po files from scratch. 
  10   
  11  **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and 
  12  :func:`~polib.mofile` convenience functions. 
  13  """ 
  14   
  15  import array 
  16  import codecs 
  17  import os 
  18  import re 
  19  import struct 
  20  import sys 
  21  import textwrap 
  22   
  23  try: 
  24      import io 
  25  except ImportError: 
26 # replacement of io.open() for python < 2.6 27 # we use codecs instead 28 - class io(object):
29 @staticmethod
30 - def open(fpath, mode='r', encoding=None):
31 return codecs.open(fpath, mode, encoding)


__author__ = 'David Jean Louis <izimobil@gmail.com>'
__version__ = '1.1.0'
__all__ = [
    'pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
    'default_encoding', 'escape', 'unescape', 'detect_encoding',
]


# fallback encoding, used whenever the encoding cannot be detected
default_encoding = 'utf-8'

# python 2/3 compatibility helpers {{{


# True when running under Python 3 or later
PY3 = not (sys.version_info[:2] < (3, 0))

if PY3:
    text_type = str

    def b(s):
        """Return the text ``s`` encoded to bytes using latin-1."""
        return s.encode("latin-1")

    def u(s):
        """Return ``s`` unchanged (it is already text on Python 3)."""
        return s

else:
    text_type = unicode

    def b(s):
        """Return ``s`` unchanged (``str`` is already bytes on Python 2)."""
        return s

    def u(s):
        """Return ``s`` decoded to unicode with the unicode_escape codec."""
        return unicode(s, "unicode_escape")
# }}}
# _pofile_or_mofile {{{


def _pofile_or_mofile(f, type, **kwargs):
    """
    Shared implementation behind :func:`polib.pofile` and
    :func:`polib.mofile`.

    Arguments:

    ``f``
        path of the po/mo file, or its content.

    ``type``
        ``'pofile'`` selects the po parser, any other value the mo parser.

    The keyword arguments ``encoding``, ``check_for_duplicates``, ``klass``
    and ``wrapwidth`` are the ones documented on :func:`polib.pofile`.
    Returns whatever the selected parser's ``parse()`` produces, with its
    ``wrapwidth`` attribute set.
    """
    # an explicit encoding wins, otherwise sniff it from the data
    enc = kwargs.get('encoding')
    if enc is None:
        enc = detect_encoding(f, type == 'mofile')

    # choose the parser class matching the requested file type
    if type == 'pofile':
        parser_class = _POFileParser
    else:
        parser_class = _MOFileParser

    parser_options = {
        'encoding': enc,
        'check_for_duplicates': kwargs.get('check_for_duplicates', False),
        'klass': kwargs.get('klass'),
    }
    instance = parser_class(f, **parser_options).parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
# }}}
91 # _is_file {{{ 92 93 94 -def _is_file(filename_or_contents):
95 """ 96 Safely returns the value of os.path.exists(filename_or_contents). 97 98 Arguments: 99 100 ``filename_or_contents`` 101 either a filename, or a string holding the contents of some file. 102 In the latter case, this function will always return False. 103 """ 104 try: 105 return os.path.exists(filename_or_contents) 106 except (ValueError, UnicodeEncodeError): 107 return False
108 # }}}
# function pofile() {{{


def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and return the resulting
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.POFile`
        instance).
    """
    parsed = _pofile_or_mofile(pofile, 'pofile', **kwargs)
    return parsed
# }}}
# function mofile() {{{


def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and return the resulting
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.MOFile`
        instance).
    """
    parsed = _pofile_or_mofile(mofile, 'mofile', **kwargs)
    return parsed
# }}}
# function detect_encoding() {{{


def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content
        (text, or bytes for mo data).

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    def usable_charset(match):
        """Return the charset captured by ``match`` if valid, else None."""
        enc = match.group(1).strip()
        if not isinstance(enc, text_type):
            enc = enc.decode('utf-8')
        if charset_exists(enc):
            return enc
        return None

    if not _is_file(file):
        try:
            match = rxt.search(file)
        except TypeError:
            # ``file`` holds bytes (e.g. mo data on Python 3): a text pattern
            # cannot scan bytes, so fall back to the bytes pattern
            match = rxb.search(file)
        if match:
            enc = usable_charset(match)
            if enc is not None:
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode = 'rb'
            rx = rxb
        else:
            mode = 'r'
            rx = rxt
        f = open(file, mode)
        # try/finally guarantees the handle is released even if reading or
        # matching raises
        try:
            for line in f.readlines():
                match = rx.search(line)
                if match:
                    enc = usable_charset(match)
                    if enc is not None:
                        return enc
        finally:
            f.close()
    return default_encoding
# }}}
# function escape() {{{


def escape(st):
    """
    Return ``st`` with backslash, tab, carriage return, newline and double
    quote characters replaced by their backslash-escaped two-character form.
    """
    # the backslash must be handled first so that the backslashes introduced
    # by the following replacements are not escaped a second time
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    escaped = st
    for raw, quoted in replacements:
        escaped = escaped.replace(raw, quoted)
    return escaped
# }}}
# function unescape() {{{


def unescape(st):
    """
    Return ``st`` with the backslash-escaped forms of backslash, tab,
    newline, carriage return and double quote turned back into the
    characters they stand for.
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def unescape_repl(m):
        escaped_char = m.group(1)
        # an escaped backslash or double quote simply loses its leading
        # backslash; the letters map to their control character
        return control_chars.get(escaped_char, escaped_char)

    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
# }}}
# function natural_sort() {{{


def natural_sort(lst):
    """
    Return a new list holding the strings of ``lst`` in natural order:
    runs of digits compare as integers, everything else compares
    case-insensitively.
    Credits: http://stackoverflow.com/a/4836734
    """
    digit_runs = re.compile('([0-9]+)')

    def alphanum_key(key):
        parts = []
        for chunk in digit_runs.split(key):
            if chunk.isdigit():
                parts.append(int(chunk))
            else:
                parts.append(chunk.lower())
        return parts

    ordered = list(lst)
    ordered.sort(key=alphanum_key)
    return ordered

# }}}
# class _BaseFile {{{


class _BaseFile(list):
    """
    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
    classes. This class should **not** be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``pofile``
            string, the path to the po or mo file, or its content as a string.

        ``fpath``
            string, the file path stored on the instance when ``pofile`` is
            not the path of an existing file (optional).

        ``wrapwidth``
            integer, the wrap width, only useful when the ``-w`` option was
            passed to xgettext (optional, default: ``78``).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file, (optional, default: ``False``).
        """
        list.__init__(self)
        # remember the path only when ``pofile`` names an existing file
        # (it may also hold the file content itself)
        pofile = kwargs.get('pofile', None)
        if pofile and _is_file(pofile):
            self.fpath = pofile
        else:
            self.fpath = kwargs.get('fpath')
        # the width at which lines should be wrapped
        self.wrapwidth = kwargs.get('wrapwidth', 78)
        # the file encoding
        self.encoding = kwargs.get('encoding', default_encoding)
        # whether to check for duplicate entries or not
        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __unicode__(self):
        """
        Returns the unicode representation of the file: the metadata entry,
        then the non-obsolete entries, then the obsolete entries.
        """
        ret = []
        entries = [self.metadata_as_entry()] + \
            [e for e in self if not e.obsolete]
        for entry in entries:
            ret.append(entry.__unicode__(self.wrapwidth))
        for entry in self.obsolete_entries():
            ret.append(entry.__unicode__(self.wrapwidth))
        ret = u('\n').join(ret)
        return ret

    if PY3:
        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return self.__unicode__()
    else:
        def __str__(self):
            """
            Returns the string representation of the file, encoded with the
            file encoding.
            """
            return unicode(self).encode(self.encoding)

    def __contains__(self, entry):
        """
        Overridden ``list`` method to implement the membership test (in and
        not in).
        The method considers that an entry is in the file if it finds an entry
        that has the same msgid (the test is **case sensitive**) and the same
        msgctxt (or none for both entries).

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
            is not None

    def __eq__(self, other):
        """
        Two files are equal when their string representations are equal.
        """
        return str(self) == str(other)

    def append(self, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        # check_for_duplicates may not be defined (yet) when unpickling.
        # But if pickling, we never want to check for duplicates anyway.
        if getattr(self, 'check_for_duplicates', False) and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).append(entry)

    def insert(self, index, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Arguments:

        ``index``
            index at which the entry should be inserted.

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        # same guarded lookup as append(): the attribute may not exist yet
        # when the instance is being rebuilt without going through __init__
        if getattr(self, 'check_for_duplicates', False) and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).insert(index, entry)

    def metadata_as_entry(self):
        """
        Returns the file metadata as a :class:`~polib.POEntry` instance whose
        msgid is empty and whose msgstr holds one ``name: value`` line per
        metadata item.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = []
            for name, value in mdata:
                strs.append('%s: %s' % (name, value))
            e.msgstr = '\n'.join(strs) + '\n'
        if self.metadata_is_fuzzy:
            e.flags.append('fuzzy')
        return e

    def save(self, fpath=None, repr_method='__unicode__'):
        """
        Saves the po file to ``fpath``.
        If it is an existing file and no ``fpath`` is provided, then the
        existing file is rewritten with the modified data.

        Keyword arguments:

        ``fpath``
            string, full or relative path to the file.

        ``repr_method``
            string, the method to use for output.

        Raises ``IOError`` when neither ``fpath`` nor the instance's own
        ``fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        binary = repr_method == 'to_binary'
        # decode before opening, so that a decoding failure does not leave a
        # truncated file behind
        if not binary and not isinstance(contents, text_type):
            contents = contents.decode(self.encoding)
        if binary:
            fhandle = open(fpath, 'wb')
        else:
            fhandle = io.open(fpath, 'w', encoding=self.encoding)
        # always release the handle, even when the write fails
        try:
            fhandle.write(contents)
        finally:
            fhandle.close()
        # set the file path if not set
        if self.fpath is None and fpath:
            self.fpath = fpath

    def find(self, st, by='msgid', include_obsolete_entries=False,
             msgctxt=False):
        """
        Find the entry which msgid (or property identified by the ``by``
        argument) matches the string ``st``.

        Keyword arguments:

        ``st``
            string, the string to search for.

        ``by``
            string, the property to use for comparison (default: ``msgid``).

        ``include_obsolete_entries``
            boolean, whether to also search in entries that are obsolete.

        ``msgctxt``
            string, allows specifying a specific message context for the
            search (default: ``False``, the context is not used to filter).

        Returns the matching entry, or ``None`` when nothing matches. When
        several entries match and no context was requested, an entry without
        msgctxt is preferred, otherwise the first match is returned.
        """
        if include_obsolete_entries:
            entries = self[:]
        else:
            entries = [e for e in self if not e.obsolete]
        matches = []
        for e in entries:
            if getattr(e, by) == st:
                if msgctxt is not False and e.msgctxt != msgctxt:
                    continue
                matches.append(e)
        if len(matches) == 1:
            return matches[0]
        elif len(matches) > 1:
            if not msgctxt:
                # find the entry with no msgctx
                e = None
                for m in matches:
                    if not m.msgctxt:
                        e = m
                if e:
                    return e
            # fallback to the first entry found
            return matches[0]
        return None

    def ordered_metadata(self):
        """
        Convenience method that returns an ordered version of the metadata
        dictionary. The return value is list of tuples (metadata name,
        metadata_value).
        """
        # copy the dict first
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'Language',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding',
            'Plural-Forms'
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
                ordered_data.append((data, value))
            except KeyError:
                pass
        # the rest of the metadata will be alphabetically ordered since there
        # are no specs for this AFAIK
        for data in natural_sort(metadata.keys()):
            value = metadata[data]
            ordered_data.append((data, value))
        return ordered_data

    def to_binary(self):
        """
        Return the binary representation of the file.
        """
        offsets = []
        entries = self.translated_entries()

        # the keys are sorted in the .mo file
        entries.sort(key=lambda o: o.msgid_with_context.encode('utf-8'))
        # add metadata entry
        mentry = self.metadata_as_entry()
        entries = [mentry] + entries
        entries_len = len(entries)
        ids, strs = b(''), b('')
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = b('')
            if e.msgctxt:
                # Contexts are stored by storing the concatenation of the
                # context, a <EOT> byte, and the original string
                msgid = self._encode(e.msgctxt + '\4')
            if e.msgid_plural:
                msgstr = []
                for index in sorted(e.msgstr_plural.keys()):
                    msgstr.append(e.msgstr_plural[index])
                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
                msgstr = self._encode('\0'.join(msgstr))
            else:
                msgid += self._encode(e.msgid)
                msgstr = self._encode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + b('\0')
            strs += msgstr + b('\0')

        # The header is 7 32-bit unsigned integers.
        keystart = 7 * 4 + 16 * entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1 + keystart]
            voffsets += [l2, o2 + valuestart]
        offsets = koffsets + voffsets

        output = struct.pack(
            "Iiiiiii",
            # Magic number
            MOFile.MAGIC,
            # Version
            0,
            # number of entries
            entries_len,
            # start of key index
            7 * 4,
            # start of value index
            7 * 4 + entries_len * 8,
            # size and offset of hash table, we don't use hash tables
            0, keystart

        )
        if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
            output += array.array("i", offsets).tobytes()
        else:
            output += array.array("i", offsets).tostring()
        output += ids
        output += strs
        return output

    def _encode(self, mixed):
        """
        Encodes the given ``mixed`` argument with the file encoding if and
        only if it's an unicode string and returns the encoded string.
        """
        if isinstance(mixed, text_type):
            mixed = mixed.encode(self.encoding)
        return mixed
# }}}
# class POFile {{{


class POFile(_BaseFile):
    """
    Reader/writer for po (or pot) files.
    The class derives from :class:`~polib._BaseFile` and therefore also from
    the python ``list`` type.
    """

    def __unicode__(self):
        """
        Returns the unicode representation of the po file: the header
        rendered as comment lines followed by the entries.
        """
        header_lines = []
        for header in self.header.split('\n'):
            if not header:
                header_lines.append("#\n")
            elif header[:1] in [',', ':']:
                # "#," and "#:" markers are written without a space
                header_lines.append('#%s\n' % header)
            else:
                header_lines.append('# %s\n' % header)
        ret = ''.join(header_lines)

        if not isinstance(ret, text_type):
            ret = ret.decode(self.encoding)

        return ret + _BaseFile.__unicode__(self)

    def save_as_mofile(self, fpath):
        """
        Writes the binary (mo) representation of the file to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the mo file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Returns, as an integer, the percentage of non-obsolete entries that
        are translated (``100`` when there is no such entry).
        """
        live_entries = [e for e in self if not e.obsolete]
        if not live_entries:
            return 100
        translated = len(self.translated_entries())
        return int(translated * 100 / float(len(live_entries)))

    def translated_entries(self):
        """
        Returns the list of entries that are translated.
        """
        return [e for e in self if e.translated()]

    def untranslated_entries(self):
        """
        Returns the list of entries that are neither translated, obsolete
        nor fuzzy.
        """
        return [e for e in self
                if not (e.translated() or e.obsolete or e.fuzzy)]

    def fuzzy_entries(self):
        """
        Returns the list of entries flagged as fuzzy.
        """
        return [e for e in self if e.fuzzy]

    def obsolete_entries(self):
        """
        Returns the list of entries marked obsolete.
        """
        return [e for e in self if e.obsolete]

    def merge(self, refpot):
        """
        Merges the current pofile with the reference pot file ``refpot``.

        * entries of ``refpot`` missing from this file are added;
        * entries present in both are updated through
          :meth:`POEntry.merge` with the reference entry;
        * entries of this file that are absent from ``refpot`` are marked
          obsolete.

        Keyword argument:

        ``refpot``
            object POFile, the reference catalog.
        """
        # index both catalogs by msgid+context for fast lookups
        own_by_key = {}
        for entry in self:
            own_by_key[entry.msgid_with_context] = entry
        reference_keys = set()
        for entry in refpot:
            reference_keys.add(entry.msgid_with_context)

        # bring every reference entry into this file
        for reference in refpot:
            target = own_by_key.get(reference.msgid_with_context)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(reference)

        # whatever the reference no longer contains becomes obsolete
        for entry in self:
            if entry.msgid_with_context not in reference_keys:
                entry.obsolete = True
# }}}
# class MOFile {{{


class MOFile(_BaseFile):
    """
    Reader/writer for mo files.
    The class derives from :class:`~polib._BaseFile` and therefore also from
    the python ``list`` type.
    """
    MAGIC = 0x950412de
    MAGIC_SWAPPED = 0xde120495

    def __init__(self, *args, **kwargs):
        """
        Constructor, takes the same keyword arguments as the
        :class:`~polib._BaseFile` constructor.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number, self.version = None, 0

    def save_as_pofile(self, fpath):
        """
        Writes the mofile to ``fpath`` in po format.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath=fpath)

    def save(self, fpath=None):
        """
        Writes the mofile, in binary form, to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath=fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Always ``100``; present for interface parity with POFile.
        """
        fully_translated = 100
        return fully_translated

    def translated_entries(self):
        """
        Returns the file itself; present for interface parity with POFile.
        """
        everything = self
        return everything

    def untranslated_entries(self):
        """
        Always an empty list; present for interface parity with POFile.
        """
        return list()

    def fuzzy_entries(self):
        """
        Always an empty list; present for interface parity with POFile.
        """
        return list()

    def obsolete_entries(self):
        """
        Always an empty list; present for interface parity with POFile.
        """
        return list()
# }}}
# class _BaseEntry {{{


class _BaseEntry(object):
    """
    Shared base of the :class:`~polib.POEntry` and :class:`~polib.MOEntry`
    classes. It is **not** meant to be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``msgid``
            string, the entry msgid.

        ``msgstr``
            string, the entry msgstr.

        ``msgid_plural``
            string, the entry msgid_plural.

        ``msgstr_plural``
            dict, the entry msgstr_plural strings keyed by plural index.

        ``msgctxt``
            string, the entry context (msgctxt).

        ``obsolete``
            bool, whether the entry is "obsolete" or not.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).
        """
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.msgctxt = option('msgctxt', None)
        self.obsolete = option('obsolete', False)
        self.encoding = option('encoding', default_encoding)

    def __unicode__(self, wrapwidth=78):
        """
        Returns the unicode representation of the entry, wrapped at
        ``wrapwidth`` columns.
        """
        # obsolete entries have every line prefixed with "#~ "
        delflag = '#~ ' if self.obsolete else ''
        ret = []
        # msgctxt, when there is one
        if self.msgctxt is not None:
            ret.extend(self._str_field("msgctxt", delflag, "", self.msgctxt,
                                       wrapwidth))
        # msgid
        ret.extend(self._str_field("msgid", delflag, "", self.msgid,
                                   wrapwidth))
        # msgid_plural, when there is one
        if self.msgid_plural:
            ret.extend(self._str_field("msgid_plural", delflag, "",
                                       self.msgid_plural, wrapwidth))
        if self.msgstr_plural:
            # one msgstr[N] per plural form, in index order
            msgstrs = self.msgstr_plural
            for index in sorted(msgstrs):
                plural_index = '[%s]' % index
                ret.extend(self._str_field("msgstr", delflag, plural_index,
                                           msgstrs[index], wrapwidth))
        else:
            # a single msgstr otherwise
            ret.extend(self._str_field("msgstr", delflag, "", self.msgstr,
                                       wrapwidth))
        ret.append('')
        return u('\n').join(ret)

    if PY3:
        def __str__(self):
            """
            Returns the string representation of the entry.
            """
            return self.__unicode__()
    else:
        def __str__(self):
            """
            Returns the string representation of the entry, encoded with
            the entry encoding.
            """
            return unicode(self).encode(self.encoding)

    def __eq__(self, other):
        """
        Two entries are equal when their string representations are equal.
        """
        return str(self) == str(other)

    def _str_field(self, fieldname, delflag, plural_index, field,
                   wrapwidth=78):
        """
        Returns the list of output lines for one field: a first
        ``<fieldname><plural_index> "..."`` line followed by ``"..."``
        continuation lines, each prefixed with ``delflag``.
        """
        pieces = field.splitlines(True)
        if len(pieces) > 1:
            # multi-line values start with an initial empty line
            pieces.insert(0, '')
        else:
            escaped_field = escape(field)
            specialchars_count = sum(
                field.count(char) for char in ['\\', '\n', '\r', '\t', '"']
            )
            # comparison must take into account fieldname length + one space
            # + 2 quotes (eg. msgid "<string>")
            flength = len(fieldname) + 3
            if plural_index:
                flength += len(plural_index)
            real_wrapwidth = wrapwidth - flength + specialchars_count
            if wrapwidth > 0 and len(field) > real_wrapwidth:
                # Wrap the line but take field name into account
                wrapped = wrap(
                    escaped_field,
                    wrapwidth - 2,  # 2 for quotes ""
                    drop_whitespace=False,
                    break_long_words=False
                )
                pieces = [''] + [unescape(item) for item in wrapped]
            else:
                pieces = [field]
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        first, rest = pieces[0], pieces[1:]
        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                escape(first))]
        ret.extend('%s"%s"' % (delflag, escape(piece)) for piece in rest)
        return ret
# }}}
937 # class POEntry {{{ 938 939 940 -class POEntry(_BaseEntry):
941 """ 942 Represents a po file entry. 943 """ 944
945 - def __init__(self, *args, **kwargs):
946 """ 947 Constructor, accepts the following keyword arguments: 948 949 ``comment`` 950 string, the entry comment. 951 952 ``tcomment`` 953 string, the entry translator comment. 954 955 ``occurrences`` 956 list, the entry occurrences. 957 958 ``flags`` 959 list, the entry flags. 960 961 ``previous_msgctxt`` 962 string, the entry previous context. 963 964 ``previous_msgid`` 965 string, the entry previous msgid. 966 967 ``previous_msgid_plural`` 968 string, the entry previous msgid_plural. 969 970 ``linenum`` 971 integer, the line number of the entry 972 """ 973 _BaseEntry.__init__(self, *args, **kwargs) 974 self.comment = kwargs.get('comment', '') 975 self.tcomment = kwargs.get('tcomment', '') 976 self.occurrences = kwargs.get('occurrences', []) 977 self.flags = kwargs.get('flags', []) 978 self.previous_msgctxt = kwargs.get('previous_msgctxt', None) 979 self.previous_msgid = kwargs.get('previous_msgid', None) 980 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None) 981 self.linenum = kwargs.get('linenum', None)
982
983 - def __unicode__(self, wrapwidth=78):
984 """ 985 Returns the unicode representation of the entry. 986 """ 987 ret = [] 988 # comments first, if any (with text wrapping as xgettext does) 989 if self.obsolete: 990 comments = [('tcomment', '# ')] 991 else: 992 comments = [('comment', '#. '), ('tcomment', '# ')] 993 for c in comments: 994 val = getattr(self, c[0]) 995 if val: 996 for comment in val.split('\n'): 997 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth: 998 ret += wrap( 999 comment, 1000 wrapwidth, 1001 initial_indent=c[1], 1002 subsequent_indent=c[1], 1003 break_long_words=False 1004 ) 1005 else: 1006 ret.append('%s%s' % (c[1], comment)) 1007 1008 # occurrences (with text wrapping as xgettext does) 1009 if not self.obsolete and self.occurrences: 1010 filelist = [] 1011 for fpath, lineno in self.occurrences: 1012 if lineno: 1013 filelist.append('%s:%s' % (fpath, lineno)) 1014 else: 1015 filelist.append(fpath) 1016 filestr = ' '.join(filelist) 1017 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth: 1018 # textwrap split words that contain hyphen, this is not 1019 # what we want for filenames, so the dirty hack is to 1020 # temporally replace hyphens with a char that a file cannot 1021 # contain, like "*" 1022 ret += [l.replace('*', '-') for l in wrap( 1023 filestr.replace('-', '*'), 1024 wrapwidth, 1025 initial_indent='#: ', 1026 subsequent_indent='#: ', 1027 break_long_words=False 1028 )] 1029 else: 1030 ret.append('#: ' + filestr) 1031 1032 # flags (TODO: wrapping ?) 1033 if self.flags: 1034 ret.append('#, %s' % ', '.join(self.flags)) 1035 1036 # previous context and previous msgid/msgid_plural 1037 fields = ['previous_msgctxt', 'previous_msgid', 1038 'previous_msgid_plural'] 1039 if self.obsolete: 1040 prefix = "#~| " 1041 else: 1042 prefix = "#| " 1043 for f in fields: 1044 val = getattr(self, f) 1045 if val: 1046 ret += self._str_field(f, prefix, "", val, wrapwidth) 1047 1048 ret.append(_BaseEntry.__unicode__(self, wrapwidth)) 1049 ret = u('\n').join(ret) 1050 return ret
1051
1052 - def __cmp__(self, other):
1053 """ 1054 Called by comparison operations if rich comparison is not defined. 1055 """ 1056 # First: Obsolete test 1057 if self.obsolete != other.obsolete: 1058 if self.obsolete: 1059 return -1 1060 else: 1061 return 1 1062 # Work on a copy to protect original 1063 occ1 = sorted(self.occurrences[:]) 1064 occ2 = sorted(other.occurrences[:]) 1065 pos = 0 1066 if occ1 > occ2: 1067 return 1 1068 if occ1 < occ2: 1069 return -1 1070 # Compare context 1071 msgctxt = self.msgctxt or '' 1072 othermsgctxt = other.msgctxt or '' 1073 if msgctxt > othermsgctxt: 1074 return 1 1075 elif msgctxt < othermsgctxt: 1076 return -1 1077 # Compare msgid_plural 1078 msgid_plural = self.msgid_plural or '' 1079 othermsgid_plural = other.msgid_plural or '' 1080 if msgid_plural > othermsgid_plural: 1081 return 1 1082 elif msgid_plural < othermsgid_plural: 1083 return -1 1084 # Compare msgstr_plural 1085 # Because dict order comparison works different in Python <2.7 and 2.7, 1086 # and does not work at all in Python 3.x, this approach is being used 1087 # instead. It simulates order comparison of dicts in Python <2.7 to the 1088 # required degree. 1089 msgstr_plural = self.msgstr_plural or {} 1090 othermsgstr_plural = other.msgstr_plural or {} 1091 if len(msgstr_plural) > len(othermsgstr_plural): 1092 return 1 1093 elif len(msgstr_plural) < len(othermsgstr_plural): 1094 return -1 1095 for idx in msgstr_plural: 1096 if msgstr_plural[idx] > othermsgstr_plural[idx]: 1097 return 1 1098 elif msgstr_plural[idx] < othermsgstr_plural[idx]: 1099 return -1 1100 # Compare msgid 1101 if self.msgid > other.msgid: 1102 return 1 1103 elif self.msgid < other.msgid: 1104 return -1 1105 return 0 1106 # Compare msgstr 1107 if self.msgstr > other.msgstr: 1108 return 1 1109 elif self.msgstr < other.msgstr: 1110 return -1 1111 return 0
1112
1113 - def __gt__(self, other):
1114 return self.__cmp__(other) > 0
1115
1116 - def __lt__(self, other):
1117 return self.__cmp__(other) < 0
1118
1119 - def __ge__(self, other):
1120 return self.__cmp__(other) >= 0
1121
1122 - def __le__(self, other):
1123 return self.__cmp__(other) <= 0
1124
1125 - def __eq__(self, other):
1126 return self.__cmp__(other) == 0
1127
1128 - def __ne__(self, other):
1129 return self.__cmp__(other) != 0
1130
def translated(self):
    """
    Returns ``True`` if the entry has been translated or ``False``
    otherwise.

    Obsolete and fuzzy entries are never reported as translated.  For the
    others a non-empty ``msgstr`` is enough; failing that, ``msgstr_plural``
    must be non-empty and none of its values may be the empty string.
    """
    if self.obsolete or self.fuzzy:
        return False
    if self.msgstr != '':
        return True
    plurals = self.msgstr_plural
    if not plurals:
        return False
    # a single missing plural form makes the whole entry untranslated
    return not any(plurals[pos] == '' for pos in plurals)
1146
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    The identifying and descriptive attributes (``msgid``, ``msgctxt``,
    ``occurrences``, ``comment``, ``flags``, ``msgid_plural``, ``obsolete``
    and the ``previous_*`` fields) are taken from ``other``.  The current
    entry stays fuzzy if it was fuzzy before the merge.  Existing plural
    translations are kept; plural positions that only exist in ``other``
    are added with an empty translation.

    ``other``
        the pot entry to merge from; it is not modified.
    """
    self.msgid = other.msgid
    self.msgctxt = other.msgctxt
    # clone, like flags below, so that later edits to one entry do not
    # silently show up in the other one
    self.occurrences = list(other.occurrences)
    self.comment = other.comment
    was_fuzzy = self.fuzzy  # must be read before flags are replaced
    self.flags = other.flags[:]  # clone flags
    if was_fuzzy and 'fuzzy' not in self.flags:
        # the pot entry may itself be fuzzy: never list the flag twice
        self.flags.append('fuzzy')
    self.msgid_plural = other.msgid_plural
    self.obsolete = other.obsolete
    self.previous_msgctxt = other.previous_msgctxt
    self.previous_msgid = other.previous_msgid
    self.previous_msgid_plural = other.previous_msgid_plural
    if other.msgstr_plural:
        for pos in other.msgstr_plural:
            # keep existing translation at pos if any
            self.msgstr_plural.setdefault(pos, '')
@property
def fuzzy(self):
    """``True`` when ``'fuzzy'`` is among the entry's flags."""
    flags = self.flags
    return 'fuzzy' in flags
@property
def msgid_with_context(self):
    """
    The msgid, prefixed with the msgctxt and a ``\\x04`` separator when the
    entry has a (non-empty) context.
    """
    if not self.msgctxt:
        return self.msgid
    return '%s%s%s' % (self.msgctxt, "\x04", self.msgid)
1181
def __hash__(self):
    """Hash the entry on its ``(msgid, msgstr)`` pair."""
    identity = (self.msgid, self.msgstr)
    return hash(identity)
1184 # }}}
# class MOEntry {{{


class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.
    """
    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments,
        for consistency with :class:`~polib.POEntry`:

        ``comment``
        ``tcomment``
        ``occurrences``
        ``flags``
        ``previous_msgctxt``
        ``previous_msgid``
        ``previous_msgid_plural``

        Note: even though these keyword arguments are accepted,
        they hold no real meaning in the context of MO files
        and are simply ignored: the matching attributes are reset
        right after the base constructor has run.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        # PO-only attributes always start out empty on a MO entry
        self.comment = self.tcomment = ''
        self.occurrences, self.flags = [], []
        self.previous_msgctxt = None
        self.previous_msgid = None
        self.previous_msgid_plural = None

    def __hash__(self):
        identity = (self.msgid, self.msgstr)
        return hash(identity)
1220
# }}}
# class _POFileParser {{{


class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    def __init__(self, pofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``pofile``
            string, path to the po file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to hold the parsed entries, defaults to
            ``POFile`` (optional).
        """
        enc = kwargs.get('encoding', default_encoding)
        if _is_file(pofile):
            try:
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
            except LookupError:
                # unknown codec name: retry with the module default
                enc = default_encoding
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
        else:
            # not a file: treat the argument as the po content itself
            self.fhandle = pofile.splitlines()

        klass = kwargs.get('klass')
        if klass is None:
            klass = POFile
        self.instance = klass(
            pofile=pofile,
            encoding=enc,
            check_for_duplicates=kwargs.get('check_for_duplicates', False)
        )
        self.transitions = {}
        self.current_line = 0
        self.current_entry = POEntry(linenum=self.current_line)
        self.current_state = 'st'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Signification of symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp',
                      'tc', 'ms', 'mp', 'mx', 'mi']

        self.add('tc', ['st', 'he'], 'he')
        self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
                        'mp', 'mx', 'mi'], 'tc')
        self.add('gc', all_states, 'gc')
        self.add('oc', all_states, 'oc')
        self.add('fl', all_states, 'fl')
        self.add('pc', all_states, 'pc')
        self.add('pm', all_states, 'pm')
        self.add('pp', all_states, 'pp')
        self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
                        'pp', 'ms', 'mx'], 'ct')
        self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
                        'pm', 'pp', 'ms', 'mx'], 'mi')
        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
        self.add('ms', ['mi', 'mp', 'tc'], 'ms')
        self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
        self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the populated file instance built by the constructor.
        Raises ``IOError`` on any syntax error.  When the input was opened
        from a path, the file handle is closed before this method returns or
        raises.
        """

        keywords = {
            'msgctxt': 'ct',
            'msgid': 'mi',
            'msgstr': 'ms',
            'msgid_plural': 'mp',
        }
        prev_keywords = {
            'msgid_plural': 'pp',
            'msgid': 'pm',
            'msgctxt': 'pc',
        }
        tokens = []
        fpath = '%s ' % self.instance.fpath if self.instance.fpath else ''
        try:
            for line in self.fhandle:
                self.current_line += 1
                line = line.strip()
                if line == '':
                    continue

                tokens = line.split(None, 2)
                nb_tokens = len(tokens)

                if tokens[0] == '#~|':
                    # previous-msgid comments of obsolete entries are dropped
                    continue

                if tokens[0] == '#~' and nb_tokens > 1:
                    # obsolete entry: strip the marker and parse the rest
                    # like a regular line
                    line = line[3:].strip()
                    tokens = tokens[1:]
                    nb_tokens -= 1
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0

                # Take care of keywords like
                # msgid, msgid_plural, msgctxt & msgstr.
                if tokens[0] in keywords and nb_tokens > 1:
                    line = line[len(tokens[0]):].lstrip()
                    if re.search(r'([^\\]|^)"', line[1:-1]):
                        raise IOError('Syntax error in po file %s(line %s): '
                                      'unescaped double quote found' %
                                      (fpath, self.current_line))
                    self.current_token = line
                    self.process(keywords[tokens[0]])
                    continue

                self.current_token = line

                if tokens[0] == '#:':
                    if nb_tokens <= 1:
                        continue
                    # we are on a occurrences line
                    self.process('oc')

                elif line[:1] == '"':
                    # we are on a continuation line
                    if re.search(r'([^\\]|^)"', line[1:-1]):
                        raise IOError('Syntax error in po file %s(line %s): '
                                      'unescaped double quote found' %
                                      (fpath, self.current_line))
                    self.process('mc')

                elif line[:7] == 'msgstr[':
                    # we are on a msgstr plural
                    self.process('mx')

                elif tokens[0] == '#,':
                    if nb_tokens <= 1:
                        continue
                    # we are on a flags line
                    self.process('fl')

                elif tokens[0] == '#' or tokens[0].startswith('##'):
                    # we are on a translator comment line
                    self.process('tc')

                elif tokens[0] == '#.':
                    if nb_tokens <= 1:
                        continue
                    # we are on a generated comment line
                    self.process('gc')

                elif tokens[0] == '#|':
                    if nb_tokens <= 1:
                        raise IOError('Syntax error in po file %s(line %s)' %
                                      (fpath, self.current_line))

                    # Remove the marker and any whitespace right after that.
                    line = line[2:].lstrip()
                    self.current_token = line

                    if tokens[1].startswith('"'):
                        # Continuation of previous metadata.
                        self.process('mc')
                        continue

                    if nb_tokens == 2:
                        # Invalid continuation line.
                        raise IOError('Syntax error in po file %s(line %s): '
                                      'invalid continuation line' %
                                      (fpath, self.current_line))

                    # we are on a "previous translation" comment line,
                    if tokens[1] not in prev_keywords:
                        # Unknown keyword in previous translation comment.
                        raise IOError('Syntax error in po file %s(line %s): '
                                      'unknown keyword %s' %
                                      (fpath, self.current_line,
                                       tokens[1]))

                    # Remove the keyword and any whitespace
                    # between it and the starting quote.
                    line = line[len(tokens[1]):].lstrip()
                    self.current_token = line
                    self.process(prev_keywords[tokens[1]])

                else:
                    raise IOError('Syntax error in po file %s(line %s)' %
                                  (fpath, self.current_line))

            if self.current_entry and len(tokens) > 0 and \
               not tokens[0].startswith('#'):
                # since entries are added when another entry is found, we
                # must add the last entry here (only if there are lines).
                # Trailing comments are ignored
                self.instance.append(self.current_entry)

            # before returning the instance, check if there's metadata and if
            # so extract it in a dict
            metadataentry = self.instance.find('')
            if metadataentry:  # metadata found
                # remove the entry
                self.instance.remove(metadataentry)
                self.instance.metadata_is_fuzzy = metadataentry.flags
                key = None
                for msg in metadataentry.msgstr.splitlines():
                    try:
                        key, val = msg.split(':', 1)
                        self.instance.metadata[key] = val.strip()
                    except (ValueError, KeyError):
                        # no "key: value" shape: treat the line as a
                        # continuation of the previous key, if any
                        if key is not None:
                            self.instance.metadata[key] += '\n' + msg.strip()
        finally:
            # close opened file, also when a syntax error aborts the parsing
            if not isinstance(self.fhandle, list):  # must be file
                self.fhandle.close()
        return self.instance

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        ``states``
            list, a list of states (two chars symbols).

        ``next_state``
            the next state the fsm will have after the action.
        """
        action = getattr(self, 'handle_%s' % next_state)
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        Raises ``IOError`` when no transition is registered for the current
        state and symbol, or when the handler itself fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            # handlers return a false value to keep the current state
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' %
                          self.current_line)

    # state handlers

    def _flush_entry(self):
        """
        Store the entry being built and start a new one when the previous
        line left the machine in one of the 'mc', 'ms' or 'mx' states.
        """
        if self.current_state in ['mc', 'ms', 'mx']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry(linenum=self.current_line)

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        tcomment = self.current_token.lstrip('#')
        if tcomment.startswith(' '):
            tcomment = tcomment[1:]
        self.current_entry.tcomment += tcomment
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """Handle a file:num occurrence."""
        self._flush_entry()
        for occurrence in self.current_token[3:].split():
            if occurrence == '':
                continue
            try:
                fil, line = occurrence.rsplit(':', 1)
                if not line.isdigit():
                    # the colon was part of the file name, not a line number
                    fil = occurrence
                    line = ''
                self.current_entry.occurrences.append((fil, line))
            except (ValueError, AttributeError):
                # no colon at all: file name without line number
                self.current_entry.occurrences.append((occurrence, ''))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        self.current_entry.flags += [c.strip() for c in
                                     self.current_token[3:].split(',')]
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._flush_entry()
        self.current_entry.previous_msgid_plural = \
            unescape(self.current_token[1:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._flush_entry()
        self.current_entry.previous_msgid = \
            unescape(self.current_token[1:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._flush_entry()
        self.current_entry.previous_msgctxt = \
            unescape(self.current_token[1:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._flush_entry()
        self.current_entry.msgctxt = unescape(self.current_token[1:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[1:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[1:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        token = self.current_token
        # the index is everything between "msgstr[" and the closing bracket,
        # so indexes with more than one digit are read in full
        index_text = token[7:token.index(']', 7)]
        if not index_text.isdigit():
            raise ValueError('invalid plural index %r' % index_text)
        index = int(index_text)
        value = token[token.find('"') + 1:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        token = unescape(self.current_token[1:-1])
        if self.current_state == 'ct':
            self.current_entry.msgctxt += token
        elif self.current_state == 'mi':
            self.current_entry.msgid += token
        elif self.current_state == 'mp':
            self.current_entry.msgid_plural += token
        elif self.current_state == 'ms':
            self.current_entry.msgstr += token
        elif self.current_state == 'mx':
            self.current_entry.msgstr_plural[self.msgstr_index] += token
        elif self.current_state == 'pp':
            self.current_entry.previous_msgid_plural += token
        elif self.current_state == 'pm':
            self.current_entry.previous_msgid += token
        elif self.current_state == 'pc':
            self.current_entry.previous_msgctxt += token
        # don't change the current state
        return False
1648 # }}}
# class _MOFileParser {{{


class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    def __init__(self, mofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file; it is opened in binary mode.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to hold the parsed entries, defaults to
            ``MOFile`` (optional).
        """
        self.fhandle = open(mofile, 'rb')

        klass = kwargs.get('klass')
        if klass is None:
            klass = MOFile
        self.instance = klass(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False)
        )

    def __del__(self):
        """
        Make sure the file is closed, this prevents warnings on unclosed file
        when running tests with python >= 3.2.
        """
        # the attribute is missing when open() failed in the constructor
        fhandle = getattr(self, 'fhandle', None)
        if fhandle:
            fhandle.close()

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated file instance.  Raises ``IOError`` when the
        magic number or the major revision number is not supported.  The
        file handle is closed before this method returns or raises.
        """
        try:
            # parse magic number
            magic_number = self._readbinary('<I', 4)
            if magic_number == MOFile.MAGIC:
                ii = '<II'
            elif magic_number == MOFile.MAGIC_SWAPPED:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            version, numofstrings = self._readbinary(ii, 8)
            # from MO file format specs: "A program seeing an unexpected major
            # revision number should stop reading the MO file entirely"
            if version >> 16 not in (0, 1):
                raise IOError('Invalid mo file, '
                              'unexpected major revision number')
            self.instance.version = version
            # original strings and translation strings hash table offset
            msgids_hash_offset, msgstrs_hash_offset = \
                self._readbinary(ii, 8)
            # move to msgid hash table and read length and offset of msgids
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = [self._readbinary(ii, 8)
                            for dummy in range(numofstrings)]
            # move to msgstr hash table and read length and offset of msgstrs
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = [self._readbinary(ii, 8)
                             for dummy in range(numofstrings)]
            # build entries
            encoding = self.instance.encoding
            for i in range(numofstrings):
                msgid_length, msgid_offset = msgids_index[i]
                self.fhandle.seek(msgid_offset)
                msgid = self.fhandle.read(msgid_length)

                msgstr_length, msgstr_offset = msgstrs_index[i]
                self.fhandle.seek(msgstr_offset)
                msgstr = self.fhandle.read(msgstr_length)
                if i == 0 and not msgid:  # metadata
                    metadata = {}
                    for line in msgstr.split(b('\n')):
                        tokens = line.split(b(':'), 1)
                        if tokens[0] != b(''):
                            try:
                                k = tokens[0].decode(encoding)
                                v = tokens[1].decode(encoding)
                                metadata[k] = v.strip()
                            except IndexError:
                                # line without a colon: key with no value
                                metadata[k] = u('')
                    self.instance.metadata = metadata
                    continue
                # test if we have a plural entry
                msgid_tokens = msgid.split(b('\0'))
                if len(msgid_tokens) > 1:
                    entry = self._build_entry(
                        msgid=msgid_tokens[0],
                        msgid_plural=msgid_tokens[1],
                        msgstr_plural=dict(enumerate(msgstr.split(b('\0'))))
                    )
                else:
                    entry = self._build_entry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            # close opened file, also when the file turns out to be invalid
            self.fhandle.close()
        return self.instance

    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
                     msgstr_plural=None):
        """
        Decode the raw byte strings and return the matching ``MOEntry``.

        A ``\\x04`` byte in ``msgid`` separates the msgctxt from the msgid.
        Empty or missing ``msgstr``, ``msgid_plural`` and ``msgstr_plural``
        are not passed on to the entry.  ``msgstr_plural`` is left untouched;
        the decoded values are stored in a new dict.
        """
        encoding = self.instance.encoding
        # split on the first separator only so that the rest of the msgid is
        # kept whole
        msgctxt_msgid = msgid.split(b('\x04'), 1)
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0].decode(encoding),
                'msgid': msgctxt_msgid[1].decode(encoding),
            }
        else:
            kwargs = {'msgid': msgid.decode(encoding)}
        if msgstr:
            kwargs['msgstr'] = msgstr.decode(encoding)
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
        if msgstr_plural:
            kwargs['msgstr_plural'] = dict(
                (k, msgstr_plural[k].decode(encoding)) for k in msgstr_plural
            )
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1794 # }}}
# class TextWrapper {{{


class TextWrapper(textwrap.TextWrapper):
    """
    Subclass of textwrap.TextWrapper that backports the
    drop_whitespace option.
    """
    def __init__(self, *args, **kwargs):
        # take the option out before the base constructor sees it
        whitespace_option = kwargs.pop('drop_whitespace', True)
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        self.drop_whitespace = whitespace_option

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        When 'drop_whitespace' is set, whitespace chunks are removed
        from the beginning and end of lines; apart from that whitespace
        is preserved.  The 'chunks' list is consumed in the process.
        """
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        wrapped = []

        # Reverse once so that the next chunk can always be popped from
        # the end of the list.
        chunks.reverse()

        while chunks:
            # The first line gets initial_indent, all others
            # subsequent_indent.
            prefix = self.subsequent_indent if wrapped else self.initial_indent

            # Room left for text on this line.
            room = self.width - len(prefix)

            # A whitespace chunk never starts a line, unless it is the
            # very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and wrapped and chunks[-1].strip() == '':
                chunks.pop()

            # Take chunks for as long as they fit on the current line.
            pieces = []
            used = 0
            while chunks and used + len(chunks[-1]) <= room:
                piece = chunks.pop()
                pieces.append(piece)
                used += len(piece)

            # The next chunk is too big to fit on *any* line (not just
            # this one).
            if chunks and len(chunks[-1]) > room:
                self._handle_long_word(chunks, pieces, used, room)

            # A whitespace chunk never ends a line either.
            if self.drop_whitespace and pieces and not pieces[-1].strip():
                pieces.pop()

            # Store the finished line, if there is anything on it.
            if pieces:
                wrapped.append(prefix + ''.join(pieces))

        return wrapped
1877 # }}}
# function wrap() {{{


def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text, returning a list of wrapped lines.

    Extra keyword arguments are passed on to the text wrapper.
    """
    if sys.version_info >= (2, 6):
        return textwrap.wrap(text, width=width, **kwargs)
    # older interpreters: use the local TextWrapper instead
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
1888 1889 # }}} 1890