Package tdi :: Package markup :: Package soup :: Module filters
[frames | no frames]

Source Code for Module tdi.markup.soup.filters

  1  # -*- coding: ascii -*- 
  2  # 
  3  # Copyright 2006 - 2013 
  4  # Andr\xe9 Malo or his licensors, as applicable 
  5  # 
  6  # Licensed under the Apache License, Version 2.0 (the "License"); 
  7  # you may not use this file except in compliance with the License. 
  8  # You may obtain a copy of the License at 
  9  # 
 10  #     http://www.apache.org/licenses/LICENSE-2.0 
 11  # 
 12  # Unless required by applicable law or agreed to in writing, software 
 13  # distributed under the License is distributed on an "AS IS" BASIS, 
 14  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 15  # See the License for the specific language governing permissions and 
 16  # limitations under the License. 
 17  """ 
 18  ===================== 
 19   Soup Filter Classes 
 20  ===================== 
 21   
 22  Filters for soup templates. 
 23  """ 
 24  __author__ = u"Andr\xe9 Malo" 
 25  __docformat__ = "restructuredtext en" 
 26   
 27  import re as _re 
 28   
 29  from tdi import util as _util 
 30  from tdi import filters as _filters 
 31   
 32   
class EncodingDetectFilter(_filters.BaseEventFilter):
    """
    Event filter sniffing the template encoding

    The encoding is looked up in HTML ``meta`` start tags and in XML
    declarations. Every encoding found is reported to the builder using
    ``handle_encoding``. The inspected events themselves are always passed
    on to the builder afterwards.
    """
    __slots__ = ('_normalize', '_meta')

    def __init__(self, builder):
        """
        Initialization

        :Parameters:
          `builder`
            Passed to the base class initializer. Afterwards
            ``self.builder.decoder.normalize`` is cached, along with the
            normalized form of the name ``meta``.
        """
        super(EncodingDetectFilter, self).__init__(builder)
        normalize = self.builder.decoder.normalize
        self._normalize = normalize
        self._meta = normalize('meta')

    @staticmethod
    def _unquote(value):
        """
        Drop the enclosing quote characters of a value

        If `value` starts with a single or double quote, its first and last
        characters are cut off and the remainder is whitespace-stripped.
        Otherwise `value` is returned as it is.
        """
        if not (value.startswith('"') or value.startswith("'")):
            return value
        return value[1:-1].strip()

    def _sniff_meta(self, attrs):
        """
        Report the encoding found in the attributes of a meta element

        The ``charset`` attribute wins. Without it, a ``content`` attribute
        is evaluated, provided that ``http-equiv`` is ``content-type``.

        :Parameters:
          `attrs`
            Mapping of normalized attribute names to their raw values
        """
        norm = self._normalize
        unquote = self._unquote

        # <meta charset="...">
        charset = unquote(str(attrs.get(norm('charset')) or ''))
        if charset:
            self.builder.handle_encoding(charset)
            return

        # <meta http-equiv="Content-Type" content="...; charset=...">
        equiv = unquote((attrs.get(norm('http-equiv')) or '').lower())
        if equiv != 'content-type':
            return

        ctype = attrs.get(norm('content'))
        if not ctype:
            return

        parsed = _util.parse_content_type(unquote(ctype))
        if parsed is None:
            return

        charset = parsed[1].get('charset')
        if charset:
            self.builder.handle_encoding(charset[0].strip())

    def handle_starttag(self, name, attr, closed, data):
        """
        Look for an encoding hint inside of HTML meta elements

        These are the formats being recognized::

          <meta charset="utf-8"> <!-- HTML5 -->

          <meta http-equiv="Content-Type" content="text/html; charset=utf-8">

        Regardless of the outcome the event is handed over to the builder.

        :See: `BuildingListenerInterface`
        """
        norm = self._normalize
        if norm(name) == self._meta:
            attrs = {}
            for key, val in attr:
                attrs[norm(key)] = val
            self._sniff_meta(attrs)

        self.builder.handle_starttag(name, attr, closed, data)

    #: Regex matcher recognizing xml declarations
    #:
    #: :Type: ``callable``
    _PI_MATCH = _re.compile(r'''
        <\? \s* [xX][mM][lL] \s+ (?P<attr>
            [^"'?]*
            (?:
                (?:
                    "[^"]*"
                  | '[^']*'
                )
                [^"'?]*
            )*
        )
        \s* \?>$
    ''', _re.X).match

    #: Iterator over the attributes of a matched xml declaration
    #:
    #: :Type: ``callable``
    _PI_ATT_ITER = _re.compile(r'''
        \s*
        (?P<name>[^\s=]*)           # attribute name
        \s*
        =
        (?P<value>                  # value
            \s*"[^"]*"
          | \s*'[^']*'
        )
    ''', _re.X).finditer

    def handle_pi(self, data):
        """
        Look for the encoding inside of an xml declaration

        This is a sample of the format being recognized::

          <?xml version="1.0" encoding="ascii" ?>

        If `data` is an xml declaration, an encoding is always reported
        (``utf-8`` if the declaration does not name one). Regardless of the
        outcome the event is handed over to the builder.

        :See: `BuildingListenerInterface`
        """
        declaration = self._PI_MATCH(str(data))
        if declaration:
            encoding = 'utf-8'  # xml default
            for item in self._PI_ATT_ITER(declaration.group('attr')):
                key = item.group('name')
                value = item.group('value')
                if not (key or value):
                    break
                if key != 'encoding':
                    continue

                # The first encoding pseudo attribute ends the search, even
                # if it turns out to be empty.
                value = self._unquote(value.strip())
                if value:
                    encoding = value
                break
            self.builder.handle_encoding(encoding)

        self.builder.handle_pi(data)

# If ``tdi.c`` is able to load the 'impl' module, the filter class defined
# above is replaced by the implementation provided there. The helper names
# are removed again in order to keep the module namespace clean.
from tdi import c as _c
_impl = _c.load('impl')
if _impl is not None:
    EncodingDetectFilter = _impl.SoupEncodingDetectFilter
del _c, _impl