Package pywurfl :: Package algorithms
[hide private]
[frames | no frames]

Source Code for Package pywurfl.algorithms

  1  # pywurfl Algorithms - Wireless Universal Resource File UA search algorithms 
  2  # Copyright (C) 2006-2009 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = \ 
 21  """ 
 22  pywurfl search algorithms 
 23  """ 
 24   
 25  import re 
 26   
 27  from pywurfl.exceptions import DeviceNotFound 
 28  from pywurfl.algorithms.wurfl.handlers import handlers 
 29   
 30   
 31  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 32  __copyright__ = "Copyright 2006-2010, Armand Lynch" 
 33  __license__ = "LGPL" 
 34  __url__ = "http://celljam.net/" 
 35   
 36   
class Algorithm(object):
    """
    Abstract parent of every pywurfl search algorithm
    """

    def __call__(self, ua, devices):
        """
        Search C{devices} for the device matching a user agent.

        Every pywurfl algorithm class must override this method; the base
        implementation only signals that no search has been provided.

        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        @raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
try:
    import Levenshtein

    class JaroWinkler(Algorithm):
        """
        Search algorithm ranking user agents by Jaro-Winkler similarity
        """

        def __init__(self, accuracy=1.0, weight=0.05):
            """
            @param accuracy: The minimum Jaro-Winkler score a candidate must
                             reach to be accepted as a match
                             0.0 <= accuracy <= 1.0
            @type accuracy: float
            @param weight: The prefix weight: the inverse of the common
                           prefix length sufficient to consider the strings
                           'identical' (wording taken from the Levenshtein
                           module documentation).
            @type weight: float
            """
            self.accuracy = accuracy
            self.weight = weight

        def __call__(self, ua, devices):
            """
            Return the device whose user agent scores highest against C{ua}.

            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            @raises pywurfl.DeviceNotFound: if the best score is below
                                            C{self.accuracy}
            """
            # (score, user agent) pairs; max() orders by score first and
            # falls back to the user agent string on equal scores.
            scored = [
                (Levenshtein.jaro_winkler(candidate, ua, self.weight),
                 candidate)
                for candidate in devices.devuas
            ]
            best_score, best_ua = max(scored)
            if best_score >= self.accuracy:
                return devices.devuas[best_ua]
            raise DeviceNotFound(ua)

    class LevenshteinDistance(Algorithm):
        """
        Search algorithm ranking user agents by Levenshtein edit distance
        """

        def __call__(self, ua, devices):
            """
            Return the device whose user agent has the smallest edit
            distance to C{ua}.

            @param ua: The user agent
            @type ua: string
            @param devices: The devices object to search
            @type devices: Devices
            @rtype: Device
            """
            # (distance, user agent) pairs; min() orders by distance first
            # and falls back to the user agent string on equal distances.
            distances = [
                (Levenshtein.distance(ua, candidate), candidate)
                for candidate in devices.devuas
            ]
            _, closest_ua = min(distances)
            return devices.devuas[closest_ua]

except ImportError:
    # The Levenshtein module is optional; when it cannot be imported the
    # two classes above are simply not defined.
    pass
class Tokenizer(Algorithm):
    """
    Tokenizer Search Algorithm

    The user agent is split into tokens and the candidate list is narrowed
    one token at a time with a progressively longer regular expression.
    """
    # Characters treated as token separators in a user agent.
    tokenize_chars = ('/', '.', ',', ';', '-', '_', ' ', '(', ')')
    # Character class matching any run of separators between two tokens.
    base_regex = '[\\'+'\\'.join(tokenize_chars)+']*'

    def __init__(self, devwindow=30):
        """
        @param devwindow: If more than devwindow user agents match,
                          return the generic device.
        @type devwindow: integer
        """
        self.devwindow = devwindow

    def _tokenize(self, s):
        """
        Split a user agent on C{tokenize_chars} and regex-escape each token.

        @param s: The user agent to tokenize
        @type s: string
        @return: The escaped tokens, in order of appearance
        @rtype: list of strings
        """
        for d in self.tokenize_chars:
            s = s.replace(d, ' ')
        return [re.escape(x) for x in s.split()]

    def __call__(self, ua, devices):
        """
        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search
        @type devices: Devices
        @rtype: Device
        """
        # Materialise the keys: candidates are indexed below, which a
        # dictionary view object does not support.
        uas = list(devices.devuas.keys())
        # Until a token narrows the search every user agent is a candidate.
        # Seeding uas2 here keeps the final check valid when the user agent
        # produces no tokens at all (empty or separators only).
        uas2 = uas
        regex = ''
        for t in self._tokenize(ua):
            if regex:
                regex += self.base_regex + t
            else:
                regex += t

            uare = re.compile(regex + '.*', re.I)
            uas2 = [x for x in uas if uare.match(x)]

            # If the last regex didn't produce any matches and more than
            # devwindow devices were matched before, return a generic device.
            # Else, there is a device that "looks" like some others so return
            # the first one.
            if not uas2:
                if len(uas) > self.devwindow:
                    return devices.devids['generic']
                return devices.devuas[uas[0]]

            # We found one good looking match
            if len(uas2) == 1:
                return devices.devuas[uas2[0]]

            # We've got matches so search some more
            uas = uas2

        # We've got some matches but we ran out of tokens to search with.
        # If we matched more than devwindow, return a generic device.
        # Else we've got some devices within the devwindow so return the
        # first one.
        if len(uas2) > self.devwindow:
            return devices.devids['generic']
        return devices.devuas[uas2[0]]
190 191
class TwoStepAnalysis(Algorithm):
    """
    WURFL Two Step Analysis algorithm based on the Java API implementation
    """

    def __init__(self, devices):
        """
        Register every user agent in C{devices.uas} with the first handler
        that accepts it.

        Note that C{handlers} is the module-level collection imported from
        pywurfl.algorithms.wurfl.handlers, so the registrations are made on
        those shared handler objects rather than on this instance.

        @param devices: The devices object to index
        @type devices: Devices
        """
        # Remembered so that __call__ can honour its devices=None default.
        self.devices = devices
        for ua in devices.uas:
            handler = self._determine_handler(ua)
            if handler is not None:
                handler.add(ua, devices.devuas[ua].devid)

    def _determine_handler(self, ua):
        """
        @param ua: The user agent
        @type ua: string
        @return: The first handler whose can_handle(ua) is true, or None
                 when no handler accepts the user agent
        """
        for handler in handlers:
            if handler.can_handle(ua):
                return handler
        return None

    def __call__(self, ua, devices=None):
        """
        @param ua: The user agent
        @type ua: string
        @param devices: The devices object to search; when omitted, the
                        object given to the constructor is used
        @type devices: Devices
        @rtype: Device
        @raises pywurfl.DeviceNotFound: if no handler accepts the user agent
        """
        if devices is None:
            devices = self.devices
        handler = self._determine_handler(ua)
        if handler is None:
            # Without this check the None would be called below and fail
            # with an unrelated TypeError.
            raise DeviceNotFound(ua)
        match = handler(ua)
        return devices.devids[match]
213