Package pywurfl :: Package algorithms :: Package wurfl :: Module normalizers
[hide private]
[frames] | no frames]

Source Code for Module pywurfl.algorithms.wurfl.normalizers

  1  # pywurfl - Wireless Universal Resource File Tools in Python 
  2  # Copyright (C) 2006-2010 Armand Lynch 
  3  # 
  4  # This library is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU Lesser General Public License as published by the 
  6  # Free Software Foundation; either version 2.1 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This library is distributed in the hope that it will be useful, but WITHOUT 
 10  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 11  # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
 12  # details. 
 13  # 
 14  # You should have received a copy of the GNU Lesser General Public License 
 15  # along with this library; if not, write to the Free Software Foundation, Inc., 
 16  # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 17  # 
 18  # Armand Lynch <lyncha@users.sourceforge.net> 
 19   
 20  __doc__ = """ 
 21  This module contains the supporting classes for the Two Step Analysis user agent 
 22  algorithm that is used as the primary way to match user agents with the Java API 
 23  for the WURFL. 
 24   
 25  A description of the way the following source is intended to work can be found 
 26  within the source for the original Java API implementation here: 
 27  http://sourceforge.net/projects/wurfl/files/WURFL Java API/ 
 28   
 29  The original Java code is GPLd and Copyright (c) 2008-2009 WURFL-Pro srl 
 30  """ 
 31   
 32  __author__ = "Armand Lynch <lyncha@users.sourceforge.net>" 
 33  __copyright__ = "Copyright 2010, Armand Lynch" 
 34  __license__ = "LGPL" 
 35  __url__ = "http://celljam.net/" 
 36  __version__ = "1.0.1" 
 37   
 38  import re 
 39   
 40  from pywurfl.algorithms.wurfl import utils 
 41   
 42   
 43  # generic user agent normalizers 
 44   
 45  babel_fish_re = re.compile(ur"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE) 
 46  uplink_re = re.compile(ur"\s*UP\.Link.+$", re.UNICODE) 
 47  vodafone_re = re.compile(ur"/SN(\d+)\s", re.UNICODE) 
 48  yeswap_re = re.compile(ur"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)", 
 49                         re.UNICODE) 
 50  safari_re = re.compile(ur"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})", 
 51                         re.UNICODE) 
 52  ibm_wbi_re = re.compile(ur"\(via IBM WBI \d+\.\d+\)", re.UNICODE) 
 53  gmcc_re = re.compile(ur"GMCC/\d\.\d") 
 54   
 55   
def babelfish(user_agent):
    """Strip the Yahoo Babel Fish proxy marker from a user agent.

    Every occurrence matched by ``babel_fish_re`` (the
    "(via babelfish.yahoo.com)" marker together with the whitespace around
    it) is replaced with the empty string.

    :param user_agent: the user agent string to clean.
    :return: the user agent without the Babel Fish marker.
    """
    cleaned = babel_fish_re.sub('', user_agent)
    return cleaned
60 61
def blackberry(user_agent):
    """Drop everything that precedes the first "BlackBerry" token.

    A user agent that already starts with "BlackBerry", or that does not
    contain it at all, is returned unchanged.

    :param user_agent: the user agent string to clean.
    :return: the user agent starting at its first "BlackBerry" occurrence.
    """
    position = user_agent.find(u"BlackBerry")
    # -1 means "not present" and 0 means "already at the front"; in both
    # cases there is nothing to cut away.
    if position > 0:
        return user_agent[position:]
    return user_agent
68 69 74 75
def vodafone(user_agent):
    """Mask the serial number digits of a "/SNnnnn" token with "X".

    The mask is as wide as the digit run of the first match found by
    ``vodafone_re`` and is followed by a single space; that same replacement
    text is then substituted for every match in the string.  A user agent
    without such a token is returned unchanged.

    :param user_agent: the user agent string to clean.
    :return: the user agent with the serial number masked.
    """
    found = vodafone_re.search(user_agent)
    if not found:
        return user_agent
    # group(1) holds only the digits, so its length is the mask width.
    masked = u"/SN" + "X" * len(found.group(1)) + " "
    return vodafone_re.sub(masked, user_agent)
84 85
def yeswap(user_agent):
    """Strip the "Mozilla/4.0 (YesWAP mobile phone proxy)" marker.

    Every occurrence matched by ``yeswap_re`` is replaced with the empty
    string.

    :param user_agent: the user agent string to clean.
    :return: the user agent without the YesWAP proxy marker.
    """
    cleaned = yeswap_re.sub('', user_agent)
    return cleaned
90 91
def ibm_wbi(user_agent):
    """Strip the "(via IBM WBI x.y)" marker from a user agent.

    Every occurrence matched by ``ibm_wbi_re`` is replaced with the empty
    string; whitespace around the marker is left in place.

    :param user_agent: the user agent string to clean.
    :return: the user agent without the IBM WBI marker.
    """
    cleaned = ibm_wbi_re.sub('', user_agent)
    return cleaned
95 96
def gmcc(user_agent):
    """Strip the "GMCC/x.y" token from a user agent.

    Every occurrence matched by ``gmcc_re`` is replaced with the empty
    string; whitespace around the token is left in place.

    :param user_agent: the user agent string to clean.
    :return: the user agent without the GMCC token.
    """
    cleaned = gmcc_re.sub('', user_agent)
    return cleaned
100 101
def _combine_funcs(*funcs):
    """Chain several normalizers into a single callable.

    :param funcs: callables that each take a user agent string and return a
        (possibly modified) user agent string.
    :return: a function that feeds a user agent through every callable in
        the order given, folds double spaces into single ones and strips
        leading and trailing whitespace from the result.
    """
    def normalizer(user_agent):
        for func in funcs:
            user_agent = func(user_agent)
        # Removing a token from the middle of a user agent leaves the space
        # on each side of it behind; fold such a pair back into one space.
        # (The previous code replaced a single space with a single space,
        # which did nothing.)  The two-space needle is spelled as a product
        # so it cannot be silently collapsed by whitespace-mangling tools.
        return user_agent.replace(' ' * 2, ' ').strip()
    return normalizer


default_normalizer = _combine_funcs(vodafone, blackberry, uplink, yeswap,
                                    babelfish, ibm_wbi, gmcc)


# specific user agent normalizers
117 -def _specific_normalizer(user_agent, search_string, vsn_size):
118 if search_string in user_agent: 119 start = user_agent.index(search_string) 120 user_agent = user_agent[start:start + vsn_size] 121 return user_agent
122 123
def chrome(user_agent):
    """Reduce a Chrome user agent to its "Chrome" token.

    Keeps the 8 characters that start at the first "Chrome" occurrence; a
    user agent without "Chrome" is returned unchanged.

    :param user_agent: the user agent string to reduce.
    :return: the reduced user agent.
    """
    return _specific_normalizer(user_agent,
                                search_string=u"Chrome",
                                vsn_size=8)
127 128
def firefox(user_agent):
    """Reduce a Firefox user agent to its "Firefox" token.

    Keeps the 11 characters that start at the first "Firefox" occurrence; a
    user agent without "Firefox" is returned unchanged.

    :param user_agent: the user agent string to reduce.
    :return: the reduced user agent.
    """
    return _specific_normalizer(user_agent,
                                search_string=u"Firefox",
                                vsn_size=11)
132 133
def konqueror(user_agent):
    """Reduce a Konqueror user agent to its "Konqueror" token.

    Keeps the 11 characters that start at the first "Konqueror" occurrence;
    a user agent without "Konqueror" is returned unchanged.

    :param user_agent: the user agent string to reduce.
    :return: the reduced user agent.
    """
    return _specific_normalizer(user_agent,
                                search_string=u"Konqueror",
                                vsn_size=11)
137 138
def opera(user_agent):
    """Reduce an Opera user agent to its "Opera" token.

    Keeps the 7 characters that start at the first "Opera" occurrence; a
    user agent without "Opera" is returned unchanged.

    :param user_agent: the user agent string to reduce.
    :return: the reduced user agent.
    """
    return _specific_normalizer(user_agent,
                                search_string=u"Opera",
                                vsn_size=7)
142 143
def msie(user_agent):
    """Truncate an Internet Explorer user agent just after its MSIE token.

    Everything from the start of the string up to 9 characters past the
    beginning of the first "MSIE" occurrence is kept; a user agent without
    "MSIE" is returned unchanged.

    :param user_agent: the user agent string to truncate.
    :return: the truncated user agent.
    """
    marker_at = user_agent.find(u"MSIE")
    if marker_at == -1:
        return user_agent
    return user_agent[:marker_at + 9]
149 150
def android(user_agent):
    """Cut out the part of a user agent between its 3rd and 4th ";".

    The cut points come from ``utils.ordinal_index(user_agent, ";", n)`` for
    n = 3 and n = 4 (presumably the position of the n-th ";" -- confirm
    against ``utils``).  The text from the first cut point up to, but not
    including, the second one is removed.  If either lookup yields -1 the
    user agent is returned unchanged.

    :param user_agent: the user agent string to clean.
    :return: the user agent without that section.
    """
    cut_from = utils.ordinal_index(user_agent, ";", 3)
    cut_to = utils.ordinal_index(user_agent, ";", 4)
    if -1 in (cut_from, cut_to):
        return user_agent
    head = user_agent[:cut_from]
    tail = user_agent[cut_to:]
    return head + tail
158 159
def safari(user_agent):
    """Collapse a Safari user agent to its head and its Safari/xxx token.

    The characters from "; U;" up to the "Safari/xxx" token are dropped and
    the two remaining pieces are joined with a single space, e.g.

        Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr)
        AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18

    (one line in practice) becomes

        Mozilla/5.0 (Macintosh Safari/525

    A user agent that ``safari_re`` does not match is returned unchanged.

    :param user_agent: the user agent string to reduce.
    :return: the reduced user agent.
    """
    found = safari_re.search(user_agent)
    if found is None or len(found.groups()) < 3:
        return user_agent
    head = found.group(1).strip()
    tail = found.group(3).strip()
    return " ".join((head, tail))
174