1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 __doc__ = """
21 This module contains the supporting classes for the Two Step Analysis user agent
22 algorithm that is used as the primary way to match user agents with the Java API
23 for the WURFL.
24
25 A description of the way the following source is intended to work can be found
26 within the source for the original Java API implementation here:
27 http://sourceforge.net/projects/wurfl/files/WURFL Java API/
28
29 The original Java code is GPLd and Copyright (c) 2008-2009 WURFL-Pro srl
30 """
31
32 __author__ = "Armand Lynch <lyncha@users.sourceforge.net>"
33 __copyright__ = "Copyright 2010, Armand Lynch"
34 __license__ = "LGPL"
35 __url__ = "http://celljam.net/"
36 __version__ = "1.0.1"
37
38 import re
39
40 from pywurfl.algorithms.wurfl import utils
41
42
43
44
# Pre-compiled patterns used by the user-agent normalizers.
#
# The patterns are plain raw strings rather than ``ur"..."`` literals: the
# ``ur`` prefix is a syntax error on Python 3, and every pattern is pure
# ASCII, so the compiled expressions match exactly the same text.

# "(via babelfish.yahoo.com)" together with any surrounding whitespace.
babel_fish_re = re.compile(r"\s*\(via babelfish.yahoo.com\)\s*", re.UNICODE)

# "UP.Link" and everything after it up to the end of the string, plus any
# whitespace immediately before it.
uplink_re = re.compile(r"\s*UP\.Link.+$", re.UNICODE)

# "/SN" followed by a run of digits (captured) and one whitespace character.
vodafone_re = re.compile(r"/SN(\d+)\s", re.UNICODE)

# The YesWAP proxy banner, plus any whitespace immediately before it.
yeswap_re = re.compile(r"\s*Mozilla/4\.0 \(YesWAP mobile phone proxy\)",
                       re.UNICODE)

# Three groups: (1) "Mozilla/5.0" and the text before the "; U;" marker
# (greedy), (2) the marker and everything up to "Safari/" (lazy),
# (3) "Safari/" followed by at most three digits.
safari_re = re.compile(r"(Mozilla\/5\.0.*)(\;\s*U\;.*?)(Safari\/\d{0,3})",
                       re.UNICODE)

# "(via IBM WBI <digits>.<digits>)"; surrounding whitespace is not consumed.
ibm_wbi_re = re.compile(r"\(via IBM WBI \d+\.\d+\)", re.UNICODE)

# "GMCC/" followed by one digit, a dot and one digit, e.g. "GMCC/1.0".
gmcc_re = re.compile(r"GMCC/\d\.\d")
54
55
57 """Replace the "via babelfish.yahoo.com" with ''"""
58
59 return babel_fish_re.sub('', user_agent)
60
61
63 """Strip any text that precedes the first "BlackBerry" token."""
64
65 if u"BlackBerry" in user_agent and not user_agent.startswith(u"BlackBerry"):
66 user_agent = user_agent[user_agent.index(u"BlackBerry"):]
67 return user_agent
68
69
71 """Replace the trailing UP.Link ... with ''"""
72
73 return uplink_re.sub('', user_agent)
74
75
77 """Normalize the "/SNnnnnnnnnnnnnnnnn" String."""
78
79 match = vodafone_re.search(user_agent)
80 if match:
81 grp_repl = u"/SN" + "X" * (len(match.group()) - 4) + " "
82 user_agent = vodafone_re.sub(grp_repl, user_agent)
83 return user_agent
84
85
87 """Replace the "YesWAP mobile phone proxy" with ''"""
88
89 return yeswap_re.sub('', user_agent)
90
91
95
96
def gmcc(user_agent):
    """Return user_agent with every "GMCC/x.y" token removed.

    The token is defined by the module-level ``gmcc_re`` pattern; whitespace
    around a removed token is left untouched.
    """
    stripped = gmcc_re.sub('', user_agent)
    return stripped
100
101
def normalizer(user_agent):
    """Run user_agent through every function in ``funcs`` and tidy spaces.

    Each function receives the output of the previous one. Afterwards doubled
    spaces are collapsed and leading/trailing whitespace is stripped.
    """
    for step in funcs:
        user_agent = step(user_agent)
    # Removing a token from the middle of the string leaves two adjacent
    # spaces behind. The previous replace(' ', ' ') swapped a space for a
    # space and therefore never collapsed them. This is a single pass, so
    # each pair of spaces becomes one.
    return user_agent.replace('  ', ' ').strip()
108 return normalizer
109
110
# Normalizer built from the generic clean-up steps; presumably they are
# applied in the order listed here (confirm against _combine_funcs).
default_normalizer = _combine_funcs(
    vodafone,
    blackberry,
    uplink,
    yeswap,
    babelfish,
    ibm_wbi,
    gmcc)
113
114
115
116
118 if search_string in user_agent:
119 start = user_agent.index(search_string)
120 user_agent = user_agent[start:start + vsn_size]
121 return user_agent
122
123
127
128
132
133
137
138
142
143
def msie(user_agent):
    """Truncate user_agent shortly after its first "MSIE" marker.

    The result keeps everything up to the marker plus nine characters
    counted from the marker's start: "MSIE" itself and the five characters
    that follow it. A string without "MSIE" is returned unchanged.
    """
    if u"MSIE" not in user_agent:
        return user_agent
    cut = user_agent.index(u"MSIE") + 9
    return user_agent[:cut]
149
150
152
153 start = utils.ordinal_index(user_agent, ";", 3)
154 end = utils.ordinal_index(user_agent, ";", 4)
155 if start == -1 or end == -1:
156 return user_agent
157 return user_agent[:start] + user_agent[end:]
158
159
161 """
162 Return the Safari user agent, stripping out all the characters between
163 U; and Safari/xxx
164
165 e.g Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; fr) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.1 Safari/525.18
166 becomes
167 Mozilla/5.0 (Macintosh Safari/525
168 """
169
170 match = safari_re.search(user_agent)
171 if match and len(match.groups()) >= 3:
172 user_agent = " ".join([match.group(1).strip(), match.group(3).strip()])
173 return user_agent
174