|
|
User-Agent detection: The Code
This code is written in Java. It is trivial enough to be translated into any language easily, though. The first three methods are utilities, and the last two are the real detection: Browser and OS. The one in the middle is a helper to handle bots. In the code, some comments (in red) will guide you through the process.
The three methods returning a Browser/OS (the last three) actually return an array of three Strings, representing the element with three different levels of verbosity.
- The first element represents the class of the element (Win, Linux, MSIE, KHTML...)
- The second element represents the element in reasonable detail (Win98, WinXP, MSIE6, Gecko(Firebird)...)
- The last element represents the element with all known details from the User-Agent (Linux2.4.23 i686, Windows NT 5.1, Gecko2004(Firebird0.8.0+), Opera7.23...), the only exception being Gecko, for which I truncate the timestamp to the first four digits (the year); otherwise it gets unmanageable
This page was last updated on Oct 23rd 2010.
You can download the code by clicking here. This code is free of any charge and too small to be licensed (IMHO)
This method gets a user-agent String and a position. The text starting at this position is assumed to be a version number (from an OS or a Browser, such as 3.1 or NT3.51beta). The method then parses the User-Agent provided and tries to detect as much of the version number as possible.
|
/**
 * Extracts the version token found at a given offset of a User-Agent string.
 *
 * <p>The scan is a three-state machine:
 * <ul>
 *   <li>state 0 skips leading spaces and slashes; a {@code ';'} or {@code ')'} met here
 *       means there is no version at all;</li>
 *   <li>state 1 accumulates characters until one of {@code ; / ) ( [} is met;</li>
 *   <li>state 2 is entered right after a space: the token only continues if the next
 *       character is a digit or a lower-case letter (so "NT 5.1" is kept whole, while
 *       "7.0 Windows" stops after "7.0").</li>
 * </ul>
 *
 * @param a_userAgent the User-Agent string; {@code null} is treated as "no version"
 * @param a_position  offset at which the version is expected to start; a negative value
 *                    (typically a failed {@code indexOf}) yields an empty result
 * @return the trimmed version token, or an empty string when none can be read
 */
public static String getVersionNumber(String a_userAgent, int a_position) {
    // A missing header or a failed lookup cannot carry a version.
    if (a_userAgent == null || a_position < 0) {
        return "";
    }
    StringBuilder res = new StringBuilder();
    int status = 0;
    while (a_position < a_userAgent.length()) {
        char c = a_userAgent.charAt(a_position);
        switch (status) {
            case 0: // before the version: skip separators
                if (c == ' ' || c == '/') {
                    break;
                }
                if (c == ';' || c == ')') {
                    return "";
                }
                status = 1;
                // intentional fall-through: this character is the first one of the version
            case 1: // inside the version token
                if (c == ';' || c == '/' || c == ')' || c == '(' || c == '[') {
                    return res.toString().trim();
                }
                if (c == ' ') {
                    status = 2;
                }
                res.append(c);
                break;
            case 2: // just after a space: decide whether the token goes on
                if ((Character.isLetter(c) && Character.isLowerCase(c)) || Character.isDigit(c)) {
                    res.append(c);
                    status = 1;
                } else {
                    return res.toString().trim();
                }
                break;
            default:
                break;
        }
        a_position++;
    }
    return res.toString().trim();
}
|
This method relies on the preceding one to get the version number. It then truncates it to the numDigits first characters.
|
/**
 * Returns at most the first {@code numDigits} characters of the version token read by
 * {@code getVersionNumber} at the given offset.
 *
 * @param a_userAgent the User-Agent string to read from
 * @param a_position  offset at which the version is expected to start
 * @param numDigits   maximum number of characters to keep; zero or less keeps nothing
 * @return the leading part of the version token, possibly empty
 */
public static String getFirstVersionNumber(String a_userAgent, int a_position, int numDigits) {
    final String fullVersion = getVersionNumber(a_userAgent, a_position);
    if (fullVersion == null) {
        return "";
    }
    // Clamp to [0, length] so a short token or a non-positive limit never throws.
    final int keep = Math.max(0, Math.min(numDigits, fullVersion.length()));
    return fullVersion.substring(0, keep);
}
|
| This method is a Java utility and doesn't really hold any logic. It takes three Strings as parameters and returns an array of 3 strings. This array is used to return the right label for the right verbosity.
|
/**
 * Packs three labels into a new three-element array, in the order given.
 *
 * @param a label stored at index 0
 * @param b label stored at index 1
 * @param c label stored at index 2
 * @return a fresh array {@code {a, b, c}}
 */
public static String[] getArray(String a, String b, String c) {
    return new String[] {a, b, c};
}
|
| This method detects if the userAgent provided is a Bot or Not. In case it is, it will return the name of the Bot. Bots are considered as being Browsers in my terminology. They belong in the OS "Bot". They hold only two levels of verbosity: 0 and 1 are not verbose, 2 is verbose, including the version number. This method might be out of date as I am testing this code on a system that doesn't record bots hits.
|
/**
 * Recognises a handful of crawler signatures in a User-Agent string.
 *
 * <p>Matching is case-insensitive. The lower-casing uses {@code Locale.ROOT} so that the
 * result does not depend on the JVM default locale (a Turkish locale would otherwise turn
 * "I" into a dotless i and break the "inktomi" match).
 *
 * @param userAgent the User-Agent string; may be {@code null}
 * @return {@code null} when no known bot signature is found (or the input is {@code null});
 *         otherwise a three-element array where indexes 0 and 1 hold the bot name and
 *         index 2 holds the bot name followed by its version when one could be read
 */
public static String[] getBotName(String userAgent) {
    if (userAgent == null) {
        return null;
    }
    final String ua = userAgent.toLowerCase(java.util.Locale.ROOT);
    String res = null;
    int pos;
    if ((pos = ua.indexOf("google/")) > -1) {
        res = "Google";
        pos += 7;
    } else if ((pos = ua.indexOf("msnbot/")) > -1) {
        res = "MSNBot";
        pos += 7;
    } else if ((pos = ua.indexOf("googlebot/")) > -1) {
        res = "Google";
        pos += 10;
    } else if ((pos = ua.indexOf("webcrawler/")) > -1) {
        res = "WebCrawler";
        pos += 11;
    } else if (ua.indexOf("inktomi") > -1) {
        // No version is read for this signature: -1 makes getVersionNumber return "".
        res = "Inktomi";
        pos = -1;
    } else if (ua.indexOf("teoma") > -1) {
        // Same as above: name only, no version.
        res = "Teoma";
        pos = -1;
    }
    if (res == null) {
        return null;
    }
    return getArray(res, res, res + getVersionNumber(ua, pos));
}
|
Here we go with a big piece: The detection of the OS. The code is mostly focused on Windows for two reasons:
- It is the most encountered OS, see here for details.
- The version numbers are a little screwed as the OS is represented by two names (Win and Windows) and the version numbers for NT by two different representations: XP and NT 5.1 for example.
|
/**
 * Detects the operating system advertised by a User-Agent string.
 *
 * <p>Bots are reported as the pseudo-OS "Bot". Windows is tested first and in the most
 * detail because it is written both as "Windows" and "Win", and its NT releases appear
 * both by marketing name and by NT version number.
 *
 * @param userAgent the User-Agent string; {@code null} is reported as unknown
 * @return a three-element array: OS family, OS with reasonable detail, OS with every
 *         detail that could be read; unknown values are rendered as {@code <b>?</b>}
 */
public static String[] getOS(String userAgent) {
    if (userAgent == null) {
        return getArray("<b>?</b>", "<b>?</b>", "<b>?</b>");
    }
    if (getBotName(userAgent) != null) {
        return getArray("Bot", "Bot", "Bot");
    }
    String[] res = null;
    int pos;
    if ((pos = userAgent.indexOf("Windows-NT")) > -1) {
        // +8 lands on the "NT" of "Windows-NT" so the label reads "WinNT...".
        res = getArray("Win", "WinNT", "Win" + getVersionNumber(userAgent, pos + 8));
    } else if (userAgent.indexOf("Windows NT") > -1) {
        // NT family: map the NT version number to the marketing name.
        if ((pos = userAgent.indexOf("Windows NT 5.1")) > -1) {
            res = getArray("Win", "WinXP", "Win" + getVersionNumber(userAgent, pos + 7));
        } else if ((pos = userAgent.indexOf("Windows NT 6.0")) > -1) {
            res = getArray("Win", "Vista", "Vista" + getVersionNumber(userAgent, pos + 7));
        } else if ((pos = userAgent.indexOf("Windows NT 6.1")) > -1) {
            // Windows 7 is NT 6.1 (NT 5.0 is Windows 2000, handled just below).
            res = getArray("Win", "Seven", "Seven " + getVersionNumber(userAgent, pos + 7));
        } else if ((pos = userAgent.indexOf("Windows NT 5.0")) > -1) {
            res = getArray("Win", "Win2000", "Win" + getVersionNumber(userAgent, pos + 7));
        } else if ((pos = userAgent.indexOf("Windows NT 5.2")) > -1) {
            res = getArray("Win", "Win2003", "Win" + getVersionNumber(userAgent, pos + 7));
        } else if ((pos = userAgent.indexOf("Windows NT 4.0")) > -1) {
            res = getArray("Win", "WinNT4", "Win" + getVersionNumber(userAgent, pos + 7));
        } else if (userAgent.indexOf("Windows NT)") > -1) {
            res = getArray("Win", "WinNT", "WinNT");
        } else if (userAgent.indexOf("Windows NT;") > -1) {
            res = getArray("Win", "WinNT", "WinNT");
        } else {
            res = getArray("Win", "<B>WinNT?</B>", "<B>WinNT?</B>");
        }
    } else if (userAgent.indexOf("Win") > -1) {
        // First pass: the long "Windows xx" spellings.
        if (userAgent.indexOf("Windows") > -1) {
            if ((pos = userAgent.indexOf("Windows 98")) > -1) {
                res = getArray("Win", "Win98", "Win" + getVersionNumber(userAgent, pos + 7));
            } else if ((pos = userAgent.indexOf("Windows_98")) > -1) {
                // +8 skips the underscore, which getVersionNumber would not skip itself.
                res = getArray("Win", "Win98", "Win" + getVersionNumber(userAgent, pos + 8));
            } else if ((pos = userAgent.indexOf("Windows 2000")) > -1) {
                res = getArray("Win", "Win2000", "Win" + getVersionNumber(userAgent, pos + 7));
            } else if ((pos = userAgent.indexOf("Windows 95")) > -1) {
                res = getArray("Win", "Win95", "Win" + getVersionNumber(userAgent, pos + 7));
            } else if ((pos = userAgent.indexOf("Windows 9x")) > -1) {
                res = getArray("Win", "Win9x", "Win" + getVersionNumber(userAgent, pos + 7));
            } else if ((pos = userAgent.indexOf("Windows ME")) > -1) {
                res = getArray("Win", "WinME", "Win" + getVersionNumber(userAgent, pos + 7));
            } else if ((pos = userAgent.indexOf("Windows 3.1")) > -1) {
                res = getArray("Win", "Win31", "Win" + getVersionNumber(userAgent, pos + 7));
            }
        }
        // Second pass: the short "Winxx" spellings.
        if (res == null) {
            if ((pos = userAgent.indexOf("Win98")) > -1) {
                res = getArray("Win", "Win98", "Win" + getVersionNumber(userAgent, pos + 3));
            } else if ((pos = userAgent.indexOf("Win31")) > -1) {
                res = getArray("Win", "Win31", "Win" + getVersionNumber(userAgent, pos + 3));
            } else if ((pos = userAgent.indexOf("Win95")) > -1) {
                res = getArray("Win", "Win95", "Win" + getVersionNumber(userAgent, pos + 3));
            } else if ((pos = userAgent.indexOf("Win 9x")) > -1) {
                res = getArray("Win", "Win9x", "Win" + getVersionNumber(userAgent, pos + 3));
            } else if ((pos = userAgent.indexOf("WinNT4.0")) > -1) {
                res = getArray("Win", "WinNT4", "Win" + getVersionNumber(userAgent, pos + 3));
            } else if ((pos = userAgent.indexOf("WinNT")) > -1) {
                res = getArray("Win", "WinNT", "Win" + getVersionNumber(userAgent, pos + 3));
            }
        }
        // Last resort: some Windows we do not know; keep whatever version text follows.
        if (res == null) {
            if ((pos = userAgent.indexOf("Windows")) > -1) {
                res = getArray("Win", "<B>Win?</B>",
                        "<B>Win?" + getVersionNumber(userAgent, pos + 7) + "</B>");
            } else if ((pos = userAgent.indexOf("Win")) > -1) {
                res = getArray("Win", "<B>Win?</B>",
                        "<B>Win?" + getVersionNumber(userAgent, pos + 3) + "</B>");
            } else {
                res = getArray("Win", "<B>Win?</B>", "<B>Win?</B>");
            }
        }
    } else if ((pos = userAgent.indexOf("Mac OS X")) > -1) {
        // iOS devices also advertise "Mac OS X", so they are told apart here.
        if (userAgent.indexOf("iPhone") > -1) {
            pos = userAgent.indexOf("iPhone OS");
            String version = (pos < 0) ? "" : getVersionNumber(userAgent, pos + 9);
            if (userAgent.indexOf("iPod") > -1) {
                res = getArray("iOS", "iOS-iPod", "iOS-iPod " + version);
            } else {
                res = getArray("iOS", "iOS-iPhone", "iOS-iPhone " + version);
            }
        } else if (userAgent.indexOf("iPad") > -1) {
            pos = userAgent.indexOf("CPU OS");
            String version = (pos < 0) ? "" : getVersionNumber(userAgent, pos + 6);
            res = getArray("iOS", "iOS-iPad", "iOS-iPad " + version);
        } else {
            res = getArray("Mac", "MacOSX", "MacOS " + getVersionNumber(userAgent, pos + 8));
        }
    } else if ((pos = userAgent.indexOf("Android")) > -1) {
        // Start right after the 7-character keyword; the separator is skipped by the parser.
        res = getArray("Linux", "Android", "Android " + getVersionNumber(userAgent, pos + 7));
    } else if ((pos = userAgent.indexOf("Mac_PowerPC")) > -1) {
        res = getArray("Mac", "MacPPC", "MacOS " + getVersionNumber(userAgent, pos + 3));
    } else if (userAgent.indexOf("Macintosh") > -1) {
        if (userAgent.indexOf("PPC") > -1) {
            res = getArray("Mac", "MacPPC", "MacOS?");
        } else {
            res = getArray("Mac?", "Mac?", "MacOS?");
        }
    } else if ((pos = userAgent.indexOf("FreeBSD")) > -1) {
        res = getArray("*BSD", "*BSD FreeBSD", "FreeBSD " + getVersionNumber(userAgent, pos + 7));
    } else if ((pos = userAgent.indexOf("OpenBSD")) > -1) {
        res = getArray("*BSD", "*BSD OpenBSD", "OpenBSD " + getVersionNumber(userAgent, pos + 7));
    } else if ((pos = userAgent.indexOf("Linux")) > -1) {
        String detail = "Linux " + getVersionNumber(userAgent, pos + 5);
        String med = "Linux";
        if ((pos = userAgent.indexOf("Ubuntu/")) > -1) {
            detail = "Ubuntu " + getVersionNumber(userAgent, pos + 7);
            med += " Ubuntu";
        }
        res = getArray("Linux", med, detail);
    } else if (userAgent.indexOf("CentOS") > -1) {
        res = getArray("Linux", "Linux CentOS", "CentOS");
    } else if ((pos = userAgent.indexOf("NetBSD")) > -1) {
        res = getArray("*BSD", "*BSD NetBSD", "NetBSD " + getVersionNumber(userAgent, pos + 6));
    } else if ((pos = userAgent.indexOf("Unix")) > -1) {
        res = getArray("Linux", "Linux", "Linux " + getVersionNumber(userAgent, pos + 4));
    } else if ((pos = userAgent.indexOf("SunOS")) > -1) {
        res = getArray("Unix", "SunOS", "SunOS" + getVersionNumber(userAgent, pos + 5));
    } else if ((pos = userAgent.indexOf("IRIX")) > -1) {
        res = getArray("Unix", "IRIX", "IRIX" + getVersionNumber(userAgent, pos + 4));
    } else if ((pos = userAgent.indexOf("SonyEricsson")) > -1) {
        res = getArray("SonyEricsson", "SonyEricsson",
                "SonyEricsson" + getVersionNumber(userAgent, pos + 12));
    } else if ((pos = userAgent.indexOf("Nokia")) > -1) {
        res = getArray("Nokia", "Nokia", "Nokia" + getVersionNumber(userAgent, pos + 5));
    } else if ((pos = userAgent.indexOf("BlackBerry")) > -1) {
        res = getArray("BlackBerry", "BlackBerry",
                "BlackBerry" + getVersionNumber(userAgent, pos + 10));
    } else if ((pos = userAgent.indexOf("SymbianOS")) > -1) {
        // "SymbianOS" is 9 characters long.
        res = getArray("SymbianOS", "SymbianOS",
                "SymbianOS" + getVersionNumber(userAgent, pos + 9));
    } else if (userAgent.indexOf("BeOS") > -1) {
        res = getArray("BeOS", "BeOS", "BeOS");
    } else if ((pos = userAgent.indexOf("Nintendo Wii")) > -1) {
        // "Nintendo Wii" is 12 characters long; a shorter offset would re-read the "ii".
        res = getArray("Nintendo Wii", "Nintendo Wii",
                "Nintendo Wii" + getVersionNumber(userAgent, pos + 12));
    } else {
        res = getArray("<b>?</b>", "<b>?</b>", "<b>?</b>");
    }
    return res;
}
|
Here is the main part: the detection of the Browser. Some interesting tricks:
- As Opera has the good idea of including a version of IE in its User-Agent, it has to be tested before IE.
- Gecko stands for the rendering engine of Mozilla. It has several packagings: Camino & Chimera for the Mac, Galeon for *nix and Phoenix, Firebird, and Firefox for the generic versions from mozilla.org. Additionally, Netscape in its versions greater than 6 also uses Gecko.
- KHTML stands for the KDE renderer and WebKit. It is packaged as Konqueror for Linux and Safari for the Mac, as well as Chrome and mobile browser of the iPhone and Android platforms.
- As most User-Agents begin with Mozilla/X.0, it is very hazardous to detect browsers that rely on this version number to actually describe themselves. Among them is Netscape Communicator, which hopefully is finally dying.
|
/**
 * Detects the browser (or bot) advertised by a User-Agent string.
 *
 * <p>Order matters: Opera embeds an MSIE token and is therefore tested before MSIE;
 * Gecko-based browsers are refined by their packaging (Firefox, Camino, ...); WebKit/KHTML
 * browsers come next; old Netscape Communicator, which only identifies itself through the
 * leading "Mozilla/4.x", is tested last.
 *
 * @param userAgent the User-Agent string; {@code null} is reported as unknown
 * @return a three-element array: browser family, browser with reasonable detail, browser
 *         with every detail that could be read; unknown values are rendered as
 *         {@code <B>?</B>}
 */
public static String[] getBrowser(String userAgent) {
    if (userAgent == null) {
        return getArray("<B>?</B>", "<B>?</B>", "<B>?</B>");
    }
    String[] botName = getBotName(userAgent);
    if (botName != null) {
        return botName;
    }
    String[] res = null;
    int pos;
    if ((pos = userAgent.indexOf("Lotus-Notes/")) > -1) {
        res = getArray("LotusNotes", "LotusNotes",
                "LotusNotes" + getVersionNumber(userAgent, pos + 12));
    } else if ((pos = userAgent.indexOf("Opera")) > -1) {
        res = getArray("Opera",
                "Opera" + getFirstVersionNumber(userAgent, pos + 5, 1),
                "Opera" + getVersionNumber(userAgent, pos + 5));
    } else if (userAgent.indexOf("MSIE") > -1) {
        // Position of the first MSIE token, used when a branch matches on Trident alone.
        final int msiePos = userAgent.indexOf("MSIE");
        // IE8 and IE9 in compatibility view claim "MSIE 7.0" but keep their Trident token.
        final boolean trident4 = userAgent.indexOf("Trident/4.0") > -1;
        final boolean trident5 = userAgent.indexOf("Trident/5.0") > -1;
        if ((pos = userAgent.indexOf("MSIE 6.0")) > -1) {
            res = getArray("MSIE", "MSIE6", "MSIE" + getVersionNumber(userAgent, pos + 4));
        } else if ((pos = userAgent.indexOf("MSIE 5.0")) > -1) {
            res = getArray("MSIE", "MSIE5", "MSIE" + getVersionNumber(userAgent, pos + 4));
        } else if ((pos = userAgent.indexOf("MSIE 5.5")) > -1) {
            res = getArray("MSIE", "MSIE5.5", "MSIE" + getVersionNumber(userAgent, pos + 4));
        } else if ((pos = userAgent.indexOf("MSIE 5.")) > -1) {
            res = getArray("MSIE", "MSIE5.x", "MSIE" + getVersionNumber(userAgent, pos + 4));
        } else if ((pos = userAgent.indexOf("MSIE 4")) > -1) {
            res = getArray("MSIE", "MSIE4", "MSIE" + getVersionNumber(userAgent, pos + 4));
        } else if ((pos = userAgent.indexOf("MSIE 7")) > -1 && !trident4 && !trident5) {
            res = getArray("MSIE", "MSIE7", "MSIE" + getVersionNumber(userAgent, pos + 4));
        } else if ((pos = userAgent.indexOf("MSIE 8")) > -1 || trident4) {
            // pos is -1 when only the Trident token matched: fall back to the MSIE token.
            int versionAt = (pos > -1 ? pos : msiePos) + 4;
            res = getArray("MSIE", "MSIE8", "MSIE" + getVersionNumber(userAgent, versionAt));
        } else if ((pos = userAgent.indexOf("MSIE 9")) > -1 || trident5) {
            int versionAt = (pos > -1 ? pos : msiePos) + 4;
            res = getArray("MSIE", "MSIE9", "MSIE" + getVersionNumber(userAgent, versionAt));
        } else {
            res = getArray("MSIE", "<B>MSIE?</B>",
                    "<B>MSIE?" + getVersionNumber(userAgent, msiePos + 4) + "</B>");
        }
    } else if ((pos = userAgent.indexOf("Gecko/")) > -1) {
        // Only the first four digits of the Gecko build stamp (the year) are kept.
        res = getArray("Gecko", "Gecko", "Gecko" + getFirstVersionNumber(userAgent, pos + 5, 4));
        if ((pos = userAgent.indexOf("Camino/")) > -1) {
            res[1] += "(Camino)";
            res[2] += "(Camino" + getVersionNumber(userAgent, pos + 7) + ")";
        } else if ((pos = userAgent.indexOf("Chimera/")) > -1) {
            res[1] += "(Chimera)";
            res[2] += "(Chimera" + getVersionNumber(userAgent, pos + 8) + ")";
        } else if ((pos = userAgent.indexOf("Firebird/")) > -1) {
            res[1] += "(Firebird)";
            res[2] += "(Firebird" + getVersionNumber(userAgent, pos + 9) + ")";
        } else if ((pos = userAgent.indexOf("Phoenix/")) > -1) {
            res[1] += "(Phoenix)";
            res[2] += "(Phoenix" + getVersionNumber(userAgent, pos + 8) + ")";
        } else if ((pos = userAgent.indexOf("Galeon/")) > -1) {
            res[1] += "(Galeon)";
            res[2] += "(Galeon" + getVersionNumber(userAgent, pos + 7) + ")";
        } else if ((pos = userAgent.indexOf("Firefox/")) > -1) {
            res[1] += "(Firefox)";
            res[2] += "(Firefox" + getVersionNumber(userAgent, pos + 8) + ")";
        } else if (userAgent.indexOf("Netscape/") > -1) {
            if ((pos = userAgent.indexOf("Netscape/6")) > -1) {
                res[1] += "(NS6)";
                res[2] += "(NS" + getVersionNumber(userAgent, pos + 9) + ")";
            } else if ((pos = userAgent.indexOf("Netscape/7")) > -1) {
                res[1] += "(NS7)";
                res[2] += "(NS" + getVersionNumber(userAgent, pos + 9) + ")";
            } else {
                res[1] += "(NS?)";
                res[2] += "(NS?"
                        + getVersionNumber(userAgent, userAgent.indexOf("Netscape/") + 9) + ")";
            }
        }
    } else if ((pos = userAgent.indexOf("Netscape/")) > -1) {
        // Look up the "/4" variant separately so pos still points at "Netscape/" otherwise.
        int ns4Pos = userAgent.indexOf("Netscape/4");
        if (ns4Pos > -1) {
            res = getArray("NS", "NS4", "NS" + getVersionNumber(userAgent, ns4Pos + 9));
        } else {
            res = getArray("NS", "NS?", "NS?" + getVersionNumber(userAgent, pos + 9));
        }
    } else if ((pos = userAgent.indexOf("Chrome/")) > -1) {
        res = getArray("KHTML", "KHTML(Chrome)",
                "KHTML(Chrome" + getVersionNumber(userAgent, pos + 6) + ")");
    } else if ((pos = userAgent.indexOf("Safari/")) > -1) {
        res = getArray("KHTML", "KHTML(Safari)",
                "KHTML(Safari" + getVersionNumber(userAgent, pos + 6) + ")");
    } else if ((pos = userAgent.indexOf("Konqueror/")) > -1) {
        res = getArray("KHTML", "KHTML(Konqueror)",
                "KHTML(Konqueror" + getVersionNumber(userAgent, pos + 9) + ")");
    } else if ((pos = userAgent.indexOf("KHTML")) > -1) {
        res = getArray("KHTML", "KHTML?", "KHTML?(" + getVersionNumber(userAgent, pos + 5) + ")");
    } else if ((pos = userAgent.indexOf("NetFront")) > -1) {
        res = getArray("NetFront", "NetFront", "NetFront " + getVersionNumber(userAgent, pos + 8));
    } else if (userAgent.indexOf("Mozilla/4.") == 0
            && userAgent.indexOf("Mozilla/4.0") < 0
            && userAgent.indexOf("Mozilla/4.5 ") < 0) {
        // Communicator's version is the Mozilla version itself: read it right after the
        // leading "Mozilla" (7 characters); the '/' is skipped by getVersionNumber.
        res = getArray("Communicator", "Communicator",
                "Communicator" + getVersionNumber(userAgent, 7));
    } else {
        return getArray("<B>?</B>", "<B>?</B>", "<B>?</B>");
    }
    return res;
}
|
| |