Basically, I've started using so much more SQL in our new Business Intelligence projects that I've been revising all my scripts to see what SQL I can optimize.
One of my systems is a MediaWiki CMS which is used for the official Bournemouth University Knowledge Base. The skin itself is the index page loaded for any page in the Wiki system. It logs the IP address (VisitorIP), the URL (VisitorURL) entered via the browser useragent (VisitorUAgent), the User ID (VisitorID, 0 if not logged in) and of course the Timestamp (DateTimeStamp).
Here is a page of some MySQL queries I can do based on just those 5 columns:
MediaWiki: number of unique guests, students and staff by month. Also associates a UserID to the wikimedia_user table to identify a user.
SELECT CONCAT(MONTHNAME(t2.Date), ' ', YEAR(t2.Date)) Month, SUM(IF(t2.VisitorType='Staff', 1, 0)) Staff, SUM(IF(t2.VisitorType='Student', 1, 0)) Student, SUM(IF(t2.VisitorType='Anonymous', 1, 0)) Guests FROM ( SELECT b.user_name User, CASE WHEN INSTR(b.user_name, '@staff') THEN 'Staff' WHEN INSTR(b.user_name, '@bournemouth') THEN 'Student' WHEN t1.ID=0 THEN 'Anonymous' END VisitorType, t1.Date Date FROM ( SELECT MAX(a.VisitorID) ID, a.VisitorIP IP, a.DateTimeStamp Date FROM custombu_stats_visits a WHERE YEAR(a.DateTimeStamp)='2010' GROUP BY a.VisitorIP ORDER BY MAX(a.VisitorID) DESC ) t1 LEFT OUTER JOIN wikimedia_user b ON t1.ID=b.user_id GROUP BY t1.IP ORDER BY b.user_name ) t2 GROUP BY MONTH(t2.Date);
- SELECT
- CONCAT(MONTHNAME(t2.Date), ' ', YEAR(t2.Date)) Month,
- SUM(IF(t2.VisitorType='Staff', 1, 0)) Staff,
- SUM(IF(t2.VisitorType='Student', 1, 0)) Student,
- SUM(IF(t2.VisitorType='Anonymous', 1, 0)) Guests
- FROM
- (
- SELECT
- b.user_name User,
- CASE
- WHEN INSTR(b.user_name, '@staff') THEN 'Staff'
- WHEN INSTR(b.user_name, '@bournemouth') THEN 'Student'
- WHEN t1.ID=0 THEN 'Anonymous'
- END VisitorType,
- t1.Date Date
- FROM (
- SELECT
- MAX(a.VisitorID) ID,
- a.VisitorIP IP,
- a.DateTimeStamp Date
- FROM
- custombu_stats_visits a
- WHERE
- YEAR(a.DateTimeStamp)='2010'
- GROUP BY
- a.VisitorIP
- ORDER BY
- MAX(a.VisitorID) DESC
- ) t1
- LEFT OUTER JOIN
- wikimedia_user b ON t1.ID=b.user_id
- GROUP BY
- t1.IP
- ORDER BY
- b.user_name
- ) t2
- GROUP BY
- MONTH(t2.Date);
Number of total hits aggregated by month with on-peak/off-peak.
SELECT CONCAT(MONTHNAME(a.DateTimeStamp), ' ', YEAR(a.DateTimeStamp)) Month, SUM(IF((HOUR(a.DateTimeStamp) BETWEEN 8 AND 18), 1, 0)) 'Worktime', SUM(IF((HOUR(a.DateTimeStamp) BETWEEN 8 AND 18), 0, 1)) 'Out-of-Hours', COUNT(VisitorIP) 'Total Hits' FROM custombu_stats_visits a WHERE YEAR(a.DateTimeStamp)='2010' GROUP BY CONCAT(YEAR(a.DateTimeStamp), '-', MONTH(a.DateTimeStamp)) ORDER BY a.DateTimeStamp
- SELECT
- CONCAT(MONTHNAME(a.DateTimeStamp), ' ', YEAR(a.DateTimeStamp)) Month,
- SUM(IF((HOUR(a.DateTimeStamp) BETWEEN 8 AND 18), 1, 0)) 'Worktime',
- SUM(IF((HOUR(a.DateTimeStamp) BETWEEN 8 AND 18), 0, 1)) 'Out-of-Hours',
- COUNT(VisitorIP) 'Total Hits'
- FROM
- custombu_stats_visits a
- WHERE
- YEAR(a.DateTimeStamp)='2010'
- GROUP BY
- CONCAT(YEAR(a.DateTimeStamp), '-', MONTH(a.DateTimeStamp))
- ORDER BY
- a.DateTimeStamp
Operating Systems used aggregated by month
SELECT CONCAT(MONTHNAME(t1.Date), ' ', YEAR(t1.Date)) Month, SUM(IF(t1.VisitorOS='Windows', 1, 0)) 'Windows', SUM(IF(t1.VisitorOS='Mac', 1, 0)) 'Mac', SUM(IF(t1.VisitorOS='Linux', 1, 0)) 'Linux', SUM(IF(t1.VisitorOS='Other', 1, 0)) 'Other', COUNT(t1.VisitorOS) 'Total' FROM ( SELECT a.DateTimeStamp Date, CASE WHEN INSTR(a.VisitorUAgent, 'WinNT') THEN 'Windows' WHEN INSTR(a.VisitorUAgent, 'Windows') THEN 'Windows' WHEN INSTR(a.VisitorUAgent, 'WinNT') THEN 'Windows' WHEN INSTR(a.VisitorUAgent, 'Mac_PowerPC') THEN 'Mac' WHEN INSTR(a.VisitorUAgent, 'Macintosh') THEN 'Mac' WHEN INSTR(a.VisitorUAgent, 'X11') THEN 'Linux' WHEN INSTR(a.VisitorUAgent, 'Linux') THEN 'Linux' ELSE 'Other' END VisitorOS, COUNT(a.VisitorUAgent) 'Total' FROM custombu_stats_visits a WHERE YEAR(a.DateTimeStamp)='2010' GROUP BY a.VisitorIP ORDER BY a.DateTimeStamp DESC ) t1 GROUP BY CONCAT(MONTH(t1.Date), ' ', YEAR(t1.Date))
- SELECT
- CONCAT(MONTHNAME(t1.Date), ' ', YEAR(t1.Date)) Month,
- SUM(IF(t1.VisitorOS='Windows', 1, 0)) 'Windows',
- SUM(IF(t1.VisitorOS='Mac', 1, 0)) 'Mac',
- SUM(IF(t1.VisitorOS='Linux', 1, 0)) 'Linux',
- SUM(IF(t1.VisitorOS='Other', 1, 0)) 'Other',
- COUNT(t1.VisitorOS) 'Total'
- FROM (
- SELECT
- a.DateTimeStamp Date,
- CASE
- WHEN INSTR(a.VisitorUAgent, 'WinNT') THEN 'Windows'
- WHEN INSTR(a.VisitorUAgent, 'Windows') THEN 'Windows'
- WHEN INSTR(a.VisitorUAgent, 'WinNT') THEN 'Windows'
- WHEN INSTR(a.VisitorUAgent, 'Mac_PowerPC') THEN 'Mac'
- WHEN INSTR(a.VisitorUAgent, 'Macintosh') THEN 'Mac'
- WHEN INSTR(a.VisitorUAgent, 'X11') THEN 'Linux'
- WHEN INSTR(a.VisitorUAgent, 'Linux') THEN 'Linux'
- ELSE 'Other'
- END VisitorOS,
- COUNT(a.VisitorUAgent) 'Total'
- FROM
- custombu_stats_visits a
- WHERE
- YEAR(a.DateTimeStamp)='2010'
- GROUP BY
- a.VisitorIP
- ORDER BY
- a.DateTimeStamp DESC
- ) t1
- GROUP BY
- CONCAT(MONTH(t1.Date), ' ', YEAR(t1.Date))
Browsers used aggregated by month
SELECT CONCAT(MONTHNAME(t1.Date), ' ', YEAR(t1.Date)) Month, SUM(IF(t1.VisitorBrowser='MSIE', 1, 0)) 'IE', SUM(IF(t1.VisitorBrowser='Firefox', 1, 0)) 'FF', SUM(IF(t1.VisitorBrowser='Safari', 1, 0)) 'CS', SUM(IF(t1.VisitorBrowser='Smartphone', 1, 0)) 'Smart', SUM(IF(t1.VisitorBrowser='Other', 1, 0)) 'Other', COUNT(t1.VisitorBrowser) 'Total' FROM (SELECT a.DateTimeStamp Date, CASE WHEN INSTR(a.VisitorUAgent, 'MSIE ') THEN 'MSIE' WHEN INSTR(a.VisitorUAgent, 'Firefox') THEN 'Firefox' WHEN INSTR(a.VisitorUAgent, 'Safari') THEN 'Safari' WHEN INSTR(a.VisitorUAgent, 'Chrome') THEN 'Safari' WHEN INSTR(a.VisitorUAgent, 'BlackBerry') THEN 'Smartphone' WHEN INSTR(a.VisitorUAgent, 'iPhone') THEN 'Smartphone' WHEN INSTR(a.VisitorUAgent, 'Android') THEN 'Smartphone' WHEN INSTR(a.VisitorUAgent, 'NetFront') THEN 'Smartphone' WHEN INSTR(a.VisitorUAgent, 'SonyEricsson') THEN 'Smartphone' ELSE 'Other' END VisitorBrowser FROM custombu_stats_visits a WHERE YEAR(a.DateTimeStamp)='2010' GROUP BY a.VisitorIP ORDER BY a.DateTimeStamp DESC) t1 GROUP BY CONCAT(MONTH(t1.Date), ' ', YEAR(t1.Date))
- SELECT
- CONCAT(MONTHNAME(t1.Date), ' ', YEAR(t1.Date)) Month,
- SUM(IF(t1.VisitorBrowser='MSIE', 1, 0)) 'IE',
- SUM(IF(t1.VisitorBrowser='Firefox', 1, 0)) 'FF',
- SUM(IF(t1.VisitorBrowser='Safari', 1, 0)) 'CS',
- SUM(IF(t1.VisitorBrowser='Smartphone', 1, 0)) 'Smart',
- SUM(IF(t1.VisitorBrowser='Other', 1, 0)) 'Other',
- COUNT(t1.VisitorBrowser) 'Total'
- FROM
- (SELECT
- a.DateTimeStamp Date,
- CASE
- WHEN INSTR(a.VisitorUAgent, 'MSIE ') THEN 'MSIE'
- WHEN INSTR(a.VisitorUAgent, 'Firefox') THEN 'Firefox'
- WHEN INSTR(a.VisitorUAgent, 'Safari') THEN 'Safari'
- WHEN INSTR(a.VisitorUAgent, 'Chrome') THEN 'Safari'
- WHEN INSTR(a.VisitorUAgent, 'BlackBerry') THEN 'Smartphone'
- WHEN INSTR(a.VisitorUAgent, 'iPhone') THEN 'Smartphone'
- WHEN INSTR(a.VisitorUAgent, 'Android') THEN 'Smartphone'
- WHEN INSTR(a.VisitorUAgent, 'NetFront') THEN 'Smartphone'
- WHEN INSTR(a.VisitorUAgent, 'SonyEricsson') THEN 'Smartphone'
- ELSE 'Other'
- END VisitorBrowser
- FROM
- custombu_stats_visits a
- WHERE
- YEAR(a.DateTimeStamp)='2010'
- GROUP BY
- a.VisitorIP
- ORDER BY
- a.DateTimeStamp DESC) t1
- GROUP BY
- CONCAT(MONTH(t1.Date), ' ', YEAR(t1.Date))
Number of actions aggregated by top 10 users this month
SELECT MONTH(a.DateTimeStamp) Month, SUM(IF(INSTR(a.VisitorURL, '?'), 0, 1)) Views, SUM(IF(INSTR(a.VisitorURL, 'Special:Search'), 1, 0)) Searches, SUM(IF(INSTR(a.VisitorURL, 'create=Add'), 1, 0)) 'Pages Created', SUM(IF(INSTR(a.VisitorURL, 'action=edit'), 1, 0)) 'Pages Edited', SUM(IF(INSTR(a.VisitorURL, 'Special:Move'), 1, 0)) 'Pages Renamed', SUM(IF(INSTR(a.VisitorURL, 'Kerberos_Login?returnto'), 1, 0)) 'Logins', SUM(IF(INSTR(a.VisitorURL, 'action=delete'), 1, 0)) 'Pages Deleted', ( COUNT(a.VisitorID) - SUM(IF(INSTR(a.VisitorURL, '?'), 0, 1)) - SUM(IF(INSTR(a.VisitorURL, 'Special:Search'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'create=Add'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'action=edit'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'action=delete'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'Kerberos_Login?returnto'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'Special:Move'), 1, 0)) ) 'Other Actions', COUNT(a.VisitorID) 'Total Actions', a.VisitorIP IP, b.user_id UserID, b.user_name Name FROM custombu_stats_visits a, wikimedia_user b WHERE a.VisitorID=b.user_id AND a.DateTimeStamp BETWEEN CONCAT('2010', '-', MONTH(NOW()), '-01') AND CONCAT('2010', '-', MONTH(NOW()), '-', DAYOFMONTH(NOW())) GROUP BY b.user_name ORDER BY COUNT(a.VisitorID) DESC LIMIT 0,10;
- SELECT
- MONTH(a.DateTimeStamp) Month,
- SUM(IF(INSTR(a.VisitorURL, '?'), 0, 1)) Views,
- SUM(IF(INSTR(a.VisitorURL, 'Special:Search'), 1, 0)) Searches,
- SUM(IF(INSTR(a.VisitorURL, 'create=Add'), 1, 0)) 'Pages Created',
- SUM(IF(INSTR(a.VisitorURL, 'action=edit'), 1, 0)) 'Pages Edited',
- SUM(IF(INSTR(a.VisitorURL, 'Special:Move'), 1, 0)) 'Pages Renamed',
- SUM(IF(INSTR(a.VisitorURL, 'Kerberos_Login?returnto'), 1, 0)) 'Logins',
- SUM(IF(INSTR(a.VisitorURL, 'action=delete'), 1, 0)) 'Pages Deleted',
- ( COUNT(a.VisitorID) - SUM(IF(INSTR(a.VisitorURL, '?'), 0, 1)) - SUM(IF(INSTR(a.VisitorURL, 'Special:Search'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'create=Add'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'action=edit'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'action=delete'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'Kerberos_Login?returnto'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'Special:Move'), 1, 0)) ) 'Other Actions',
- COUNT(a.VisitorID) 'Total Actions',
- a.VisitorIP IP,
- b.user_id UserID,
- b.user_name Name
- FROM
- custombu_stats_visits a,
- wikimedia_user b
- WHERE
- a.VisitorID=b.user_id
- AND
- a.DateTimeStamp BETWEEN CONCAT('2010', '-', MONTH(NOW()), '-01')
- AND
- CONCAT('2010', '-', MONTH(NOW()), '-', DAYOFMONTH(NOW()))
- GROUP BY
- b.user_name
- ORDER BY
- COUNT(a.VisitorID) DESC
- LIMIT
- 0,10;