
Source Code for Module omero.util.importperf

#!/usr/bin/env python
# encoding: utf-8

  4  """ 
  5  Performs various performance metrics and reports on OMERO.importer log files. 
  6  """ 

# Copyright (C) 2009 University of Dundee

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

import re
import os
import sys

import mx.DateTime as DateTime
from mx.DateTime import DateTimeDelta
from getopt import getopt, GetoptError

def usage(error):
    """Prints usage so that we don't have to. :)"""
    cmd = sys.argv[0]
    print """%s
Usage: %s [options...] <importer_log_file>
Generate performance metrics from an OMERO.importer log file.

Options:
  --series_report    Print a CSV report for each import's series I/O
  --help             Display this help and exit

Examples:
  %s importer.log
  %s --series_report importer.log > series_report.csv

Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % \
        (error, cmd, cmd, cmd)
    sys.exit(2)

class ParsingError(Exception):
    """Raised whenever there is an error parsing a log file."""
    pass

class Import(object):
    """Stores context about a given import."""

    def __init__(self, start, name):
        self.start = start
        self.end = None
        self.name = name
        self.setid_start = start
        self.setid_end = None
        self.post_process_start = None
        self.post_process_end = None
        self.save_to_db_start = None
        self.save_to_db_end = None
        self.overlays_start = None
        self.thumbnailing_start = None
        self.series = []

class Series(object):
    """Stores context about a given series."""

    def __init__(self, start):
        self.start = start
        self.end = None
        self.planes = []

class Plane(object):
    """Stores context about a given plane."""

    def __init__(self, abs_date_time):
        self.abs_date_time = abs_date_time

class ImporterLog(object):
    """
    Parses and stores context from an OMERO.importer log file. It also has
    the capability of producing various reports.
    """

    # Regular expression for matching log4j log lines
    log_regex = re.compile(
        '^(?P<date_time>\S+\s+\S+)\s+(?P<ms_elapsed>\d+)\s+'
        '(?P<thread>\[.*?\])\s+(?P<level>\S+)\s+(?P<class>\S+)\s+-\s+'
        '(?P<message>.*)$')

    # Regular expression for matching possible OMERO.importer status messages
    status_regex = re.compile('^[A-Z_]*')

    # Format string for matching log4j date/time strings
    date_time_fmt = '%Y-%m-%d %H:%M:%S'

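    # Hypothetical example (not taken from a real log) of the kind of log4j
    # line that log_regex is written to match:
    #
    #   2009-06-01 14:25:36,123 4567 [main] INFO  ome.formats.importer - LOADING_IMAGE: foo.dv
    #
    # which would yield date_time='2009-06-01 14:25:36,123', ms_elapsed='4567',
    # thread='[main]', level='INFO', class='ome.formats.importer' and
    # message='LOADING_IMAGE: foo.dv'.
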
    def __init__(self, log_file):
        self.log_file = log_file
        self.imports = []
        self.last_import = None
        self.last_series = None
        self.parse()

    def parse(self):
        """Parses the specified log file."""
        line_no = 1
        for line in self.log_file:
            match = self.log_regex.match(line)
            if match:
                self.handle_match(match)
            line_no += 1

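    # Status messages recognised by handle_match() below: LOADING_IMAGE
    # (new import), LOADED_IMAGE (setId() complete), BEGIN_POST_PROCESS /
    # END_POST_PROCESS, BEGIN_SAVE_TO_DB / END_SAVE_TO_DB, DATASET_STORED
    # (new series), IMPORT_STEP (one plane processed), DATA_STORED (series
    # complete), IMPORT_OVERLAYS, IMPORT_THUMBNAILING and IMPORT_DONE.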
    def handle_match(self, match):
        """Handles cases where the log_regex is matched."""
        message = match.group('message')
        if not self.status_regex.match(message):
            return
        date_time = match.group('date_time')
        date_time, ms = date_time.split(',')
        date_time = DateTime.strptime(date_time, self.date_time_fmt)
        ms = DateTimeDelta(0, 0, 0, int(ms) / 1000.0)
        date_time = date_time + ms
        if message.startswith('LOADING_IMAGE'):
            name = message[message.find(':') + 2:]
            self.last_import = Import(date_time, name)
            self.imports.append(self.last_import)
        elif not hasattr(self, 'last_import') or self.last_import is None:
            return
        elif message.startswith('LOADED_IMAGE'):
            self.last_import.setid_end = date_time
        elif message.startswith('BEGIN_POST_PROCESS'):
            self.last_import.post_process_start = date_time
        elif message.startswith('END_POST_PROCESS'):
            self.last_import.post_process_end = date_time
        elif message.startswith('BEGIN_SAVE_TO_DB'):
            self.last_import.save_to_db_start = date_time
        elif message.startswith('END_SAVE_TO_DB'):
            self.last_import.save_to_db_end = date_time
        elif message.startswith('IMPORT_OVERLAYS'):
            self.last_import.overlays_start = date_time
        elif message.startswith('IMPORT_THUMBNAILING'):
            self.last_import.thumbnailing_start = date_time
        elif message.startswith('IMPORT_DONE'):
            self.last_import.end = date_time
            self.last_import = None
        elif message.startswith('DATASET_STORED'):
            self.last_series = Series(date_time)
            self.last_import.series.append(self.last_series)
        elif message.startswith('DATA_STORED'):
            self.last_import.series[-1].end = date_time
        elif message.startswith('IMPORT_STEP'):
            self.last_series.planes.append(Plane(date_time))

    def elapsed(self, start, end):
        if start is not None and end is not None:
            return str((end - start).seconds) + "sec"
        return 'Unknown'

    def report(self):
        """
        Prints a simple report to STDOUT stating timings for the overall
        import and Bio-Formats setId().
        """
        for import_n, i in enumerate(self.imports):
            elapsed = self.elapsed(i.start, i.end)
            print "Import(%s) %d start: %s end: %s elapsed: %s" % \
                (i.name, import_n, i.start, i.end, elapsed)
            elapsed = self.elapsed(i.setid_start, i.setid_end)
            print "setId() start: %s end: %s elapsed: %s" % \
                (i.setid_start, i.setid_end, elapsed)
            elapsed = self.elapsed(i.post_process_start, i.post_process_end)
            print "Post process start: %s end: %s elapsed: %s" % \
                (i.post_process_start, i.post_process_end, elapsed)
            elapsed = self.elapsed(i.save_to_db_start, i.save_to_db_end)
            print "Save to DB start: %s end: %s elapsed: %s" % \
                (i.save_to_db_start, i.save_to_db_end, elapsed)
            if len(i.series) > 0:
                elapsed = self.elapsed(i.series[0].start, i.series[-1].end)
                print "Image I/O start: %s end: %s elapsed: %s" % \
                    (i.series[0].start, i.series[-1].end, elapsed)
            elapsed = self.elapsed(i.overlays_start, i.thumbnailing_start)
            print "Overlays start: %s end: %s elapsed: %s" % \
                (i.overlays_start, i.thumbnailing_start, elapsed)
            elapsed = self.elapsed(i.thumbnailing_start, i.end)
            print "Thumbnailing start: %s end: %s elapsed: %s" % \
                (i.thumbnailing_start, i.end, elapsed)

    def series_report_csv(self):
        """
        Prints a CSV report to STDOUT with timings for the I/O operations
        of each import's set of image series.
        """
        print ','.join(['import', 'series', 'series_start', 'series_end',
                        'series_elapsed'])
        for import_n, i in enumerate(self.imports):
            for series_n, series in enumerate(i.series):
                if series.start is None or series.end is None:
                    continue
                elapsed = (series.end - series.start).seconds
                values = [import_n, series_n, series.start, series.end,
                          elapsed * 1000]
                print ','.join([str(v) for v in values])

if __name__ == "__main__":
    try:
        options, args = getopt(sys.argv[1:], "", ['series_report', 'help'])
    except GetoptError, (msg, opt):
        usage(msg)

    try:
        log_file, = args
    except ValueError:
        usage('Must specify a single log file.')
    log = ImporterLog(open(log_file))

    do_default_report = True
    for option, argument in options:
        if option == '--help':
            usage('')
        if option == '--series_report':
            do_default_report = False
            log.series_report_csv()

    if do_default_report:
        log.report()
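
The classes above can also be used outside of the command-line entry point. The following is a minimal sketch of such programmatic use, assuming the module is importable as omero.util.importperf (per this page's title) and using importer.log as a stand-in path; it prints per-import wall-clock time directly from the parsed Import objects rather than via report():

    import omero.util.importperf as importperf

    # Parse the log and walk the collected imports.
    log = importperf.ImporterLog(open('importer.log'))
    for n, i in enumerate(log.imports):
        # Imports without an IMPORT_DONE line have no end time; skip them.
        if i.start is not None and i.end is not None:
            print "Import %d (%s): %ss" % (n, i.name, (i.end - i.start).seconds)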