1
2
3
4 """
5 Performs various performance metrics and reports on OMERO.importer log files.
6 """
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 import re
25 import os
26 import sys
27
28 import mx.DateTime as DateTime
29 from mx.DateTime import DateTimeDelta
30 from getopt import getopt, GetoptError
31
def usage(error):
    """Prints usage so that we don't have to. :)

    ``error`` is printed on the first line, ahead of the usage text; pass an
    empty string when there is nothing to report.  This function never
    returns: it always ends the process through ``sys.exit(2)``.
    """
    cmd = sys.argv[0]
    # A single parenthesised argument keeps this statement valid (and its
    # output identical) under both the Python 2 print statement and the
    # Python 3 print function.
    print("""%s
Usage: %s [options...] <importer_log_file>
Generate performance metrics from an OMERO.importer log file.

Options:
--series_report Print a CSV report for each import's series I/O
--help Display this help and exit

Examples:
%s importer.log
%s --series_report importer.log > series_report.csv

Report bugs to ome-devel@lists.openmicroscopy.org.uk""" %
          (error, cmd, cmd, cmd))
    sys.exit(2)
50
52 """Raised whenever there is an error parsing a log file."""
53 pass
54
class Import(object):
    """Stores context about a given import.

    Every attribute other than ``start``, ``name`` and ``setid_start`` begins
    as ``None`` (or an empty list for ``series``) and is filled in by the log
    parser as the matching status messages are encountered.
    """

    def __init__(self, start, name):
        # Timestamp of the message that opened this import, and the name
        # taken from that message.
        self.start = start
        self.end = None
        self.name = name
        # setId() timing is counted from the very start of the import.
        self.setid_start = start
        self.setid_end = None
        self.post_process_start = None
        self.post_process_end = None
        self.save_to_db_start = None
        self.save_to_db_end = None
        self.overlays_start = None
        self.thumbnailing_start = None
        # Series objects recorded for this import, in log order.
        self.series = []
71
class Series(object):
    """Stores context about a given series."""

    def __init__(self, start):
        # Timestamp at which the series was opened.
        self.start = start
        # Filled in later by the log parser; None until then.
        self.end = None
        # Plane objects recorded for this series, in log order.
        self.planes = []
79
class Plane(object):
    """Stores context about a given plane."""

    def __init__(self, abs_date_time):
        # Absolute timestamp of the log message that reported this plane.
        self.abs_date_time = abs_date_time
85
class ImporterLog(object):
    """
    Parses and stores context from an OMERO.importer log file. It also has
    the capability of producing various reports.

    The log is parsed eagerly on construction.  Afterwards ``imports`` holds
    one ``Import`` per ``LOADING_IMAGE`` status message, in log order.
    """

    # Matches one log line of the form
    # "<date> <time,ms> <ms elapsed> [thread] LEVEL class - message".
    log_regex = re.compile(
        r'^(?P<date_time>\S+\s+\S+)\s+(?P<ms_elapsed>\d+)\s+'
        r'(?P<thread>\[.*?\])\s+(?P<level>\S+)\s+(?P<class>\S+)\s+-\s+'
        r'(?P<message>.*)$')

    # Matches messages that begin with an upper-case/underscore status token.
    # '+' rather than '*': with '*' the pattern also matches the empty
    # string, so it accepted every message and filtered nothing.
    status_regex = re.compile(r'^[A-Z_]+')

    # strptime format of the date/time part that precedes the ",<ms>" suffix.
    date_time_fmt = '%Y-%m-%d %H:%M:%S'

    def __init__(self, log_file):
        """Parses ``log_file``, an iterable of log lines, immediately."""
        self.log_file = log_file
        self.imports = []
        # The parser state has to exist before parse() runs, otherwise the
        # first status message that needs it raises AttributeError.
        self.last_import = None
        self.last_series = None
        self.parse()
        # The state is only meaningful while parsing; clear it afterwards.
        self.last_import = None
        self.last_series = None

    def parse(self):
        """Parses the specified log file."""
        for line in self.log_file:
            match = self.log_regex.match(line)
            if match:
                self.handle_match(match)

    def handle_match(self, match):
        """Handles cases where the log_regex is matched.

        Messages that do not start with a status token are ignored, as is
        any status other than ``LOADING_IMAGE`` that arrives while no import
        is open.
        """
        message = match.group('message')
        if not self.status_regex.match(message):
            return
        date_time = self._parse_date_time(match.group('date_time'))

        if message.startswith('LOADING_IMAGE'):
            # The name follows "<STATUS>: ".  partition() avoids the garbage
            # slice that find() + 2 produces when the colon is missing.
            _, colon, remainder = message.partition(':')
            name = remainder[1:] if colon else ''
            self.last_import = Import(date_time, name)
            # A new import starts without a series; never let planes leak
            # into the previous import's last series.
            self.last_series = None
            self.imports.append(self.last_import)
            return

        current = getattr(self, 'last_import', None)
        if current is None:
            return

        if message.startswith('LOADED_IMAGE'):
            current.setid_end = date_time
        elif message.startswith('BEGIN_POST_PROCESS'):
            current.post_process_start = date_time
        elif message.startswith('END_POST_PROCESS'):
            current.post_process_end = date_time
        elif message.startswith('BEGIN_SAVE_TO_DB'):
            current.save_to_db_start = date_time
        elif message.startswith('END_SAVE_TO_DB'):
            current.save_to_db_end = date_time
        elif message.startswith('IMPORT_OVERLAYS'):
            current.overlays_start = date_time
        elif message.startswith('IMPORT_THUMBNAILING'):
            current.thumbnailing_start = date_time
        elif message.startswith('IMPORT_DONE'):
            current.end = date_time
            self.last_import = None
        elif message.startswith('DATASET_STORED'):
            self.last_series = Series(date_time)
            current.series.append(self.last_series)
        elif message.startswith('DATA_STORED'):
            # Ignore a DATA_STORED that was not preceded by DATASET_STORED
            # instead of failing with IndexError.
            if current.series:
                current.series[-1].end = date_time
        elif message.startswith('IMPORT_STEP'):
            series = getattr(self, 'last_series', None)
            if series is not None:
                series.planes.append(Plane(date_time))

    def _parse_date_time(self, text):
        """Converts a "<date> <time>,<ms>" string into an mx.DateTime value."""
        stamp, ms = text.split(',')
        stamp = DateTime.strptime(stamp, self.date_time_fmt)
        return stamp + DateTimeDelta(0, 0, 0, int(ms) / 1000.0)

    def elapsed(self, start, end):
        """
        Returns the difference between ``end`` and ``start`` as a string
        such as "5sec", or 'Unknown' if either value is None.
        """
        if start is not None and end is not None:
            return str((end - start).seconds) + "sec"
        return 'Unknown'

    def _report_phase(self, label, start, end):
        """Prints one "<label> start: ... end: ... elapsed: ..." line."""
        print("%s start: %s end: %s elapsed: %s" %
              (label, start, end, self.elapsed(start, end)))

    def report(self):
        """
        Prints a simple report to STDOUT stating timings for the overall
        import and Bio-Formats setId().
        """
        for import_n, i in enumerate(self.imports):
            self._report_phase("Import(%s) %d" % (i.name, import_n),
                               i.start, i.end)
            self._report_phase("setId()", i.setid_start, i.setid_end)
            self._report_phase("Post process",
                               i.post_process_start, i.post_process_end)
            self._report_phase("Save to DB",
                               i.save_to_db_start, i.save_to_db_end)
            # Image I/O spans the first series' start to the last series'
            # end, so it can only be reported when a series was recorded.
            if len(i.series) > 0:
                self._report_phase("Image I/O",
                                   i.series[0].start, i.series[-1].end)
            # The overlays phase runs until thumbnailing begins, and
            # thumbnailing runs until the import ends.
            self._report_phase("Overlays",
                               i.overlays_start, i.thumbnailing_start)
            self._report_phase("Thumbnailing", i.thumbnailing_start, i.end)

    def series_report_csv(self):
        """
        Prints a CSV report to STDOUT with timings for the I/O operations
        of each import's set of image series.

        Series without both a start and an end are skipped.  The elapsed
        column is the ``seconds`` value of the difference multiplied by 1000.
        """
        print(','.join(['import', 'series', 'series_start', 'series_end',
                        'series_elapsed']))
        for import_n, i in enumerate(self.imports):
            for series_n, series in enumerate(i.series):
                if series.start is None or series.end is None:
                    continue
                elapsed = (series.end - series.start).seconds
                values = [import_n, series_n, series.start, series.end,
                          elapsed * 1000]
                print(','.join([str(v) for v in values]))
210
def main(argv=None):
    """Command-line entry point.

    ``argv`` is the argument list without the program name and defaults to
    ``sys.argv[1:]``.  Prints the series CSV report when ``--series_report``
    is given, and the default timing report otherwise.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        options, args = getopt(argv, "", ['series_report', 'help'])
    except GetoptError as e:
        # usage() exits the process, so `options` is always bound below.
        usage(e.msg)

    selected = [option for option, _ in options]
    # Honour --help before demanding (and parsing) a log file.
    if '--help' in selected:
        usage('')

    if len(args) != 1:
        usage('Must specify exactly one log file.')
    # ImporterLog parses eagerly in its constructor, so the file can be
    # closed as soon as the object exists.
    with open(args[0]) as log_file:
        log = ImporterLog(log_file)

    if '--series_report' in selected:
        log.series_report_csv()
    else:
        log.report()


if __name__ == "__main__":
    main()
233