1
2
3 """
4 Populate bulk metadata tables from delimited text files.
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 import tempfile
27 import logging
28 import time
29 import sys
30 import csv
31 import re
32 from threading import Thread
33 from StringIO import StringIO
34 from getpass import getpass
35 from getopt import getopt, GetoptError
36 from Queue import Queue
37
38 import omero.clients
39 from omero.rtypes import rdouble, rstring, rint
40 from omero.model import DatasetAnnotationLink, DatasetI, FileAnnotationI, \
41 OriginalFileI, PlateI, PlateAnnotationLinkI, ScreenI, \
42 ScreenAnnotationLinkI
43 from omero.grid import ImageColumn, LongColumn, PlateColumn, StringColumn, \
44 WellColumn
45 from omero.util.temp_files import create_path, remove_path
46 from omero import client
47
48 from populate_roi import ThreadPool
49
50
51 try:
52 from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
53 except ImportError:
54 from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
55
56 log = logging.getLogger("omero.util.populate_metadata")
57
def usage(error):
    """
    Print an error message followed by the command line help, then exit.

    :param error: text describing what was wrong with the invocation; it
        becomes the first line of the output.
    :raises SystemExit: always, with exit status 2.
    """
    cmd = sys.argv[0]
    # A single parenthesised argument keeps this line valid both as a
    # Python 2 print statement and as a Python 3 print() call.
    print("""%s
Usage: %s [options] <target_object> <file>
Runs metadata population code for a given object.

Options:
-s OMERO hostname to use [defaults to "localhost"]
-p OMERO port to use [defaults to 4064]
-u OMERO username to use
-w OMERO password
-k OMERO session key to use
-i Dump measurement information and exit (no population)
-d Print debug statements

Examples:
%s -s localhost -p 14064 -u bob Plate:6 metadata.csv

Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd))
    sys.exit(2)
79
80
81 thread_pool = None
82
83
84 PLATE_NAME_COLUMN = 'Plate Name'
85 WELL_NAME_COLUMN = 'Well Name'
86
class Skip(object):
    """Marker type: an instance denotes a request to skip the current row."""
    pass
90
97
class HeaderResolver(object):
    """
    Header resolver for known header names which is responsible for creating
    the column set for the OMERO.tables instance.
    """

    # Size every StringColumn is created with.
    DEFAULT_COLUMN_SIZE = 1

    # Lower-cased header name -> column class used when the target is a Plate.
    plate_keys = {
        'well': WellColumn,
        'field': ImageColumn,
        'row': LongColumn,
        'column': LongColumn,
        'wellsample': ImageColumn,
    }

    # A Screen understands every Plate header plus 'plate' itself.
    screen_keys = dict({
        'plate': PlateColumn,
    }, **plate_keys)

    def __init__(self, target_object, headers):
        """
        :param target_object: model object the columns are created for; its
            class selects which header names are recognised.
        :param headers: header strings, one per column of the input file.
        """
        self.target_object = target_object
        # NOTE(review): '/' is swapped for '\\' in every header, presumably
        # because '/' is not usable in a table column name -- confirm.
        self.headers = [v.replace('/', '\\') for v in headers]
        self.headers_as_lower = [v.lower() for v in self.headers]

    def create_columns(self):
        """
        Build the column list appropriate for the class of the target object.

        :returns: list of column objects, one per header plus any derived
            name columns.
        :raises MetadataError: if the target is not a Screen, Plate or
            Dataset.
        """
        target_class = self.target_object.__class__
        target_id = self.target_object.id.val
        if ScreenI is target_class:
            log.debug('Creating columns for Screen:%d' % target_id)
            return self.create_columns_screen()
        if PlateI is target_class:
            log.debug('Creating columns for Plate:%d' % target_id)
            return self.create_columns_plate()
        if DatasetI is target_class:
            log.debug('Creating columns for Dataset:%d' % target_id)
            return self.create_columns_dataset()
        raise MetadataError('Unsupported target object class: %s'
                            % target_class)

    def create_columns_screen(self):
        """Build the columns for a Screen target (uses ``screen_keys``)."""
        return self._create_columns(self.screen_keys)

    def create_columns_plate(self):
        """Build the columns for a Plate target (uses ``plate_keys``)."""
        return self._create_columns(self.plate_keys)

    def create_columns_dataset(self):
        """Dataset targets are not supported yet; always raises."""
        # NotImplementedError is still an Exception, so existing handlers
        # keep working.
        raise NotImplementedError('To be implemented!')

    def _create_columns(self, keys):
        """
        Create one column per header and append the derived name columns.

        Headers found in ``keys`` (compared lower-cased) get the mapped
        column class; every other header becomes a StringColumn. For each
        PlateColumn a 'Plate Name' StringColumn is appended and for each
        WellColumn a 'Well Name' StringColumn, in header order.

        :param keys: dict of lower-cased header name -> column class.
        :returns: the list of created columns.
        """
        columns = list()
        for name, header_as_lower in zip(self.headers,
                                         self.headers_as_lower):
            column_class = keys.get(header_as_lower)
            if column_class is None:
                column = StringColumn(name, '', self.DEFAULT_COLUMN_SIZE,
                                      list())
            else:
                column = column_class(name, '', list())
            columns.append(column)
        # Collect the derived columns separately so the list being walked
        # is never modified during iteration.
        name_columns = list()
        for column in columns:
            if column.__class__ is PlateColumn:
                name_columns.append(StringColumn(
                    PLATE_NAME_COLUMN, '', self.DEFAULT_COLUMN_SIZE, list()))
            if column.__class__ is WellColumn:
                name_columns.append(StringColumn(
                    WELL_NAME_COLUMN, '', self.DEFAULT_COLUMN_SIZE, list()))
        columns.extend(name_columns)
        return columns
178
180 """
181 Value resolver for column types which is responsible for filling up
182 non-metadata columns with their OMERO data model identifiers.
183 """
184
185 AS_ALPHA = [chr(v) for v in range(97, 122 + 1)]
186 WELL_REGEX = re.compile(r'^([a-zA-Z]+)(\d+)$')
187
def __init__(self, client, target_object):
    """
    Remember the client and target, then load the target's data.

    The class of ``target_object`` decides which loader runs: Plate,
    Dataset or Screen.

    :param client: client whose session is used for the queries.
    :param target_object: model object the metadata is populated for.
    :raises MetadataError: if the target is of any other class.
    """
    self.client = client
    self.target_object = target_object
    self.target_class = self.target_object.__class__
    if PlateI is self.target_class:
        return self.load_plate()
    if DatasetI is self.target_class:
        return self.load_dataset()
    if ScreenI is self.target_class:
        return self.load_screen()
    # The class lives on the instance; the bare name ``target_class`` was
    # never defined here and turned this error path into a NameError.
    raise MetadataError('Unsupported target object class: %s'
                        % self.target_class)
def load_screen(self):
    """
    Load the target Screen and every plate linked to it.

    Replaces ``self.target_object`` with the fetched Screen and fills
    ``plates_by_name``, ``plates_by_id``, ``wells_by_location`` (keyed by
    plate name) and ``wells_by_id`` (keyed by plate id). Each plate is
    passed to ``parse_plate`` together with its own pair of dicts.

    :raises MetadataError: if the Screen query returns nothing.
    """
    query_service = self.client.getSession().getQueryService()
    # NOTE(review): the 'omero.group': '-1' call context presumably means
    # "query across all groups" -- confirm against the OMERO API.
    all_groups = {'omero.group': '-1'}
    screen_id = self.target_object.id.val
    parameters = omero.sys.ParametersI()
    parameters.addId(screen_id)
    log.debug('Loading Screen:%d' % screen_id)
    self.target_object = query_service.findByQuery(
        'select s from Screen as s '
        'join fetch s.plateLinks as p_link '
        'join fetch p_link.child as p '
        'where s.id = :id', parameters, all_groups)
    if self.target_object is None:
        raise MetadataError('Could not find target object!')
    self.wells_by_location = dict()
    self.wells_by_id = dict()
    self.plates_by_name = dict()
    self.plates_by_id = dict()
    for link in self.target_object.copyPlateLinks():
        # Re-query each linked plate so its wells and well samples are
        # fetched along with it.
        parameters = omero.sys.ParametersI()
        parameters.addId(link.child.id.val)
        plate = query_service.findByQuery(
            'select p from Plate as p '
            'join fetch p.wells as w '
            'join fetch w.wellSamples as ws '
            'where p.id = :id', parameters, all_groups)
        plate_name = plate.name.val
        plate_id = plate.id.val
        self.plates_by_name[plate_name] = plate
        self.plates_by_id[plate_id] = plate
        wells_by_location = dict()
        wells_by_id = dict()
        self.wells_by_location[plate_name] = wells_by_location
        self.wells_by_id[plate_id] = wells_by_id
        self.parse_plate(plate, wells_by_location, wells_by_id)
231
def load_plate(self):
    """
    Load the target Plate together with its wells and well samples.

    Replaces ``self.target_object`` with the fetched Plate and creates
    ``wells_by_location`` (keyed by plate name) and ``wells_by_id`` (keyed
    by plate id), each holding one dict for this plate. The plate and
    both inner dicts are passed to ``parse_plate``.

    :raises MetadataError: if the Plate query returns nothing.
    """
    query_service = self.client.getSession().getQueryService()
    plate_id = self.target_object.id.val
    parameters = omero.sys.ParametersI()
    parameters.addId(plate_id)
    log.debug('Loading Plate:%d' % plate_id)
    # NOTE(review): the 'omero.group': '-1' call context presumably means
    # "query across all groups" -- confirm against the OMERO API.
    self.target_object = query_service.findByQuery(
        'select p from Plate as p '
        'join fetch p.wells as w '
        'join fetch w.wellSamples as ws '
        'where p.id = :id', parameters, {'omero.group': '-1'})
    if self.target_object is None:
        raise MetadataError('Could not find target object!')
    plate = self.target_object
    wells_by_location = dict()
    wells_by_id = dict()
    self.wells_by_location = {plate.name.val: wells_by_location}
    self.wells_by_id = {plate.id.val: wells_by_id}
    self.parse_plate(plate, wells_by_location, wells_by_id)
251
252 - def parse_plate(self, plate, wells_by_location, wells_by_id):
268
def load_dataset(self):
    """Dataset targets are not supported yet; always raises."""
    # NotImplementedError is still an Exception, so existing handlers keep
    # working.
    raise NotImplementedError('To be implemented!')
271
def resolve(self, column, value, row):
    """
    Convert one raw cell into the value stored in its column.

    * WellColumn: the well name (letters then digits, e.g. "A10") is looked
      up in ``wells_by_location`` and the well id is returned, or -1 when
      the location is not known.
    * PlateColumn: the plate name is looked up in ``plates_by_name`` and
      the plate id is returned, or a ``Skip`` instance when it is missing.
    * LongColumn named 'row' or 'column': a number is returned minus one;
      anything else is returned as its index in ``AS_ALPHA``.
    * StringColumn: the value is returned unchanged.

    :param column: column object the cell belongs to.
    :param value: raw string read from the file.
    :param row: list of ``(column, raw value)`` pairs for the whole row.
    :raises MetadataError: if a well name cannot be parsed, if a well
        cannot be tied to a plate, or if the column class is unsupported.
    """
    column_class = column.__class__
    column_as_lower = column.name.lower()
    if WellColumn is column_class:
        m = self.WELL_REGEX.match(value)
        if m is None or len(m.groups()) != 2:
            raise MetadataError(
                'Cannot parse well identifier "%s" from row: %r' %
                (value, [o[1] for o in row]))
        plate_row = m.group(1).lower()
        # Normalise through long() so that "01" and "1" name the same key.
        plate_column = str(long(m.group(2)))
        wells_by_location = None
        if len(self.wells_by_location) == 1:
            wells_by_location = list(self.wells_by_location.values())[0]
            log.debug('Parsed "%s" row: %s column: %s' %
                      (value, plate_row, plate_column))
        else:
            # Several plates: the plate cell of this row names the one to
            # use. A separate loop variable keeps the ``column`` argument
            # intact.
            # NOTE(review): an unknown plate name raises KeyError here
            # rather than producing a Skip as the PlateColumn branch does.
            for other_column, plate in row:
                if other_column.__class__ is PlateColumn:
                    wells_by_location = self.wells_by_location[plate]
                    log.debug('Parsed "%s" row: %s column: %s plate: %s' %
                              (value, plate_row, plate_column, plate))
                    break
        if wells_by_location is None:
            # Previously this fell through to an unbound local variable.
            raise MetadataError(
                'Cannot resolve well "%s" without a plate in row: %r' %
                (value, [o[1] for o in row]))
        try:
            return wells_by_location[plate_row][plate_column].id.val
        except KeyError:
            log.debug('Row: %s Column: %s not found!' %
                      (plate_row, plate_column))
            return long(-1)
    if PlateColumn is column_class:
        try:
            return self.plates_by_name[value].id.val
        except KeyError:
            log.warn('Screen is missing plate: %s' % value)
            return Skip()
    if column_as_lower in ('row', 'column') \
            and column_class is LongColumn:
        try:
            # Numeric input is shifted down by one before being stored.
            return long(value) - 1
        except ValueError:
            return long(self.AS_ALPHA.index(value.lower()))
    if StringColumn is column_class:
        return value
    raise MetadataError('Unsupported column class: %s' % column_class)
316
class ParsingContext(object):
    """Generic parsing context for CSV files."""

    def __init__(self, client, target_object, file):
        """
        :param client: client whose session is used for all server calls.
        :param target_object: model object the table is attached to.
        :param file: path of the comma-delimited file to read.
        """
        self.client = client
        self.target_object = target_object
        self.file = file
        self.value_resolver = ValueResolver(self.client, self.target_object)

    def create_annotation_link(self):
        """
        Create an empty annotation link matching the target's class.

        :returns: a new Screen, Plate or Dataset annotation link.
        :raises MetadataError: if the target is of any other class.
        """
        self.target_class = self.target_object.__class__
        if ScreenI is self.target_class:
            return ScreenAnnotationLinkI()
        if PlateI is self.target_class:
            return PlateAnnotationLinkI()
        if DatasetI is self.target_class:
            return DatasetAnnotationLinkI()
        # The bare name ``target_class`` was never defined in this method.
        raise MetadataError('Unsupported target object class: %s'
                            % self.target_class)

    def get_column_widths(self):
        """
        Return the ``size`` of every column, in column order.

        :returns: list with one entry per column; ``None`` stands in for a
            column that has no ``size`` attribute.
        """
        return [getattr(column, 'size', None) for column in self.columns]

    def parse(self):
        """
        Read the file, create the columns from its first row and fill them.

        The first row is treated as the header; every following row is
        handed to ``populate`` and ``post_process`` runs afterwards.

        :raises MetadataError: if the file contains no rows at all.
        """
        with open(self.file, 'U') as data:
            rows = list(csv.reader(data, delimiter=','))
        if not rows:
            raise MetadataError('No header row found in file: %s' % self.file)
        log.debug('Header: %r' % rows[0])
        header_resolver = HeaderResolver(self.target_object, rows[0])
        self.columns = header_resolver.create_columns()
        log.debug('Columns: %r' % self.columns)
        self.populate(rows[1:])
        self.post_process()
        log.debug('Column widths: %r' % self.get_column_widths())
        log.debug('Columns: %r' %
                  [(o.name, len(o.values)) for o in self.columns])

    def populate(self, rows):
        """
        Resolve every cell of every row and append it to its column.

        A row is dropped as soon as one of its cells resolves to a ``Skip``
        instance. Blank rows are ignored. Columns named 'Plate Name' or
        'Well Name' receive nothing here; ``post_process`` fills them.

        :param rows: data rows (lists of strings), header excluded.
        :raises TypeError: if a non-long value has no length.
        :raises IndexError: if a row has fewer values than columns.
        """
        for row in rows:
            if not row:
                # csv.reader yields an empty list for a blank line.
                log.debug('Skipping empty row.')
                continue
            row = [(self.columns[i], raw) for i, raw in enumerate(row)]
            values = list()
            # Explicit per-row flag: the decision no longer depends on a
            # value left over from the previous row.
            skip_row = False
            for column, original_value in row:
                value = self.value_resolver.resolve(
                    column, original_value, row)
                if value.__class__ is Skip:
                    skip_row = True
                    break
                values.append(value)
                try:
                    if value.__class__ is not long:
                        # Grow the column size to fit the longest value.
                        column.size = max(column.size, len(value))
                except TypeError:
                    log.error('Original value "%s" now "%s" of bad type!' %
                              (original_value, value))
                    raise
            if skip_row:
                continue
            # Reversed so that pop() hands the values back in column order.
            values.reverse()
            for column in self.columns:
                if column.name in (PLATE_NAME_COLUMN, WELL_NAME_COLUMN):
                    continue
                try:
                    column.values.append(values.pop())
                except IndexError:
                    log.error('Column %s has no values to pop.' %
                              column.name)
                    raise

    def post_process(self):
        """
        Fill the derived 'Well Name' and 'Plate Name' columns.

        For every populated row the well name is rebuilt from the well's
        row and column, and the plate name is read from the plate loaded
        by the value resolver. Returns at once when neither derived column
        exists.

        :raises MetadataError: if a plate column is needed but absent.
        :raises KeyError: if a plate or well id is not known to the value
            resolver.
        """
        plate_column = None
        well_column = None
        well_name_column = None
        plate_name_column = None
        for column in self.columns:
            if column.__class__ is PlateColumn:
                plate_column = column
            elif column.__class__ is WellColumn:
                well_column = column
            elif column.name == WELL_NAME_COLUMN:
                well_name_column = column
            elif column.name == PLATE_NAME_COLUMN:
                plate_name_column = column
        if well_name_column is None and plate_name_column is None:
            log.info('Nothing to do during post processing.')
            return
        # Logged once instead of once per row.
        if well_name_column is None:
            log.info('Missing well name column, skipping.')
        if plate_name_column is None:
            log.info('Missing plate name column, skipping.')

        resolver = self.value_resolver
        plate_needed = plate_name_column is not None or (
            well_name_column is not None
            and ScreenI is resolver.target_class)
        if plate_needed and plate_column is None:
            raise MetadataError(
                'A plate column is required to populate name columns!')

        for i in range(0, len(self.columns[0].values)):
            if well_name_column is not None:
                if PlateI is resolver.target_class:
                    plate = resolver.target_object.id.val
                elif ScreenI is resolver.target_class:
                    # Use the detected plate column instead of requiring a
                    # header spelled exactly 'Plate'.
                    plate = plate_column.values[i]
                try:
                    well = resolver.wells_by_id[plate]
                    well = well[well_column.values[i]]
                    row = well.row.val
                    col = well.column.val
                except KeyError:
                    log.error(
                        'Missing row or column for well name population!')
                    raise
                v = '%s%d' % (resolver.AS_ALPHA[row], col + 1)
                well_name_column.size = max(well_name_column.size, len(v))
                well_name_column.values.append(v)
            if plate_name_column is not None:
                plate = resolver.plates_by_id[plate_column.values[i]]
                v = plate.name.val
                plate_name_column.size = max(plate_name_column.size, len(v))
                plate_name_column.values.append(v)

    def write_to_omero(self):
        """
        Store the columns in a new table and link it to the target object.

        A table named 'bulk_annotations' is created, initialised with the
        columns and filled with their data. Its original file is wrapped in
        a file annotation, which is linked to the target and saved in the
        target's group.

        :raises MetadataError: if the table cannot be created.
        """
        sf = self.client.getSession()
        sr = sf.sharedResources()
        update_service = sf.getUpdateService()
        name = 'bulk_annotations'
        table = sr.newTable(1, name)
        if table is None:
            raise MetadataError(
                "Unable to create table: %s" % name)
        try:
            original_file = table.getOriginalFile()
            log.info('Created new table OriginalFile:%d' %
                     original_file.id.val)
            table.initialize(self.columns)
            log.info('Table initialized with %d columns.' %
                     (len(self.columns)))
            table.addData(self.columns)
            log.info('Added data column data.')
        finally:
            # Close the table even when initialising or filling it fails.
            table.close()
        file_annotation = FileAnnotationI()
        file_annotation.ns = \
            rstring('openmicroscopy.org/omero/bulk_annotations')
        file_annotation.description = rstring(name)
        file_annotation.file = OriginalFileI(original_file.id.val, False)
        link = self.create_annotation_link()
        link.parent = self.target_object
        link.child = file_annotation
        group = str(self.value_resolver.target_object.details.group.id.val)
        update_service.saveObject(link, {'omero.group': group})
470
def parse_target_object(target_object):
    """
    Turn a ``<Type>:<id>`` string into an unloaded model object.

    :param target_object: string such as ``Plate:6``; the type must be
        ``Dataset``, ``Plate`` or ``Screen``.
    :returns: a DatasetI, PlateI or ScreenI created with the given id.
    :raises ValueError: if the string does not split into exactly two
        parts on ':', the id is not an integer, or the type is unsupported.
    """
    # Local names chosen so the builtins ``type`` and ``id`` stay usable.
    object_type, object_id = target_object.split(':')
    if 'Dataset' == object_type:
        return DatasetI(long(object_id), False)
    if 'Plate' == object_type:
        return PlateI(long(object_id), False)
    if 'Screen' == object_type:
        return ScreenI(long(object_id), False)
    raise ValueError('Unsupported target object: %s' % target_object)
480
if __name__ == "__main__":
    # Command line entry point: parse the options, open a session, parse
    # the file and, unless -i was given, write the table to the server.
    try:
        # "t:" is included so that the -t branch below can be reached.
        options, args = getopt(sys.argv[1:], "s:p:u:w:k:t:id")
    except GetoptError as e:
        usage(e.msg)

    try:
        target_object, file = args
        target_object = parse_target_object(target_object)
    except ValueError:
        usage('Target object and file must be a specified!')

    username = None
    password = None
    hostname = 'localhost'
    port = 4064
    info = False
    session_key = None
    logging_level = logging.INFO
    thread_count = 1
    for option, argument in options:
        if option == "-u":
            username = argument
        if option == "-w":
            password = argument
        if option == "-s":
            hostname = argument
        if option == "-p":
            port = int(argument)
        if option == "-i":
            info = True
        if option == "-k":
            session_key = argument
        if option == "-d":
            logging_level = logging.DEBUG
        if option == "-t":
            thread_count = int(argument)
    # A session key replaces username, hostname and password checks.
    if session_key is None and username is None:
        usage("Username must be specified!")
    if session_key is None and hostname is None:
        usage("Host name must be specified!")
    if session_key is None and password is None:
        password = getpass()

    logging.basicConfig(level=logging_level)
    client = client(hostname, port)
    client.setAgent("OMERO.populate_metadata")
    client.enableKeepAlive(60)
    try:
        if session_key is not None:
            client.joinSession(session_key)
        else:
            client.createSession(username, password)

        log.debug('Creating pool of %d threads' % thread_count)
        thread_pool = ThreadPool(thread_count)
        ctx = ParsingContext(client, target_object, file)
        ctx.parse()
        if not info:
            ctx.write_to_omero()
    finally:
        # Always release the session, whether or not population succeeded.
        client.closeSession()
544