1
2
3 """
4 Populate bulk metadata tables from delimited text files.
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 import exceptions
27 import tempfile
28 import logging
29 import time
30 import sys
31 import csv
32 import re
33 from threading import Thread
34 from StringIO import StringIO
35 from getpass import getpass
36 from getopt import getopt, GetoptError
37 from Queue import Queue
38
39 import omero.clients
40 from omero.rtypes import rdouble, rstring, rint
41 from omero.model import DatasetAnnotationLink, DatasetI, FileAnnotationI, \
42 OriginalFileI, PlateI, PlateAnnotationLinkI, ScreenI, \
43 ScreenAnnotationLinkI
44 from omero.grid import ImageColumn, LongColumn, PlateColumn, StringColumn, \
45 WellColumn
46 from omero.util.temp_files import create_path, remove_path
47 from omero import client
48
49 from populate_roi import ThreadPool
50
51
52 try:
53 from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
54 except ImportError:
55 from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
56
57 log = logging.getLogger("omero.util.populate_metadata")
58
def usage(error):
    """
    Print `error` followed by the command-line usage text, then exit.

    The script name shown in the text is read from sys.argv[0]. This function
    does not return: it ends the process through sys.exit(2).
    """
    # NOTE(review): the `def` line is absent from the text under review. The
    # name and the single `error` parameter are taken from the usage(...)
    # calls in the __main__ block and from the format arguments below.
    # NOTE(review): leading whitespace inside the literal below could not be
    # recovered from the reviewed text -- confirm against the original file.
    cmd = sys.argv[0]
    print """%s
Usage: %s [options] <target_object> <file>
Runs metadata population code for a given object.

Options:
-s OMERO hostname to use [defaults to "localhost"]
-p OMERO port to use [defaults to 4064]
-u OMERO username to use
-w OMERO password
-k OMERO session key to use
-i Dump measurement information and exit (no population)
-d Print debug statements

Examples:
%s -s localhost -p 14064 -u bob Plate:6 metadata.csv

Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd)
    sys.exit(2)
80
81
# Module-level slot for the worker pool. It stays None until the __main__
# block at the bottom of this file assigns a ThreadPool to it.
thread_pool = None


# Names of the extra string columns that create_columns_screen() appends for
# plate and well identifier columns and that post_process() later fills in.
PLATE_NAME_COLUMN = 'Plate Name'
WELL_NAME_COLUMN = 'Well Name'
87
89 """Instance to denote a row skip request."""
90 pass
91
98
100 """
101 Header resolver for known header names which is responsible for creating
102 the column set for the OMERO.tables instance.
103 """
104
# Size argument passed to every StringColumn this resolver creates; the
# parsing code later grows `column.size` to the longest value it sees.
DEFAULT_COLUMN_SIZE = 1

# Lower-cased CSV header name -> column class used for that header.
# Headers that are not listed here are created as StringColumn.
plate_keys = {
    'well': WellColumn,
    'field': ImageColumn,
    'row': LongColumn,
    'column': LongColumn,
    'wellsample': ImageColumn
}

# Same mapping as plate_keys plus a 'plate' header; used for Screen targets.
screen_keys = dict({
    'plate': PlateColumn,
}, **plate_keys)
118
120 self.target_object = target_object
121 self.headers = [v.replace('/', '\\') for v in headers]
122 self.headers_as_lower = [v.lower() for v in self.headers]
123
def create_columns(self):
    """
    Build the column list that matches the class of the target object.

    Screen, Plate and Dataset targets are delegated to
    create_columns_screen(), create_columns_plate() and
    create_columns_dataset() respectively, after a debug log line naming the
    target id. Any other target class raises MetadataError.
    """
    kind = self.target_object.__class__
    object_id = self.target_object.id.val
    if kind is ScreenI:
        message = 'Creating columns for Screen:%d'
        builder_name = 'create_columns_screen'
    elif kind is PlateI:
        message = 'Creating columns for Plate:%d'
        builder_name = 'create_columns_plate'
    elif kind is DatasetI:
        message = 'Creating columns for Dataset:%d'
        builder_name = 'create_columns_dataset'
    else:
        raise MetadataError('Unsupported target object class: %s' \
                            % kind)
    log.debug(message % object_id)
    # The builder is looked up only after logging, as in the original flow.
    return getattr(self, builder_name)()
138
def create_columns_screen(self):
    """
    Build the column list for a Screen target.

    Each header whose lower-cased name is a key of `screen_keys` gets an
    instance of the mapped column class; every other header becomes a
    StringColumn created with DEFAULT_COLUMN_SIZE. After the header columns,
    one 'Plate Name' StringColumn is appended per PlateColumn and one
    'Well Name' StringColumn per WellColumn, in header order.

    Returns the list of columns; all columns start with empty value lists.
    """
    columns = list()
    for i, header_as_lower in enumerate(self.headers_as_lower):
        name = self.headers[i]
        # Look the class up first so that a KeyError raised inside a column
        # constructor is not mistaken for "unknown header".
        column_class = self.screen_keys.get(header_as_lower)
        if column_class is None:
            column = StringColumn(name, '', self.DEFAULT_COLUMN_SIZE,
                                  list())
        else:
            column = column_class(name, '', list())
        columns.append(column)
    # Walk a snapshot: the list itself grows inside the loop and must not be
    # iterated while it is being appended to.
    for column in list(columns):
        if column.__class__ is PlateColumn:
            columns.append(StringColumn(PLATE_NAME_COLUMN, '',
                           self.DEFAULT_COLUMN_SIZE, list()))
        if column.__class__ is WellColumn:
            columns.append(StringColumn(WELL_NAME_COLUMN, '',
                           self.DEFAULT_COLUMN_SIZE, list()))
    return columns
157
169
def create_columns_dataset(self):
    """
    Column creation for Dataset targets, which is not implemented.

    Always raises NotImplementedError. That is still an Exception subclass,
    so callers catching Exception keep working.
    """
    raise NotImplementedError('To be implemented!')
172
174 """
175 Value resolver for column types which is responsible for filling up
176 non-metadata columns with their OMERO data model identifiers.
177 """
178
# The letters 'a'..'z' (character codes 97..122). The position of a letter in
# this list is used as the zero-based row/column number, and the reverse
# lookup turns a row number back into a letter for well names.
AS_ALPHA = [chr(v) for v in range(97, 122 + 1)]
# Well identifier: letters (group 1) followed by digits (group 2), e.g. "A10".
WELL_REGEX = re.compile(r'^([a-zA-Z]+)(\d+)$')
181
def __init__(self, client, target_object):
    """
    Remember the client and target, then load the target's object graph.

    Plate, Dataset and Screen targets are loaded through load_plate(),
    load_dataset() and load_screen() respectively.

    Raises MetadataError when the target is of any other class.
    """
    self.client = client
    self.target_object = target_object
    self.target_class = self.target_object.__class__
    if PlateI is self.target_class:
        self.load_plate()
        return
    if DatasetI is self.target_class:
        self.load_dataset()
        return
    if ScreenI is self.target_class:
        self.load_screen()
        return
    # The class lives on the instance; the bare name `target_class` was never
    # defined here and turned this error into a NameError.
    raise MetadataError('Unsupported target object class: %s' \
                        % self.target_class)
def load_screen(self):
    """
    Reload the target Screen with its plates and index every plate.

    Replaces `self.target_object` with the Screen fetched together with its
    plate links, then fetches each linked plate with its wells and well
    samples. Afterwards:

      * `self.plates_by_name` maps plate name -> plate,
      * `self.plates_by_id` maps plate id -> plate,
      * `self.wells_by_location` maps plate name -> the dict handed to
        parse_plate() for that plate.

    Raises MetadataError when the Screen cannot be found.
    """
    query_service = self.client.getSession().getQueryService()
    parameters = omero.sys.ParametersI()
    parameters.addId(self.target_object.id.val)
    log.debug('Loading Screen:%d' % self.target_object.id.val)
    self.target_object = query_service.findByQuery(
        'select s from Screen as s '
        'join fetch s.plateLinks as p_link '
        'join fetch p_link.child as p '
        'where s.id = :id', parameters)
    if self.target_object is None:
        # MetadataError is the error type used throughout this module; the
        # previously referenced MetadataException is not defined anywhere.
        raise MetadataError('Could not find target object!')
    self.wells_by_location = dict()
    self.plates_by_name = dict()
    self.plates_by_id = dict()
    for plate in (l.child for l in self.target_object.copyPlateLinks()):
        # Re-fetch each plate so that its wells and well samples are loaded.
        parameters = omero.sys.ParametersI()
        parameters.addId(plate.id.val)
        plate = query_service.findByQuery(
            'select p from Plate as p '
            'join fetch p.wells as w '
            'join fetch w.wellSamples as ws '
            'where p.id = :id', parameters)
        self.plates_by_name[plate.name.val] = plate
        self.plates_by_id[plate.id.val] = plate
        wells_by_location = dict()
        self.wells_by_location[plate.name.val] = wells_by_location
        self.parse_plate(plate, wells_by_location)
222
def load_plate(self):
    """
    Reload the target Plate with its wells and well samples and index it.

    Replaces `self.target_object` with the fetched Plate and sets
    `self.wells_by_location` to a one-entry dict that maps the plate name to
    the dict handed to parse_plate().

    Raises MetadataError when the Plate cannot be found.
    """
    query_service = self.client.getSession().getQueryService()
    parameters = omero.sys.ParametersI()
    parameters.addId(self.target_object.id.val)
    log.debug('Loading Plate:%d' % self.target_object.id.val)
    self.target_object = query_service.findByQuery(
        'select p from Plate as p '
        'join fetch p.wells as w '
        'join fetch w.wellSamples as ws '
        'where p.id = :id', parameters)
    if self.target_object is None:
        # MetadataError is the error type used throughout this module; the
        # previously referenced MetadataException is not defined anywhere.
        raise MetadataError('Could not find target object!')
    self.wells_by_location = dict()
    wells_by_location = dict()
    self.wells_by_location[self.target_object.name.val] = wells_by_location
    self.parse_plate(self.target_object, wells_by_location)
239
255
def load_dataset(self):
    """
    Loading of Dataset targets, which is not implemented.

    Always raises NotImplementedError. That is still an Exception subclass,
    so callers catching Exception keep working.
    """
    raise NotImplementedError('To be implemented!')
258
def resolve(self, column, value, row):
    """
    Convert one CSV cell into the value to store in `column`.

    column -- the column the cell belongs to.
    value  -- the raw cell text.
    row    -- the whole row as a list of (column, raw text) pairs.

    Returns, depending on the column class:

      * WellColumn: the id of the well at the parsed location, or -1 when
        the plate has no well there. With more than one loaded plate the
        plate is taken from the row's PlateColumn cell.
      * PlateColumn: the id of the plate with that name.
      * LongColumn named 'row' or 'column' (any case): the zero-based
        index, from either a one-based number or a letter.
      * StringColumn: the text unchanged.

    A Skip instance is returned when the row refers to a plate that is not
    part of the loaded screen. MetadataError is raised for unparsable well
    identifiers, for well cells that cannot be tied to a plate and for
    unsupported column classes.
    """
    column_class = column.__class__
    column_as_lower = column.name.lower()
    if WellColumn is column_class:
        m = self.WELL_REGEX.match(value)
        if m is None or len(m.groups()) != 2:
            raise MetadataError(
                'Cannot parse well identifier "%s" from row: %r' % \
                (value, [o[1] for o in row]))
        plate_row = m.group(1).lower()
        plate_column = m.group(2)
        if len(self.wells_by_location) == 1:
            wells_by_location = self.wells_by_location.values()[0]
            log.debug('Parsed "%s" row: %s column: %s' % \
                      (value, plate_row, plate_column))
        else:
            # Several plates are loaded: the row's plate cell decides which
            # plate the well belongs to.
            wells_by_location = None
            for row_column, plate in row:
                if row_column.__class__ is PlateColumn:
                    try:
                        wells_by_location = self.wells_by_location[plate]
                    except KeyError:
                        # Same outcome as the PlateColumn branch below, so
                        # the result does not depend on column order.
                        log.warn('Screen is missing plate: %s' % plate)
                        return Skip()
                    log.debug('Parsed "%s" row: %s column: %s plate: %s' % \
                              (value, plate_row, plate_column, plate))
                    break
            if wells_by_location is None:
                # Previously this fell through to an unbound local.
                raise MetadataError(
                    'Cannot resolve well "%s" without a plate in row: %r' % \
                    (value, [o[1] for o in row]))
        try:
            return wells_by_location[plate_row][plate_column].id.val
        except KeyError:
            log.debug('Row: %s Column: %s not found!' % \
                      (plate_row, plate_column))
            return long(-1)
    if PlateColumn is column_class:
        try:
            return self.plates_by_name[value].id.val
        except KeyError:
            log.warn('Screen is missing plate: %s' % value)
            return Skip()
    if column_as_lower in ('row', 'column') \
       and column_class is LongColumn:
        try:
            # Numeric cells are one-based; stored values are zero-based.
            return long(value) - 1
        except ValueError:
            # Not a number: treat the cell as a letter ('a' -> 0).
            return long(self.AS_ALPHA.index(value.lower()))
    if StringColumn is column_class:
        return value
    raise MetadataError('Unsupported column class: %s' % column_class)
303
class ParsingContext(object):
    """
    Generic parsing context for CSV files.

    Typical use, as done by the command-line entry point of this module:
    construct, call parse() to read the file into column objects, then call
    write_to_omero() to store them as a table linked to the target object.
    """

    def __init__(self, client, target_object, file):
        """
        Store the client, the target object and the CSV path, and create
        the ValueResolver for that client and target.
        """
        self.client = client
        self.target_object = target_object
        self.file = file
        self.value_resolver = ValueResolver(self.client, self.target_object)

    def create_annotation_link(self):
        """
        Return a new, empty annotation link for the target's class.

        Raises MetadataError for target classes other than Screen, Plate
        and Dataset.
        """
        self.target_class = self.target_object.__class__
        if ScreenI is self.target_class:
            return ScreenAnnotationLinkI()
        if PlateI is self.target_class:
            return PlateAnnotationLinkI()
        if DatasetI is self.target_class:
            # Qualified access: the bare name DatasetAnnotationLinkI is not
            # among this module's imports.
            return omero.model.DatasetAnnotationLinkI()
        # The class lives on the instance; the bare name `target_class` was
        # never defined in this method.
        raise MetadataError('Unsupported target object class: %s' \
                            % self.target_class)

    def get_column_widths(self):
        """
        Return each column's `size` attribute in column order, with None
        for columns that have no such attribute.
        """
        return [getattr(column, 'size', None) for column in self.columns]

    def parse(self):
        """
        Read the CSV file and fill `self.columns`.

        The first row is the header and determines the columns; the
        remaining rows are resolved by populate(), after which
        post_process() fills the derived name columns.

        Raises MetadataError when the file contains no rows at all.
        """
        data = open(self.file, 'U')
        try:
            rows = list(csv.reader(data, delimiter=','))
        finally:
            data.close()
        if not rows:
            raise MetadataError('No header row found in file: %s' \
                                % self.file)
        log.debug('Header: %r' % rows[0])
        header_resolver = HeaderResolver(self.target_object, rows[0])
        self.columns = header_resolver.create_columns()
        log.debug('Columns: %r' % self.columns)
        self.populate(rows[1:])
        self.post_process()
        log.debug('Column widths: %r' % self.get_column_widths())
        log.debug('Columns: %r' % \
                  [(o.name, len(o.values)) for o in self.columns])

    def populate(self, rows):
        """
        Resolve every data row and append its values to the columns.

        A row is dropped as a whole when the resolver answers Skip for one
        of its cells. Blank rows are ignored. For every resolved value that
        is not a long, the owning column's `size` is grown to the value's
        length. The derived 'Plate Name' and 'Well Name' columns are left
        untouched here; post_process() fills them.
        """
        for row in rows:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            values = list()
            # Tracked per row so that a skip never leaks into the next row.
            skipped = False
            row = [(self.columns[i], value) for i, value in enumerate(row)]
            for column, original_value in row:
                value = self.value_resolver.resolve(
                    column, original_value, row)
                if value.__class__ is Skip:
                    skipped = True
                    break
                values.append(value)
                try:
                    if value.__class__ is not long:
                        column.size = max(column.size, len(value))
                except TypeError:
                    log.error('Original value "%s" now "%s" of bad type!' % \
                              (original_value, value))
                    raise
            if skipped:
                continue
            # Reversed so that pop() hands the values back in column order.
            values.reverse()
            for column in self.columns:
                if column.name in (PLATE_NAME_COLUMN, WELL_NAME_COLUMN):
                    continue
                try:
                    column.values.append(values.pop())
                except IndexError:
                    log.error('Column %s has no values to pop.' % \
                              column.name)
                    raise

    def _find_column(self, columns_by_name, name):
        """
        Return the column called `name`: the exact name is preferred, then
        the first column whose name matches when case is ignored. Raises
        KeyError when there is no such column.
        """
        try:
            return columns_by_name[name]
        except KeyError:
            wanted = name.lower()
            for candidate in self.columns:
                if candidate.name.lower() == wanted:
                    return candidate
            raise

    def post_process(self):
        """
        Fill the derived 'Well Name' and 'Plate Name' columns.

        Well names are built from the row and column number columns (row as
        a letter, column one-based). Plate names are looked up from the
        plate ids through the value resolver. Each name column's `size` is
        grown to fit. Nothing happens when neither name column exists.

        Raises KeyError when a well name column exists but the row or
        column number column does not.
        """
        columns_by_name = dict()
        plate_column = None
        well_name_column = None
        plate_name_column = None
        for column in self.columns:
            columns_by_name[column.name] = column
            if column.__class__ is PlateColumn:
                plate_column = column
            elif column.__class__ is WellColumn:
                # Well identifier columns are never name columns.
                pass
            elif column.name == WELL_NAME_COLUMN:
                well_name_column = column
            elif column.name == PLATE_NAME_COLUMN:
                plate_name_column = column
        if well_name_column is None and plate_name_column is None:
            log.info('Nothing to do during post processing.')
            return
        # Reported once instead of once per row.
        if well_name_column is None:
            log.info('Missing well name column, skipping.')
        if plate_name_column is None:
            log.info('Missing plate name column, skipping.')
        for i in range(0, len(self.columns[0].values)):
            if well_name_column is not None:
                try:
                    # Headers are matched without regard to case when the
                    # columns are created, so they are found the same way.
                    row = self._find_column(
                        columns_by_name, 'Row').values[i]
                    col = self._find_column(
                        columns_by_name, 'Column').values[i]
                except KeyError:
                    log.error(
                        'Missing row or column for well name population!')
                    raise
                row = self.value_resolver.AS_ALPHA[row]
                v = '%s%d' % (row, col + 1)
                well_name_column.size = max(well_name_column.size, len(v))
                well_name_column.values.append(v)
            if plate_name_column is not None:
                # Prefer the column found by class; its header may be
                # spelled in any case.
                source = plate_column
                if source is None:
                    source = self._find_column(columns_by_name, 'Plate')
                plate = source.values[i]
                plate = self.value_resolver.plates_by_id[plate]
                v = plate.name.val
                plate_name_column.size = max(plate_name_column.size, len(v))
                plate_name_column.values.append(v)

    def write_to_omero(self):
        """
        Store the parsed columns as a new table and attach it to the target.

        Creates a table named 'bulk_annotations', initializes it with the
        columns, adds their data, and saves a link from the target object
        to a FileAnnotation (namespace
        'openmicroscopy.org/omero/bulk_annotations') that points at the
        table's original file.

        Raises MetadataError when no table could be created.
        """
        sf = self.client.getSession()
        sr = sf.sharedResources()
        update_service = sf.getUpdateService()
        name = 'bulk_annotations'
        # NOTE(review): the first argument (1) is passed through unchanged;
        # presumably a repository id -- confirm against the OMERO API.
        table = sr.newTable(1, name)
        # Must be checked before the table is used for anything.
        if table is None:
            raise MetadataError(
                "Unable to create table: %s" % name)
        original_file = table.getOriginalFile()
        log.info('Created new table OriginalFile:%d' % original_file.id.val)
        table.initialize(self.columns)
        log.info('Table initialized with %d columns.' % (len(self.columns)))
        table.addData(self.columns)
        log.info('Added data column data.')
        file_annotation = FileAnnotationI()
        file_annotation.ns = \
            rstring('openmicroscopy.org/omero/bulk_annotations')
        file_annotation.description = rstring(name)
        file_annotation.file = OriginalFileI(original_file.id.val, False)
        link = self.create_annotation_link()
        link.parent = self.target_object
        link.child = file_annotation
        update_service.saveObject(link)
449
def parse_target_object(target_object):
    """
    Turn a "<Type>:<id>" string into an unloaded model object.

    Supported types are 'Dataset', 'Plate' and 'Screen'. Raises ValueError
    for any other type, for a string that does not split into exactly two
    parts on ':', and for an id that is not an integer.
    """
    kind, identifier = target_object.split(':')
    if kind == 'Dataset':
        model_class = DatasetI
    elif kind == 'Plate':
        model_class = PlateI
    elif kind == 'Screen':
        model_class = ScreenI
    else:
        raise ValueError('Unsupported target object: %s' % target_object)
    return model_class(long(identifier), False)
459
460 if __name__ == "__main__":
461 try:
462 options, args = getopt(sys.argv[1:], "s:p:u:w:k:id")
463 except GetoptError, (msg, opt):
464 usage(msg)
465
466 try:
467 target_object, file = args
468 target_object = parse_target_object(target_object)
469 except ValueError:
470 usage('Target object and file must be a specified!')
471
472 username = None
473 password = None
474 hostname = 'localhost'
475 port = 4064
476 info = False
477 session_key = None
478 logging_level = logging.INFO
479 thread_count = 1
480 for option, argument in options:
481 if option == "-u":
482 username = argument
483 if option == "-w":
484 password = argument
485 if option == "-s":
486 hostname = argument
487 if option == "-p":
488 port = int(argument)
489 if option == "-i":
490 info = True
491 if option == "-k":
492 session_key = argument
493 if option == "-d":
494 logging_level = logging.DEBUG
495 if option == "-t":
496 thread_count = int(argument)
497 if session_key is None and username is None:
498 usage("Username must be specified!")
499 if session_key is None and hostname is None:
500 usage("Host name must be specified!")
501 if session_key is None and password is None:
502 password = getpass()
503
504 logging.basicConfig(level = logging_level)
505 client = client(hostname, port)
506 client.setAgent("OMERO.populate_metadata")
507 client.enableKeepAlive(60)
508 try:
509 if session_key is not None:
510 client.joinSession(session_key)
511 else:
512 client.createSession(username, password)
513
514 log.debug('Creating pool of %d threads' % thread_count)
515 thread_pool = ThreadPool(thread_count)
516 ctx = ParsingContext(client, target_object, file)
517 ctx.parse()
518 if not info:
519 ctx.write_to_omero()
520 finally:
521 pass
522 client.closeSession()
523