Package omero :: Package util :: Module populate_roi
[hide private]
[frames] | [no frames]

Source Code for Module omero.util.populate_roi

   1  #!/usr/bin/env python 
   2  # encoding: utf-8 
   3  """ 
   4  ... 
   5  """ 
   6   
   7  # 
   8  #  Copyright (C) 2009 University of Dundee. All rights reserved. 
   9  # 
  10  # 
  11  #  This program is free software; you can redistribute it and/or modify 
  12  #  it under the terms of the GNU General Public License as published by 
  13  #  the Free Software Foundation; either version 2 of the License, or 
  14  #  (at your option) any later version. 
  15  #  This program is distributed in the hope that it will be useful, 
  16  #  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  17  #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  18  #  GNU General Public License for more details. 
  19  # 
  20  #  You should have received a copy of the GNU General Public License along 
  21  #  with this program; if not, write to the Free Software Foundation, Inc., 
  22  #  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  23  # 
  24   
  25   
  26  import exceptions 
  27  import tempfile 
  28  import logging 
  29  import time 
  30  import sys 
  31  import csv 
  32  import re 
  33  from threading import Thread 
  34  from StringIO import StringIO 
  35  from getpass import getpass 
  36  from getopt import getopt, GetoptError 
  37  from Queue import Queue 
  38   
  39  import omero.clients 
  40  from omero.rtypes import rdouble, rstring, rint 
  41  from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ImageI, \ 
  42                          FileAnnotationI, RoiI, EllipseI, PointI 
  43  from omero.grid import ImageColumn, WellColumn, RoiColumn, LongColumn, DoubleColumn 
  44  from omero.util.temp_files import create_path, remove_path 
  45  from omero import client 
  46   
  47  # Handle Python 2.5 built-in ElementTree 
  48  try: 
  49          from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  50  except ImportError: 
  51          from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  52   
# Module-level logger shared by every class and function in this module.
log = logging.getLogger("omero.util.populate_roi")
  54   
def usage(error):
    """
    Prints usage so that we don't have to. :)

    *error* is prepended to the usage text; always exits the process
    with status 2.
    """
    cmd = sys.argv[0]
    # NOTE(review): the option-column spacing below was flattened by
    # extraction — confirm against the canonical source before release.
    print """%s
Usage: %s [-s hostname] [-u username | -k session_key] <-p port> [plate_id]
Runs measurement population code for a given plate.

Options:
  -s    OMERO hostname to use
  -p    OMERO port to use [defaults to 4064]
  -u    OMERO username to use
  -k    OMERO session key to use
  -m    Measurement index to populate
  -i    Dump measurement information and exit (no population)
  -d    Print debug statements
  -t    Number of threads to use when populating [defaults to 1]

Examples:
  %s -s localhost -p 4063 -u bob 27

Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd)
    sys.exit(2)
77 78 ### 79 ### Worker and ThreadPool from... 80 ### http://code.activestate.com/recipes/577187-python-thread-pool/ 81 ### 82
class Worker(Thread):
    """Daemon thread that executes tasks pulled from a shared task queue."""

    def __init__(self, tasks):
        Thread.__init__(self)
        self.tasks = tasks
        # Daemonize so outstanding workers never block interpreter exit.
        self.daemon = True
        self.start()

    def run(self):
        while True:
            func, args, kwargs = self.tasks.get()
            try:
                func(*args, **kwargs)
            except Exception as exc:
                # A failing task is logged but must not kill the worker.
                log.exception(exc)
            self.tasks.task_done()
99
class ThreadPool:
    """Pool of daemon worker threads consuming tasks from a shared queue."""

    def __init__(self, num_threads):
        # The queue is bounded by the worker count; add_task blocks once
        # every worker is busy and the queue is full.
        self.tasks = Queue(num_threads)
        for _ in range(num_threads):
            Worker(self.tasks)

    def add_task(self, func, *args, **kargs):
        """Enqueue func(*args, **kargs) for execution by a worker."""
        self.tasks.put((func, args, kargs))

    def wait_completion(self):
        """Block until every queued task has been processed."""
        self.tasks.join()

# Global thread pool for use by ROI workers
thread_pool = None
class MeasurementError(Exception):
    """
    Raised by the analysis or measurement context when an error condition
    is reached.
    """
124
class DownloadingOriginalFileProvider(object):
    """
    Provides original file data by downloading it from an OMERO raw file
    store into temporary files under a scratch directory.
    """

    # Default raw file store buffer size
    BUFFER_SIZE = 1024 * 1024  # 1MB

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.raw_file_store = self.service_factory.createRawFileStore()
        # Scratch directory that holds every downloaded temporary file.
        self.dir = create_path("populate_roi", "dir", folder=True)

    def get_original_file_data(self, original_file):
        """
        Downloads an original file to a temporary file and returns an open
        file handle to that temporary file seeked to zero. The caller is
        responsible for closing the temporary file.
        """
        log.info("Downloading original file: %d" % original_file.id.val)
        self.raw_file_store.setFileId(original_file.id.val)
        temporary_file = tempfile.TemporaryFile(dir=str(self.dir))
        size = original_file.size.val
        # Floor division; the "+ 1" covers any trailing partial buffer
        # (and yields a harmless empty read on exact multiples).
        for i in range((size // self.BUFFER_SIZE) + 1):
            index = i * self.BUFFER_SIZE
            data = self.raw_file_store.read(index, self.BUFFER_SIZE)
            temporary_file.write(data)
        temporary_file.seek(0)
        temporary_file.truncate(size)
        return temporary_file

    def __del__(self):
        # Bug fix: this method was previously named __delete__, which is
        # the descriptor-protocol slot and is never invoked on garbage
        # collection, so the raw file store was never closed.
        try:
            self.raw_file_store.close()
        except Exception:
            # Never raise from a finalizer (e.g. during interpreter
            # shutdown or after a partially failed __init__).
            pass
158
class AbstractPlateAnalysisCtx(object):
    """
    Abstract class which aggregates and represents all measurement runs made
    on a given Plate.
    """

    DEFAULT_ORIGINAL_FILE_PROVIDER = DownloadingOriginalFileProvider

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(AbstractPlateAnalysisCtx, self).__init__()
        self.images = images
        self.numcols, self.numrows = self.guess_geometry(self.images)
        self.original_files = original_files
        self.original_file_image_map = original_file_image_map
        self.plate_id = plate_id
        self.service_factory = service_factory
        self.log_files = dict()
        self.detail_files = dict()
        self.measurements = dict()

    def guess_geometry(self, images):
        """Infer the (columns, rows) plate geometry from well positions."""
        max_col = 0
        max_row = 0
        for image in images:
            # Only the first well sample link is considered.
            well = image.copyWellSamples()[0].well
            max_col = max(max_col, well.column.val)
            max_row = max(max_row, well.row.val)
        return (max_col + 1, max_row + 1)

    def colrow_from_wellnumber(self, width, wellnumber):
        """Map a 1-based well number onto a zero-based (col, row) pair."""
        index = wellnumber - 1
        # Floor division keeps integer semantics explicit.
        return (index % width, index // width)

    def image_from_wellnumber(self, wellnumber):
        """Return the image whose first well sample sits at *wellnumber*."""
        col, row = self.colrow_from_wellnumber(self.numcols, wellnumber)
        log.debug("Finding image for %s (%s,%s)..." % (wellnumber, col, row))
        for image in self.images:
            well = image.copyWellSamples()[0].well
            if well.column.val == col and well.row.val == row:
                return image
        raise Exception(
            "Could not find image for (col,row)==(%s,%s)" % (col, row))

    ###
    ### Abstract methods
    ###

    @classmethod
    def is_this_type(klass):
        """
        Concrete implementations are to return True if the class pertinent
        for the original files associated with the plate.
        """
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_count(self):
        """Returns the number of recognized measurement runs."""
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_ctx(self, index):
        """Returns the measurement context for a given index."""
        raise Exception("To be implemented by concrete implementations.")

    def get_result_file_count(self, measurement_index):
        """
        Return the number of result files associated with a measurement run.
        """
        raise Exception("To be implemented by concrete implementations.")
230
class MIASPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    MIAS dataset concrete class implementation of an analysis context. MIAS
    measurements are aggregated based on a single "log" file. A result
    file is present for each stitched (of multiple fields) mosaic and
    contains the actual measured results and ROI.
    """

    # Python datetime format string of the log filename completion date/time
    datetime_format = '%Y-%m-%d-%Hh%Mm%Ss'

    # Regular expression matching a log filename
    log_regex = re.compile('.*log(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Regular expression matching a result filename
    detail_regex = re.compile(
        '^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Companion file format
    companion_format = 'Companion/MIAS'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(MIASPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        self._populate_log_and_detail_files()
        self._populate_measurements()

    def _populate_log_and_detail_files(self):
        """
        Strips out erroneous files and collects the log and result original
        files based on regular expression matching.
        """
        for original_file in self.original_files:
            if original_file.mimetype.val != self.companion_format:
                continue
            name = original_file.name.val
            log_match = self.log_regex.match(name)
            if log_match:
                timestamp = time.strptime(log_match.group(1),
                                          self.datetime_format)
                self.log_files[timestamp] = original_file
                continue
            detail_match = self.detail_regex.match(name)
            if detail_match:
                timestamp = time.strptime(detail_match.group(3),
                                          self.datetime_format)
                self.detail_files[timestamp] = original_file

    def _populate_measurements(self):
        """
        Result original files are only recognizable as part of a given
        measurement (declared by a log file) based upon their parsed
        date/time of completion as encoded in the filename. This method
        collects result original files and groups them by collective
        parsed date/time of completion.
        """
        log_timestamps = sorted(self.log_files.keys())
        detail_timestamps = sorted(self.detail_files.keys())
        for log_timestamp in log_timestamps:
            self.measurements[log_timestamp] = list()
        # Each detail file belongs to the measurement declared by the
        # first log file completed after it.
        for detail_timestamp in detail_timestamps:
            for log_timestamp in log_timestamps:
                if detail_timestamp < log_timestamp:
                    self.measurements[log_timestamp].append(
                        self.detail_files[detail_timestamp])
                    break

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for original_file in original_files:
            if original_file.mimetype.val == klass.companion_format \
               and klass.log_regex.match(original_file.name.val):
                return True

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        key = list(self.log_files.keys())[index]
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return MIASMeasurementCtx(self, sf, provider, self.log_files[key],
                                  self.measurements[key])

    def get_result_file_count(self, measurement_index):
        key = list(self.log_files.keys())[measurement_index]
        return len(self.measurements[key])
328
class FlexPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    Flex dataset concrete class implementation of an analysis context. Flex
    measurements are aggregated in a single ".res" XML file and contain no
    ROI.
    """

    # Companion file format
    companion_format = 'Companion/Flex'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(FlexPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # Keep a single ".res" companion file per unique path.
        by_path = dict()
        for original_file in original_files:
            if (original_file.mimetype.val == self.companion_format
                    and original_file.name.val.endswith('.res')):
                by_path[original_file.path.val] = original_file
        self.measurements = list(by_path.values())

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for original_file in original_files:
            if (original_file.mimetype.val == klass.companion_format
                    and original_file.name.val.endswith('.res')):
                return True
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return FlexMeasurementCtx(self, sf, provider,
                                  self.measurements[index], [])

    def get_result_file_count(self, measurement_index):
        # Flex aggregates everything into one ".res" file per measurement.
        return 1
380
class InCellPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    InCell dataset concrete class implementation of an analysis context.
    InCell measurements are from InCell Analyzer and are aggregated in a
    single gargantuan (often larger than 100MB per plate) XML file.
    """

    # Companion file format
    companion_format = 'Companion/InCell'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(InCellPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # Keep a single ".xml" companion file per unique path.
        by_path = dict()
        for original_file in original_files:
            if (original_file.mimetype.val == self.companion_format
                    and original_file.name.val.endswith('.xml')):
                by_path[original_file.path.val] = original_file
        self.measurements = list(by_path.values())

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for original_file in original_files:
            if (original_file.mimetype.val == klass.companion_format
                    and original_file.name.val.endswith('.xml')):
                return True
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return InCellMeasurementCtx(self, sf, provider,
                                    self.measurements[index], [])

    def get_result_file_count(self, measurement_index):
        # Everything is aggregated into one XML file per measurement.
        return 1
432
class PlateAnalysisCtxFactory(object):
    """
    The plate analysis context factory is responsible for detecting and
    returning a plate analysis context instance for a given plate.
    """

    implementations = (FlexPlateAnalysisCtx, MIASPlateAnalysisCtx,
                       InCellPlateAnalysisCtx)

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.query_service = self.service_factory.getQueryService()

    def find_images_for_plate(self, plate_id):
        """
        Retrieves all the images associated with a given plate. Fetched
        are the Image's WellSample, the WellSample's Well, the annotation
        stack associated with the Image and each annotation's linked
        original file.
        """
        # The query that follows is doubly linked:
        #  * Image --> WellSample --> Well
        #  * Well --> WellSample --> Image
        # This facilitates later "ordered" access of fields/well samples
        # required by certain measurement contexts (notably InCell).
        log.debug("Loading image...")
        images = self.query_service.findAllByQuery(
            'select img from Image as img ' \
            'join fetch img.wellSamples as ws ' \
            'join fetch ws.well as w ' \
            'join fetch w.wellSamples as ws2 ' \
            'join w.plate as p ' \
            'left outer join fetch img.annotationLinks as ia_links ' \
            'left outer join fetch ia_links.child as ia ' \
            'left outer join fetch ia.file as i_o_file ' \
            'where p.id = %d' % plate_id, None)
        log.debug("Loading plate...")
        plate = self.query_service.findByQuery(
            'select p from Plate p ' \
            'left outer join fetch p.annotationLinks as pa_links ' \
            'left outer join fetch pa_links.child as pa ' \
            'left outer join fetch pa.file as p_o_file ' \
            'where p.id = %d' % plate_id, None)
        log.debug("Linking plate and images...")
        for image in images:
            for ws in image.copyWellSamples():
                ws.well.plate = plate
        return images

    def gather_original_files(self, obj, original_files, original_file_obj_map):
        """Collect the file annotations attached to *obj*."""
        for annotation_link in obj.copyAnnotationLinks():
            annotation = annotation_link.child
            if not isinstance(annotation, FileAnnotationI):
                continue
            original_file = annotation.file
            original_files.add(original_file)
            if original_file_obj_map is not None:
                original_file_obj_map[original_file.id.val] = obj

    def get_analysis_ctx(self, plate_id):
        """Retrieves a plate analysis context for a given plate."""
        # Using a set since 1) no one was using the image.id key and 2)
        # we are now also collecting original files from plates (MIAS)
        # for which there's no clear key. Since all the files are loaded
        # in a single shot, double linking should not cause a problem.
        plates = set()
        original_files = set()
        original_file_image_map = dict()
        images = self.find_images_for_plate(plate_id)
        for image in images:
            for ws in image.copyWellSamples():
                plate = ws.well.plate
                if plate not in plates:
                    plates.add(plate)
                    self.gather_original_files(plate, original_files, None)
            self.gather_original_files(image, original_files,
                                       original_file_image_map)
        for klass in self.implementations:
            if klass.is_this_type(original_files):
                return klass(images, original_files, original_file_image_map,
                             plate_id, self.service_factory)
        raise MeasurementError(
            "Unable to find suitable analysis context for plate: %d" %
            plate_id)
516
class MeasurementParsingResult(object):
    """
    Holds the results of a measurement parsing event.
    """

    def __init__(self, sets_of_columns=None):
        # Create a fresh list rather than sharing a mutable default.
        self.sets_of_columns = \
            list() if sets_of_columns is None else sets_of_columns

    def append_columns(self, columns):
        """Adds a set of columns to the parsing result."""
        self.sets_of_columns.append(columns)
530
class AbstractMeasurementCtx(object):
    """
    Abstract class which aggregates and represents all the results produced
    from a given measurement run. It also provides a scaffold for
    interacting with the OmeroTables infrastructure.
    """

    # The number of ROI to have parsed before streaming them to the server
    ROI_UPDATE_LIMIT = 1000

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        super(AbstractMeasurementCtx, self).__init__()
        self.analysis_ctx = analysis_ctx
        self.service_factory = service_factory
        self.original_file_provider = original_file_provider
        self.query_service = self.service_factory.getQueryService()
        self.update_service = self.service_factory.getUpdateService()
        self.original_file = original_file
        self.result_files = result_files

        # Establish the rest of our initial state: a row --> column -->
        # [image, ...] mapping, where each image sits at its well sample
        # (field) index.
        self.wellimages = dict()
        for image in self.analysis_ctx.images:
            for well_sample in image.copyWellSamples():
                well = well_sample.well
                field = well.copyWellSamples().index(well_sample)
                row = well.row.val
                column = well.column.val
                by_column = self.wellimages.setdefault(row, dict())
                field_list = by_column.setdefault(column, [])
                # Pad with None so the image lands at its field index.
                while len(field_list) <= field:
                    field_list.append(None)
                field_list[field] = image

    def get_well_images(self, row, col):
        """
        Takes a row and a col index and returns a tuple
        of Well and image. Either might be None. Uses the
        first image found to find the Well and therefore
        must be loaded (image->wellSample->well)
        """
        try:
            images = self.wellimages[row][col]
        except KeyError:
            # This has the potential to happen alot with the
            # datasets we have given the split machine acquisition
            # ".flex" file storage.
            log.warn("WARNING: Missing data for row %d column %d" % \
                     (row, col))
            return (None, None)
        if not images:
            return (None, None)
        well = images[0].copyWellSamples()[0].well
        return (well, images)

    def update_table(self, columns):
        """Updates the OmeroTables instance backing our results."""
        # Create a new OMERO table to store our measurement results
        sr = self.service_factory.sharedResources()
        name = self.get_name()
        self.table = sr.newTable(1, '/%s.r5' % name)
        if self.table is None:
            raise MeasurementError(
                "Unable to create table: %s" % name)

        # Retrieve the original file corresponding to the table for the
        # measurement, link it to the file annotation representing the
        # umbrella measurement run, link the annotation to the plate from
        # which it belongs and save the file annotation.
        table_file_id = self.table.getOriginalFile().id.val
        log.info("Created new table: %d" % table_file_id)
        self.file_annotation.file = OriginalFileI(table_file_id, False)
        link = PlateAnnotationLinkI()
        link.parent = PlateI(self.analysis_ctx.plate_id, False)
        link.child = self.file_annotation
        link = self.update_service.saveAndReturnObject(link)
        self.file_annotation = link.child

        t0 = int(time.time() * 1000)
        self.table.initialize(columns)
        log.debug("Table init took %sms" % (int(time.time() * 1000) - t0))
        t0 = int(time.time() * 1000)
        column_report = dict((column.name, len(column.values))
                             for column in columns)
        log.debug("Column report: %r" % column_report)
        self.table.addData(columns)
        log.info("Table update took %sms" % (int(time.time() * 1000) - t0))

    def create_file_annotation(self, set_of_columns):
        """
        Creates a file annotation to represent a set of columns from our
        measurement.
        """
        self.file_annotation = FileAnnotationI()
        self.file_annotation.ns = \
            rstring('openmicroscopy.org/omero/measurement')
        self.file_annotation.description = \
            rstring(self.get_name(set_of_columns))

    def update_rois(self, rois, batches, batch_no):
        """
        Updates a set of ROI for a given batch updating the batches
        dictionary with the saved IDs.
        """
        log.debug("Saving %d ROI for batch %d" % (len(rois), batch_no))
        t0 = int(time.time() * 1000)
        roi_ids = self.update_service.saveAndReturnIds(rois)
        log.info("Batch %d ROI update took %sms" % \
                 (batch_no, int(time.time() * 1000) - t0))
        batches[batch_no] = roi_ids

    def image_from_original_file(self, original_file):
        """Returns the image from which an original file has originated."""
        return self.analysis_ctx.original_file_image_map[
            original_file.id.val]

    def parse_and_populate(self):
        """
        Calls parse and populate, updating the OmeroTables instance backing
        our results and the OMERO database itself.
        """
        result = self.parse()
        if result is None:
            return
        for i, columns in enumerate(result.sets_of_columns):
            self.create_file_annotation(i)
            self.parse_and_populate_roi(columns)
            self.populate(columns)

    ###
    ### Abstract methods
    ###

    def get_name(self, set_of_columns=None):
        """Returns the name of the measurement, and a set of columns."""
        raise Exception("To be implemented by concrete implementations.")

    def parse(self):
        """Parses result files, returning a MeasurementParsingResult."""
        raise Exception("To be implemented by concrete implementations.")

    def parse_and_populate_roi(self, columns):
        """
        Parses and populates ROI from column data in the OMERO database.
        """
        raise Exception("To be implemented by concrete implementations.")

    def populate(self, columns):
        """
        Populates an OmeroTables instance backing our results and ROI
        linkages.
        """
        raise Exception("To be implemented by concrete implementations.")
695
class MIASMeasurementCtx(AbstractMeasurementCtx):
    """
    MIAS measurements are a set of tab delimited text files per well. Each
    TSV file's content is prefixed by the analysis parameters.
    """

    # The OmeroTable ImageColumn index
    IMAGE_COL = 0

    # The OmeroTable RoiColumn index
    ROI_COL = 1

    # Expected columns in NEO datasets
    NEO_EXPECTED = ('Image', 'ROI', 'Label', 'Row', 'Col', 'Nucleus Area',
                    'Cell Diam.', 'Cell Type', 'Mean Nucleus Intens.')

    # Expected columns in MNU datasets
    MNU_EXPECTED = ('Image', 'ROI', 'row', 'col', 'type')

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        super(MIASMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)

    def get_empty_columns(self, n_columns):
        """
        Retrieves a set of empty OmeroTables columns for the analysis results
        prefixed by an ImageColumn and RoiColumn to handle these linked
        object indexes.
        """
        columns = [ImageColumn('Image', '', list()),
                   RoiColumn('ROI', '', list())]
        for i in range(n_columns):
            columns.append(DoubleColumn('', '', list()))
        return columns

    ###
    ### Overriding abstract implementation
    ###

    def image_from_original_file(self, original_file):
        """
        Overriding the abstract implementation since the companion
        files are no longer attached to the images, but only to the plate
        for MIAS. Instead, we use the filename itself to find the image.
        """
        name = original_file.name.val
        match = MIASPlateAnalysisCtx.detail_regex.match(name)
        if match is None:
            raise Exception("Not a detail file")
        well_num = int(match.group(1))
        return self.analysis_ctx.image_from_wellnumber(well_num)

    ###
    ### Abstract method implementations
    ###

    def get_name(self, set_of_columns=None):
        """Measurement name: the log file's name minus its extension."""
        return self.original_file.name.val[:-4]

    def parse(self):
        """Parses each TSV result file into a single set of columns."""
        columns = None
        for result_file in self.result_files:
            log.info("Parsing: %s" % result_file.name.val)
            image = self.image_from_original_file(result_file)
            provider = self.original_file_provider
            data = provider.get_original_file_data(result_file)
            try:
                rows = list(csv.reader(data, delimiter='\t'))
            finally:
                data.close()
            # Walk rows bottom-up: data rows parse as floats; the first
            # row with a non-numeric cell is treated as the header, which
            # names the columns and ends this file's parse.
            rows.reverse()
            if columns is None:
                columns = self.get_empty_columns(len(rows[0]))
            for row in rows:
                try:
                    for i, value in enumerate(row):
                        value = float(value)
                        columns[i + 2].values.append(value)
                    columns[self.IMAGE_COL].values.append(image.id.val)
                except ValueError:
                    for i, value in enumerate(row):
                        columns[i + 2].name = value
                    break
        log.debug("Returning %d columns" % len(columns))
        return MeasurementParsingResult([columns])

    def _parse_roi(self, columns, kind, make_shape):
        """
        Shared ROI parsing loop for NEO and MNU datasets (previously
        duplicated in _parse_neo_roi/_parse_mnu_roi). *make_shape* is
        called with the row index and must return a fully populated shape.
        ROI are saved in ROI_UPDATE_LIMIT-sized batches via the global
        thread pool; saved IDs are appended to the ROI column in batch
        order.
        """
        log.debug("Parsing %s %s ROIs..." % (len(columns[0].values), kind))
        image_ids = columns[self.IMAGE_COL].values
        rois = list()
        # Save our file annotation to the database so we can use an unloaded
        # annotation for the saveAndReturnIds that will be triggered below.
        self.file_annotation = \
            self.update_service.saveAndReturnObject(self.file_annotation)
        unloaded_file_annotation = \
            FileAnnotationI(self.file_annotation.id.val, False)
        batch_no = 1
        batches = dict()
        for i, image_id in enumerate(image_ids):
            roi = RoiI()
            roi.addShape(make_shape(i))
            roi.image = ImageI(image_id, False)
            roi.linkAnnotation(unloaded_file_annotation)
            rois.append(roi)
            if len(rois) == self.ROI_UPDATE_LIMIT:
                thread_pool.add_task(self.update_rois, rois, batches,
                                     batch_no)
                rois = list()
                batch_no += 1
        thread_pool.add_task(self.update_rois, rois, batches, batch_no)
        thread_pool.wait_completion()
        for k in sorted(batches.keys()):
            columns[self.ROI_COL].values += batches[k]

    def _parse_neo_roi(self, columns):
        """Parses out ROI from OmeroTables columns for 'NEO' datasets."""
        def make_shape(i):
            shape = EllipseI()
            shape.theZ = rint(0)
            shape.theT = rint(0)
            # NOTE(review): columns[6] is 'Cell Diam.' and is assigned
            # directly to both radii, exactly as the original code did —
            # confirm radius-vs-diameter semantics upstream.
            diameter = rdouble(float(columns[6].values[i]))
            shape.cx = rdouble(float(columns[4].values[i]))
            shape.cy = rdouble(float(columns[3].values[i]))
            shape.rx = diameter
            shape.ry = diameter
            return shape
        self._parse_roi(columns, 'NEO', make_shape)

    def _parse_mnu_roi(self, columns):
        """Parses out ROI from OmeroTables columns for 'MNU' datasets."""
        def make_shape(i):
            shape = PointI()
            shape.theZ = rint(0)
            shape.theT = rint(0)
            shape.cx = rdouble(float(columns[3].values[i]))
            shape.cy = rdouble(float(columns[2].values[i]))
            return shape
        self._parse_roi(columns, 'MNU', make_shape)

    def parse_and_populate_roi(self, columns):
        """Dispatch to the NEO or MNU parser based on the column names."""
        names = [column.name for column in columns]
        neo = [name in self.NEO_EXPECTED for name in names]
        mnu = [name in self.MNU_EXPECTED for name in names]
        for name in names:
            log.debug("Column: %s" % name)
        if len(columns) == 9 and False not in neo:
            self._parse_neo_roi(columns)
        elif len(columns) == 5 and False not in mnu:
            self._parse_mnu_roi(columns)
        else:
            log.warn("Unknown ROI type for MIAS dataset: %r" % names)

    def populate(self, columns):
        """
        Query performed::
            first_roi = columns[self.ROI_COL].values[0]
            first_roi = self.query_service.findByQuery(
                'select roi from Roi as roi ' \
                'join fetch roi.annotationLinks as link ' \
                'join fetch link.child ' \
                'where roi.id = %d' % first_roi, None)
            self.file_annotation = first_roi.copyAnnotationLinks()[0].child
        """
        self.update_table(columns)
891
class FlexMeasurementCtx(AbstractMeasurementCtx):
    """
    Flex measurements are located deep within a ".res" XML file container
    and contain no ROI.
    """

    # The XPath to the <Area> which aggregate an acquisition
    AREA_XPATH = './/Areas/Area'

    # The XPath to the an analysis <Parameter>; will become a column header
    # and is below AREA_XPATH
    PARAMETER_XPATH = './/Wells/ResultParameters/Parameter'

    # The XPath to a <Well> which has had at least one acquisition event
    # within and is below AREA_XPATH
    WELL_XPATH = './/Wells/Well'

    # The XPath to a <Result> for a given well and is below WELL_XPATH
    RESULT_XPATH = './/Result'

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        # Nothing Flex-specific to initialize; simply forward to the base.
        super(FlexMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)

    def get_empty_columns(self, headers):
        """
        Retrieves a set of empty OmeroTables columns for the analysis results
        prefixed by a WellColumn to handle linked object indexes.
        """
        columns = {'Well': WellColumn('Well', '', list())}
        for header in headers:
            columns[header] = DoubleColumn(header, '', list())
        return columns

    ###
    ### Abstract method implementations
    ###

    def get_name(self, set_of_columns=None):
        # Strip the 4-character ".res" extension from the file name.
        return self.original_file.name.val[:-4]

    def parse(self):
        """
        Parses the ".res" XML container into a WellColumn plus one
        DoubleColumn per analysis parameter.

        Returns a MeasurementParsingResult wrapping the columns, or None
        when the file holds no analysis data.
        """
        log.info("Parsing: %s" % self.original_file.name.val)
        provider = self.original_file_provider
        data = provider.get_original_file_data(self.original_file)
        try:
            et = ElementTree(file=data)
        finally:
            data.close()
        root = et.getroot()
        areas = root.findall(self.AREA_XPATH)
        log.debug("Area count: %d" % len(areas))
        # FIX: 'columns' was previously unbound when the file contained no
        # <Area> elements, raising a NameError at the final return below.
        columns = None
        for i, area in enumerate(areas):
            result_parameters = area.findall(self.PARAMETER_XPATH)
            log.debug("Area %d result children: %d" % \
                      (i, len(result_parameters)))
            if len(result_parameters) == 0:
                log.warn("%s contains no analysis data." % self.get_name())
                return
            headers = list()
            for result_parameter in result_parameters:
                headers.append(result_parameter.text)
            columns = self.get_empty_columns(headers)
            wells = area.findall(self.WELL_XPATH)
            for well in wells:
                # Rows and columns are 1-indexed, OMERO wells are 0-indexed
                row = int(well.get('row')) - 1
                column = int(well.get('col')) - 1
                try:
                    v = columns['Well'].values
                    wellobj, images = self.get_well_images(row, column)
                    if not wellobj:
                        continue
                    v.append(wellobj.id.val)
                except Exception:
                    # FIX: was a bare 'except:' which also swallowed
                    # SystemExit/KeyboardInterrupt; best-effort skip of the
                    # well is preserved.
                    log.exception("ERROR: Failed to get well images")
                    continue
                results = well.findall(self.RESULT_XPATH)
                for result in results:
                    name = result.get('name')
                    columns[name].values.append(float(result.text))
        if columns is None:
            # No areas at all: behave like the "no analysis data" path.
            return
        return MeasurementParsingResult([columns.values()])

    def parse_and_populate_roi(self, columns):
        # Flex result files contain no ROI; nothing to do.
        pass

    def populate(self, columns):
        """Persists the parsed columns via the table update machinery."""
        self.update_table(columns)
982
class InCellMeasurementCtx(AbstractMeasurementCtx):
    """
    InCell Analyzer measurements are located deep within an XML file container.
    """

    # Cells expected centre of gravity columns
    CELLS_CG_EXPECTED = ['Cell: cgX', 'Cell: cgY']

    # Nuclei expected centre of gravity columns
    NUCLEI_CG_EXPECTED = ['Nucleus: cgX', 'Nucleus: cgY']

    # Expected source attribute value for cell data
    CELLS_SOURCE = 'Cells'

    # Expected source attribute value for nuclei data
    NUCLEI_SOURCE = 'Nuclei'

    # Expected source attribute value for organelle data
    ORGANELLES_SOURCE = 'Organelles'

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        # Nothing InCell-specific to initialize; forwards to the base class.
        super(InCellMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)

    def check_sparse_data(self, columns):
        """
        Checks a set of columns for sparse data (one column shorter than
        the rest) and adds -1 where appropriate.
        """
        length = None
        for i, column in enumerate(columns):
            if column.name == 'ROI':
                # ROI are processed late so we don't care if this column
                # is sparse or not.
                continue
            current_length = len(column.values)
            if length is not None:
                if current_length > length:
                    # NOTE(review): pads the column at index i - 1; if the
                    # skipped 'ROI' column sits at i - 1 this pads ROI
                    # instead of the previous measurement column — confirm
                    # column ordering guarantees this cannot happen.
                    log.debug("%s length %d > %d modding previous column" % \
                        (column.name, current_length, length))
                    columns[i - 1].values.append(-1.0)
                if current_length < length:
                    log.debug("%s length %d < %d modding current column" % \
                        (column.name, current_length, length))
                    column.values.append(-1.0)
            length = len(column.values)

    ###
    ### Abstract method implementations
    ###

    def get_name(self, set_of_columns=None):
        # Base name is the original file name without its 4-character
        # extension; each set of columns gets a distinguishing suffix.
        if set_of_columns is None:
            return self.original_file.name.val[:-4]
        if set_of_columns == 0:
            return self.original_file.name.val[:-4] + ' Cells'
        if set_of_columns == 1:
            return self.original_file.name.val[:-4] + ' Nuclei'
        if set_of_columns == 2:
            return self.original_file.name.val[:-4] + ' Organelles'

    def parse(self):
        """
        Streams the InCell XML with iterparse, building three parallel sets
        of columns (cells, nuclei, organelles) keyed by Image/Cell, plus one
        DoubleColumn per measurement key encountered.

        Returns a MeasurementParsingResult holding the three column sets.
        """
        log.info("Parsing: %s" % self.original_file.name.val)
        provider = self.original_file_provider
        data = provider.get_original_file_data(self.original_file)
        try:
            events = ('start', 'end')
            # The <WellData> element currently being processed; None while
            # outside a (non-summary) WellData scope.
            well_data = None
            n_roi = 0
            n_measurements = 0
            cells_columns = {'Image': ImageColumn('Image', '', list()),
                             'Cell': LongColumn('Cell', '', list()),
                             'ROI': RoiColumn('ROI', '', list())
                             }
            # Organelles carry no ROI column.
            organelles_columns = {'Image': ImageColumn('Image', '', list()),
                                  'Cell': LongColumn('Cell', '', list()),
                                  }
            nuclei_columns = {'Image': ImageColumn('Image', '', list()),
                              'Cell': LongColumn('Cell', '', list()),
                              'ROI': RoiColumn('ROI', '', list())
                              }
            for event, element in iterparse(data, events=events):
                if event == 'start' and element.tag == 'WellData' \
                   and element.get('cell') != 'Summary':
                    # Well rows/cols and fields are 1-indexed in the XML;
                    # OMERO is 0-indexed.
                    row = int(element.get('row')) - 1
                    col = int(element.get('col')) - 1
                    i = int(element.get('field')) - 1
                    try:
                        well, images = self.get_well_images(row, col)
                        if not images:
                            continue
                        image = images[i]
                    except:
                        # Best effort: skip this WellData if the server
                        # lookup fails.
                        log.exception("ERROR: Failed to get well images")
                        continue
                    # Even out previous rows before starting a new one.
                    self.check_sparse_data(cells_columns.values())
                    self.check_sparse_data(nuclei_columns.values())
                    self.check_sparse_data(organelles_columns.values())
                    cell = long(element.get('cell'))
                    cells_columns['Cell'].values.append(cell)
                    nuclei_columns['Cell'].values.append(cell)
                    organelles_columns['Cell'].values.append(cell)
                    well_data = element
                    cells_columns['Image'].values.append(image.id.val)
                    nuclei_columns['Image'].values.append(image.id.val)
                    organelles_columns['Image'].values.append(image.id.val)
                elif well_data is not None and event == 'start' \
                     and element.tag == 'Measure':
                    source = element.get('source')
                    key = element.get('key')
                    value = float(element.get('value'))
                    # Route the measurement to the matching column set; an
                    # unrecognised source goes to all three sets.
                    if source == self.CELLS_SOURCE:
                        columns_list = [cells_columns]
                    elif source == self.NUCLEI_SOURCE:
                        columns_list = [nuclei_columns]
                    elif source == self.ORGANELLES_SOURCE:
                        columns_list = [organelles_columns]
                    else:
                        columns_list = [cells_columns, nuclei_columns,
                                        organelles_columns]
                    for columns in columns_list:
                        if key not in columns:
                            columns[key] = DoubleColumn(key, '', list())
                        columns[key].values.append(value)
                        n_measurements += 1
                elif event == 'end' and element.tag == 'WellData':
                    if well_data is not None:
                        n_roi += 1
                        # Free the parsed subtree to keep memory bounded.
                        well_data.clear()
                        well_data = None
                else:
                    # Discard elements we do not track.
                    element.clear()
            # Final row sparseness check
            self.check_sparse_data(cells_columns.values())
            self.check_sparse_data(nuclei_columns.values())
            self.check_sparse_data(organelles_columns.values())
            log.info("Total ROI: %d" % n_roi)
            log.info("Total measurements: %d" % n_measurements)
            sets_of_columns = [cells_columns.values(), nuclei_columns.values(),
                               organelles_columns.values()]
            return MeasurementParsingResult(sets_of_columns)
        finally:
            data.close()

    def parse_and_populate_roi(self, columns_as_list):
        """
        Builds and saves Point ROI (cell or nucleus centres of gravity) for
        one set of columns, batching the saves through the module-level
        thread_pool and finally filling the 'ROI' column with the returned
        ids in batch order.
        """
        # First sanity check our provided columns
        names = [column.name for column in columns_as_list]
        log.debug('Parsing columns: %r' % names)
        cells_expected = [name in names for name in self.CELLS_CG_EXPECTED]
        nuclei_expected = [name in names for name in self.NUCLEI_CG_EXPECTED]
        if (False in cells_expected) and (False in nuclei_expected):
            log.warn("Missing CGs for InCell dataset: %r" % names)
            log.warn('Removing resultant empty ROI column.')
            # NOTE(review): removing from the list while iterating it can
            # skip elements; safe only if at most one RoiColumn is present
            # — confirm.
            for column in columns_as_list:
                if RoiColumn == column.__class__:
                    columns_as_list.remove(column)
            return
        # Reconstruct a column name to column map
        columns = dict()
        for column in columns_as_list:
            columns[column.name] = column
        image_ids = columns['Image'].values
        rois = list()
        # Save our file annotation to the database so we can use an unloaded
        # annotation for the saveAndReturnIds that will be triggered below.
        self.file_annotation = \
            self.update_service.saveAndReturnObject(self.file_annotation)
        unloaded_file_annotation = \
            FileAnnotationI(self.file_annotation.id.val, False)
        # Parse and append ROI
        batch_no = 1
        batches = dict()
        for i, image_id in enumerate(image_ids):
            unloaded_image = ImageI(image_id, False)
            if False in nuclei_expected:
                # Cell centre of gravity
                roi = RoiI()
                shape = PointI()
                shape.theZ = rint(0)
                shape.theT = rint(0)
                shape.cx = rdouble(float(columns['Cell: cgX'].values[i]))
                shape.cy = rdouble(float(columns['Cell: cgY'].values[i]))
                roi.addShape(shape)
                roi.image = unloaded_image
                roi.linkAnnotation(unloaded_file_annotation)
                rois.append(roi)
            elif False in cells_expected:
                # Nucleus centre of gravity
                roi = RoiI()
                shape = PointI()
                shape.theZ = rint(0)
                shape.theT = rint(0)
                shape.cx = rdouble(float(columns['Nucleus: cgX'].values[i]))
                shape.cy = rdouble(float(columns['Nucleus: cgY'].values[i]))
                roi.addShape(shape)
                roi.image = unloaded_image
                roi.linkAnnotation(unloaded_file_annotation)
                rois.append(roi)
            else:
                raise MeasurementError('Not a nucleus or cell ROI')
            # Flush a full batch to the thread pool.
            if len(rois) == self.ROI_UPDATE_LIMIT:
                thread_pool.add_task(self.update_rois, rois, batches, batch_no)
                rois = list()
                batch_no += 1
        # Flush the final (possibly partial) batch and wait for all saves.
        thread_pool.add_task(self.update_rois, rois, batches, batch_no)
        thread_pool.wait_completion()
        # Append returned ROI ids in batch order to keep row alignment.
        batch_keys = batches.keys()
        batch_keys.sort()
        for k in batch_keys:
            columns['ROI'].values += batches[k]

    def populate(self, columns):
        """Persists the parsed columns via the table update machinery."""
        self.update_table(columns)
1198 1199 if __name__ == "__main__": 1200 try: 1201 options, args = getopt(sys.argv[1:], "s:p:u:m:k:t:id") 1202 except GetoptError, (msg, opt): 1203 usage(msg) 1204 1205 try: 1206 plate_id, = args 1207 plate_id = long(plate_id) 1208 except ValueError: 1209 usage("Plate ID must be a specified and a number!") 1210 1211 username = None 1212 hostname = None 1213 port = 4064 # SSL 1214 measurement = None 1215 info = False 1216 session_key = None 1217 logging_level = logging.INFO 1218 thread_count = 1 1219 for option, argument in options: 1220 if option == "-u": 1221 username = argument 1222 if option == "-s": 1223 hostname = argument 1224 if option == "-p": 1225 port = int(argument) 1226 if option == "-m": 1227 measurement = int(argument) 1228 if option == "-i": 1229 info = True 1230 if option == "-k": 1231 session_key = argument 1232 if option == "-d": 1233 logging_level = logging.DEBUG 1234 if option == "-t": 1235 thread_count = int(argument) 1236 if session_key is None and username is None: 1237 usage("Username must be specified!") 1238 if session_key is None and hostname is None: 1239 usage("Host name must be specified!") 1240 if session_key is None: 1241 password = getpass() 1242 1243 logging.basicConfig(level = logging_level) 1244 c = client(hostname, port) 1245 c.setAgent("OMERO.populate_roi") 1246 c.enableKeepAlive(60) 1247 try: 1248 if session_key is not None: 1249 service_factory = c.joinSession(session_key) 1250 else: 1251 service_factory = c.createSession(username, password) 1252 1253 log.debug('Creating pool of %d threads' % thread_count) 1254 thread_pool = ThreadPool(thread_count) 1255 factory = PlateAnalysisCtxFactory(service_factory) 1256 analysis_ctx = factory.get_analysis_ctx(plate_id) 1257 n_measurements = analysis_ctx.get_measurement_count() 1258 if measurement is not None and measurement >= n_measurements: 1259 usage("measurement %d not a valid index!") 1260 if info: 1261 for i in range(n_measurements): 1262 n_result_files = 
analysis_ctx.get_result_file_count(i) 1263 print "Measurement %d has %d result files." % \ 1264 (i, n_result_files) 1265 sys.exit(0) 1266 if measurement is not None: 1267 measurement_ctx = analysis_ctx.get_measurement_ctx(measurement) 1268 measurement_ctx.parse_and_populate() 1269 else: 1270 for i in range(n_measurements): 1271 measurement_ctx = analysis_ctx.get_measurement_ctx(i) 1272 measurement_ctx.parse_and_populate() 1273 finally: 1274 c.closeSession() 1275