Package omero :: Package util :: Module populate_roi
[hide private]
[frames] | [no frames]

Source Code for Module omero.util.populate_roi

   1  #!/usr/bin/env python 
   2  # encoding: utf-8 
   3  """ 
   4  ... 
   5  """ 
   6   
   7  # 
   8  #  Copyright (C) 2009 University of Dundee. All rights reserved. 
   9  # 
  10  # 
  11  #  This program is free software; you can redistribute it and/or modify 
  12  #  it under the terms of the GNU General Public License as published by 
  13  #  the Free Software Foundation; either version 2 of the License, or 
  14  #  (at your option) any later version. 
  15  #  This program is distributed in the hope that it will be useful, 
  16  #  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  17  #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  18  #  GNU General Public License for more details. 
  19  # 
  20  #  You should have received a copy of the GNU General Public License along 
  21  #  with this program; if not, write to the Free Software Foundation, Inc., 
  22  #  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  23  # 
  24   
  25   
  26  import exceptions 
  27  import tempfile 
  28  import logging 
  29  import time 
  30  import sys 
  31  import csv 
  32  import re 
  33  from StringIO import StringIO 
  34  from getpass import getpass 
  35  from getopt import getopt, GetoptError 
  36   
  37  import omero.clients 
  38  from omero.rtypes import rdouble, rstring, rint 
  39  from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ImageI, \ 
  40                          FileAnnotationI, RoiI, EllipseI, PointI 
  41  from omero.grid import ImageColumn, WellColumn, RoiColumn, LongColumn, DoubleColumn 
  42  from omero.util.temp_files import create_path, remove_path 
  43  from omero import client 
  44   
  45  # Handle Python 2.5 built-in ElementTree 
  46  try: 
  47          from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  48  except ImportError: 
  49          from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  50   
  51  log = logging.getLogger("omero.util.populate_roi") 
  52   
53 -def usage(error):
54 """Prints usage so that we don't have to. :)""" 55 cmd = sys.argv[0] 56 print """%s 57 Usage: %s [-s hostname] [-u username | -k session_key] <-p port> [plate_id] 58 Runs measurement population code for a given plate. 59 60 Options: 61 -s OMERO hostname to use 62 -p OMERO port to use [defaults to 4064] 63 -u OMERO username to use 64 -k OMERO session key to use 65 -m Measurement index to populate 66 -i Dump measurement information and exit (no population) 67 -d Print debug statements 68 69 Examples: 70 %s -s localhost -p 4063 -u bob 27 71 72 Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd) 73 sys.exit(2)
74
class MeasurementError(Exception):
    """
    Raised by the analysis or measurement context when an error condition
    is reached.
    """
81
82 -class DownloadingOriginalFileProvider(object):
83 """ 84 Provides original file data by downloading it from an OMERO raw file store. 85 """ 86 87 # Default raw file store buffer size 88 BUFFER_SIZE = 1024 * 1024 # 1MB 89
90 - def __init__(self, service_factory):
91 self.service_factory = service_factory 92 self.raw_file_store = self.service_factory.createRawFileStore() 93 self.dir = create_path("populate_roi", "dir", folder = True)
94
95 - def get_original_file_data(self, original_file):
96 """ 97 Downloads an original file to a temporary file and returns an open 98 file handle to that temporary file seeked to zero. The caller is 99 responsible for closing the temporary file. 100 """ 101 print "Downloading original file: %d" % original_file.id.val 102 self.raw_file_store.setFileId(original_file.id.val) 103 temporary_file = tempfile.TemporaryFile(dir=str(self.dir)) 104 size = original_file.size.val 105 for i in range((size / self.BUFFER_SIZE) + 1): 106 index = i * self.BUFFER_SIZE 107 data = self.raw_file_store.read(index, self.BUFFER_SIZE) 108 temporary_file.write(data) 109 temporary_file.seek(0L) 110 temporary_file.truncate(size) 111 return temporary_file
112
113 - def __delete__(self):
114 self.raw_file_store.close()
115
class AbstractPlateAnalysisCtx(object):
    """
    Abstract class which aggregates and represents all measurement runs made on
    a given Plate.
    """

    # Original file provider used to download companion file contents.
    DEFAULT_ORIGINAL_FILE_PROVIDER = DownloadingOriginalFileProvider

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(AbstractPlateAnalysisCtx, self).__init__()
        self.images = images
        self.numcols, self.numrows = self.guess_geometry(self.images)
        self.original_files = original_files
        self.original_file_image_map = original_file_image_map
        self.plate_id = plate_id
        self.service_factory = service_factory
        self.log_files = dict()
        self.detail_files = dict()
        self.measurements = dict()

    def guess_geometry(self, images):
        """
        Returns a (column_count, row_count) tuple derived from the maximum
        well column/row index seen across the images' first well samples.
        """
        max_col = 0
        max_row = 0
        for image in images:
            ws = image.copyWellSamples()[0]  # Using only first well sample link
            well = ws.well
            max_col = max(max_col, well.column.val)
            max_row = max(max_row, well.row.val)
        return (max_col + 1, max_row + 1)

    def colrow_from_wellnumber(self, width, wellnumber):
        """
        Converts a 1-based well number to a 0-based (column, row) tuple for
        a plate with the given number of columns.
        """
        x = wellnumber - 1
        col = x % width
        # Explicit floor division: identical for ints, but immune to
        # "from __future__ import division" / Python 3 semantics.
        row = x // width
        return (col, row)

    def image_from_wellnumber(self, wellnumber):
        """Finds the image whose first well sample occupies the well number."""
        col, row = self.colrow_from_wellnumber(self.numcols, wellnumber)
        log.debug("Finding image for %s (%s,%s)..." % (wellnumber, col, row))
        for image in self.images:
            well = image.copyWellSamples()[0].well
            if well.column.val == col and well.row.val == row:
                return image
        raise exceptions.Exception("Could not find image for (col,row)==(%s,%s)" % (col, row))

    ###
    ### Abstract methods
    ###

    def is_this_type(klass):
        """
        Concrete implementations are to return True if the class pertinent
        for the original files associated with the plate.
        """
        raise Exception("To be implemented by concrete implementations.")
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        """Returns the number of recognized measurement runs."""
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_ctx(self, index):
        """Returns the measurement context for a given index."""
        raise Exception("To be implemented by concrete implementations.")

    def get_result_file_count(self, measurement_index):
        """
        Return the number of result files associated with a measurement run.
        """
        raise Exception("To be implemented by concrete implementations.")
187
class MIASPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    MIAS dataset concrete class implementation of an analysis context. MIAS
    measurements are aggregated based on a single "log" file. A result
    file is present for each stitched (of multiple fields) mosaic and
    contains the actual measured results and ROI.
    """

    # Python datetime format string of the log filename completion date/time
    datetime_format = '%Y-%m-%d-%Hh%Mm%Ss'

    # Regular expression matching a log filename (raw string so the \d
    # escapes are unambiguous)
    log_regex = re.compile(r'.*log(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Regular expression matching a result filename
    detail_regex = re.compile(
        r'^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Companion file format
    companion_format = 'Companion/MIAS'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(MIASPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        self._populate_log_and_detail_files()
        self._populate_measurements()

    def _populate_log_and_detail_files(self):
        """
        Strips out erroneous files and collects the log and result original
        files based on regular expression matching.
        """
        for original_file in self.original_files:
            if original_file.mimetype.val != self.companion_format:
                continue
            name = original_file.name.val
            match = self.log_regex.match(name)
            if match:
                d = time.strptime(match.group(1), self.datetime_format)
                self.log_files[d] = original_file
                continue
            match = self.detail_regex.match(name)
            if match:
                d = time.strptime(match.group(3), self.datetime_format)
                self.detail_files[d] = original_file
                continue

    def _populate_measurements(self):
        """
        Result original files are only recognizable as part of a given
        measurement (declared by a log file) based upon their parsed
        date/time of completion as encoded in the filename. This method
        collects result original files and groups them by collective
        parsed date/time of completion.
        """
        log_timestamps = list(self.log_files.keys())
        log_timestamps.sort()
        detail_timestamps = list(self.detail_files.keys())
        detail_timestamps.sort()
        for log_timestamp in log_timestamps:
            self.measurements[log_timestamp] = list()
        # Each detail file belongs to the first (earliest) log file whose
        # timestamp is after the detail file's own timestamp.
        for detail_timestamp in detail_timestamps:
            for log_timestamp in log_timestamps:
                if detail_timestamp < log_timestamp:
                    self.measurements[log_timestamp].append(
                        self.detail_files[detail_timestamp])
                    break

    ###
    ### Abstract method implementations
    ###

    def is_this_type(klass, original_files):
        for original_file in original_files:
            format = original_file.mimetype.val
            if format == klass.companion_format \
               and klass.log_regex.match(original_file.name.val):
                return True
        # Explicit False keeps the return type consistent with the other
        # analysis context implementations (previously fell through to None).
        return False
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        return len(self.measurements.keys())

    def get_measurement_ctx(self, index):
        # Sort the timestamps so a given index deterministically maps to the
        # same measurement run; raw dict key order is arbitrary in Python 2.
        key = sorted(self.log_files.keys())[index]
        sf = self.service_factory
        original_file = self.log_files[key]
        result_files = self.measurements[key]
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return MIASMeasurementCtx(self, sf, provider, original_file,
                                  result_files)

    def get_result_file_count(self, measurement_index):
        # Same deterministic ordering as get_measurement_ctx.
        key = sorted(self.log_files.keys())[measurement_index]
        return len(self.measurements[key])
285
class FlexPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    Flex dataset concrete class implementation of an analysis context. Flex
    measurements are aggregated in a single ".res" XML file and contain no
    ROI.
    """

    # Companion file format
    companion_format = 'Companion/Flex'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(FlexPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # Deduplicate measurement files by path; one ".res" file per run.
        path_original_file_map = dict()
        for original_file in original_files:
            path = original_file.path.val
            name = original_file.name.val
            # Renamed from "format" to avoid shadowing the builtin
            mimetype = original_file.mimetype.val
            if mimetype == self.companion_format and name.endswith('.res'):
                path_original_file_map[path] = original_file
        self.measurements = path_original_file_map.values()

    ###
    ### Abstract method implementations
    ###

    def is_this_type(klass, original_files):
        for original_file in original_files:
            mimetype = original_file.mimetype.val
            name = original_file.name.val
            if mimetype == klass.companion_format and name.endswith('.res'):
                return True
        return False
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        original_file = self.measurements[index]
        result_files = []
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return FlexMeasurementCtx(self, sf, provider, original_file,
                                  result_files)

    def get_result_file_count(self, measurement_index):
        # Flex aggregates an entire measurement run in a single file.
        return 1
337
class InCellPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    InCell dataset concrete class implementation of an analysis context.
    InCell measurements are from InCell Analyzer and are aggregated in a
    single gargantuan (often larger than 100MB per plate) XML file.
    """

    # Companion file format
    companion_format = 'Companion/InCell'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(InCellPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # Deduplicate measurement files by path; one ".xml" file per run.
        path_original_file_map = dict()
        for original_file in original_files:
            path = original_file.path.val
            name = original_file.name.val
            # Renamed from "format" to avoid shadowing the builtin
            mimetype = original_file.mimetype.val
            if mimetype == self.companion_format and name.endswith('.xml'):
                path_original_file_map[path] = original_file
        self.measurements = path_original_file_map.values()

    ###
    ### Abstract method implementations
    ###

    def is_this_type(klass, original_files):
        for original_file in original_files:
            mimetype = original_file.mimetype.val
            name = original_file.name.val
            if mimetype == klass.companion_format and name.endswith('.xml'):
                return True
        return False
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        original_file = self.measurements[index]
        result_files = []
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return InCellMeasurementCtx(self, sf, provider, original_file,
                                    result_files)

    def get_result_file_count(self, measurement_index):
        # InCell aggregates an entire measurement run in a single file.
        return 1
389
class PlateAnalysisCtxFactory(object):
    """
    The plate analysis context factory is responsible for detecting and
    returning a plate analysis context instance for a given plate.
    """

    # Concrete analysis context classes, probed in order.
    implementations = (FlexPlateAnalysisCtx, MIASPlateAnalysisCtx,
                       InCellPlateAnalysisCtx)

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.query_service = self.service_factory.getQueryService()

    def find_images_for_plate(self, plate_id):
        """
        Retrieves all the images associated with a given plate. Fetched
        are the Image's WellSample, the WellSample's Well, the annotation
        stack associated with the Image and each annotation's linked
        original file.
        """
        # The query that follows is doublely linked:
        #  * Image --> WellSample --> Well
        #  * Well --> WellSample --> Image
        # This is to facilitate later "ordered" access of fields/well
        # samples required by certain measurement contexts (notably InCell).
        log.debug("Loading image...")
        images = self.query_service.findAllByQuery(
            'select img from Image as img ' \
            'join fetch img.wellSamples as ws ' \
            'join fetch ws.well as w ' \
            'join fetch w.wellSamples as ws2 ' \
            'join w.plate as p ' \
            'left outer join fetch img.annotationLinks as ia_links ' \
            'left outer join fetch ia_links.child as ia ' \
            'left outer join fetch ia.file as i_o_file ' \
            'where p.id = %d' % plate_id, None)
        log.debug("Loading plate...")
        plate = self.query_service.findByQuery(
            'select p from Plate p ' \
            'left outer join fetch p.annotationLinks as pa_links ' \
            'left outer join fetch pa_links.child as pa ' \
            'left outer join fetch pa.file as p_o_file ' \
            'where p.id = %d' % plate_id, None)
        log.debug("Linking plate and images...")
        # Attach the (annotation-loaded) plate to every well sample's well.
        for image in images:
            for well_sample in image.copyWellSamples():
                well_sample.well.plate = plate
        return images

    def gather_original_files(self, obj, original_files, original_file_obj_map):
        """
        Collects the original files attached to obj via file annotations
        into the given set, optionally recording which object each file
        came from.
        """
        for link in obj.copyAnnotationLinks():
            child = link.child
            if not isinstance(child, FileAnnotationI):
                continue
            original_file = child.file
            original_files.add(original_file)
            if original_file_obj_map is not None:
                original_file_obj_map[original_file.id.val] = obj

    def get_analysis_ctx(self, plate_id):
        """Retrieves a plate analysis context for a given plate."""
        # Using a set since 1) no one was using the image.id key and 2)
        # we are now also collecting original files from plates (MIAS)
        # for which there's no clear key. Since all the files are loaded
        # in a single shot, double linking should not cause a problem.
        seen_plates = set()
        original_files = set()
        original_file_image_map = dict()
        images = self.find_images_for_plate(plate_id)
        for image in images:
            for well_sample in image.copyWellSamples():
                plate = well_sample.well.plate
                if plate not in seen_plates:
                    seen_plates.add(plate)
                    self.gather_original_files(plate, original_files, None)
            self.gather_original_files(image, original_files,
                                       original_file_image_map)
        for klass in self.implementations:
            if klass.is_this_type(original_files):
                return klass(images, original_files,
                             original_file_image_map,
                             plate_id, self.service_factory)
        raise MeasurementError(
            "Unable to find suitable analysis context for plate: %d" % \
            plate_id)
473
class MeasurementParsingResult(object):
    """
    Holds the results of a measurement parsing event.
    """

    def __init__(self, sets_of_columns=None):
        # Fresh list per instance when no sets were supplied (avoids the
        # shared-mutable-default trap).
        self.sets_of_columns = \
            list() if sets_of_columns is None else sets_of_columns

    def append_columns(self, columns):
        """Adds a set of columns to the parsing result."""
        self.sets_of_columns.append(columns)
487
488 -class AbstractMeasurementCtx(object):
489 """ 490 Abstract class which aggregates and represents all the results produced 491 from a given measurement run. It also provides a scaffold for interacting 492 with the OmeroTables infrastructure. 493 """ 494 495 # The number of ROI to have parsed before streaming them to the server 496 ROI_UPDATE_LIMIT = 1000 497
498 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 499 original_file, result_files):
500 super(AbstractMeasurementCtx, self).__init__() 501 self.analysis_ctx = analysis_ctx 502 self.service_factory = service_factory 503 self.original_file_provider = original_file_provider 504 self.query_service = self.service_factory.getQueryService() 505 self.update_service = self.service_factory.getUpdateService() 506 self.original_file = original_file 507 self.result_files = result_files 508 509 # Establish the rest of our initial state 510 self.wellimages = dict() 511 for image in self.analysis_ctx.images: 512 for well_sample in image.copyWellSamples(): 513 well = well_sample.well 514 idx = well.copyWellSamples().index(well_sample) 515 row = well.row.val 516 column = well.column.val 517 if row not in self.wellimages: 518 self.wellimages[row] = dict() 519 if column not in self.wellimages[row]: 520 self.wellimages[row][column] = [] 521 # Now we save the image at it's proper index 522 l = self.wellimages[row][column] 523 for x in range(idx - len(l) + 1): 524 l.append(None) 525 l[idx] = image
526
527 - def get_well_images(self, row, col):
528 """ 529 Takes a row and a col index and returns a tuple 530 of Well and image. Either might be None. Uses the 531 first image found to find the Well and therefore 532 must be loaded (image->wellSample->well) 533 """ 534 try: 535 images = self.wellimages[row][col] 536 if not images: 537 return (None, None) 538 image = images[0] 539 well = image.copyWellSamples()[0].well 540 return (well, images) 541 except KeyError: 542 # This has the potential to happen alot with the 543 # datasets we have given the split machine acquisition 544 # ".flex" file storage. 545 print "WARNING: Missing data for row %d column %d" % \ 546 (row, col) 547 return (None, None)
548
549 - def update_table(self, columns):
550 """Updates the OmeroTables instance backing our results.""" 551 # Create a new OMERO table to store our measurement results 552 sr = self.service_factory.sharedResources() 553 name = self.get_name() 554 self.table = sr.newTable(1, '/%s.r5' % name) 555 if self.table is None: 556 raise MeasurementError( 557 "Unable to create table: %s" % name) 558 559 # Retrieve the original file corresponding to the table for the 560 # measurement, link it to the file annotation representing the 561 # umbrella measurement run, link the annotation to the plate from 562 # which it belongs and save the file annotation. 563 table_original_file = self.table.getOriginalFile() 564 table_original_file_id = table_original_file.id.val 565 print "Created new table: %d" % table_original_file_id 566 unloaded_o_file = OriginalFileI(table_original_file_id, False) 567 self.file_annotation.file = unloaded_o_file 568 unloaded_plate = PlateI(self.analysis_ctx.plate_id, False) 569 plate_annotation_link = PlateAnnotationLinkI() 570 plate_annotation_link.parent = unloaded_plate 571 plate_annotation_link.child = self.file_annotation 572 plate_annotation_link = \ 573 self.update_service.saveAndReturnObject(plate_annotation_link) 574 self.file_annotation = plate_annotation_link.child 575 576 t0 = int(time.time() * 1000) 577 self.table.initialize(columns) 578 print "Table init took %sms" % (int(time.time() * 1000) - t0) 579 t0 = int(time.time() * 1000) 580 self.table.addData(columns) 581 print "Table update took %sms" % (int(time.time() * 1000) - t0)
582
583 - def create_file_annotation(self, set_of_columns):
584 """ 585 Creates a file annotation to represent a set of columns from our 586 measurment. 587 """ 588 self.file_annotation = FileAnnotationI() 589 self.file_annotation.ns = \ 590 rstring('openmicroscopy.org/omero/measurement') 591 name = self.get_name(set_of_columns) 592 self.file_annotation.description = rstring(name)
593
594 - def update_rois(self, rois, column):
595 """ 596 Updates a set of ROI inserting the updated IDs back into a given 597 column. 598 """ 599 print "Saving %d ROI at %d" % (len(rois), len(column.values)) 600 t0 = int(time.time() * 1000) 601 roi_ids = self.update_service.saveAndReturnIds(rois) 602 print "ROI update took %sms" % (int(time.time() * 1000) - t0) 603 column.values += roi_ids 604 print "Total ROI saved: %d" % (len(column.values))
605
606 - def image_from_original_file(self, original_file):
607 """Returns the image from which an original file has originated.""" 608 m = self.analysis_ctx.original_file_image_map 609 return m[original_file.id.val]
610
611 - def parse_and_populate(self):
612 """ 613 Calls parse and populate, updating the OmeroTables instance backing 614 our results and the OMERO database itself. 615 """ 616 result = self.parse() 617 if result is None: 618 return 619 for i, columns in enumerate(result.sets_of_columns): 620 self.create_file_annotation(i) 621 self.parse_and_populate_roi(columns) 622 self.populate(columns)
623 624 ### 625 ### Abstract methods 626 ### 627
628 - def get_name(self, set_of_columns=None):
629 """Returns the name of the measurement, and a set of columns.""" 630 raise Exception("To be implemented by concrete implementations.")
631
632 - def parse(self):
633 """Parses result files, returning a MeasurementParsingResult.""" 634 raise Exception("To be implemented by concrete implementations.")
635
636 - def parse_and_populate_roi(self, columns):
637 """ 638 Parses and populates ROI from column data in the OMERO database. 639 """ 640 raise Exception("To be implemented by concrete implementations.")
641
642 - def populate(self, columns):
643 """ 644 Populates an OmeroTables instance backing our results and ROI 645 linkages. 646 """ 647 raise Exception("To be implemented by concrete implementations.")
648
649 -class MIASMeasurementCtx(AbstractMeasurementCtx):
650 """ 651 MIAS measurements are a set of tab delimited text files per well. Each 652 TSV file's content is prefixed by the analysis parameters. 653 """ 654 655 # The OmeroTable ImageColumn index 656 IMAGE_COL = 0 657 658 # The OmeroTable RoiColumn index 659 ROI_COL = 1 660 661 # Expected columns in NEO datasets 662 NEO_EXPECTED = ('Image', 'ROI', 'Label', 'Row', 'Col', 'Nucleus Area', 663 'Cell Diam.', 'Cell Type', 'Mean Nucleus Intens.') 664 665 # Expected columns in MNU datasets 666 MNU_EXPECTED = ('Image', 'ROI', 'row', 'col', 'type') 667
668 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 669 original_file, result_files):
670 super(MIASMeasurementCtx, self).__init__( 671 analysis_ctx, service_factory, original_file_provider, 672 original_file, result_files)
673
674 - def get_empty_columns(self, n_columns):
675 """ 676 Retrieves a set of empty OmeroTables columns for the analysis results 677 prefixed by an ImageColumn and RoiColumn to handle these linked 678 object indexes. 679 """ 680 columns = [ImageColumn('Image', '', list()), 681 RoiColumn('ROI', '', list())] 682 for i in range(n_columns): 683 columns.append(DoubleColumn('', '', list())) 684 return columns
685 686 ### 687 ### Overriding abstract implementation 688 ### 689
690 - def image_from_original_file(self, original_file):
691 """ 692 Overriding the abstract implementation since the companion 693 files are no longer attached to the images, but only to the plate 694 for MIAS. Instead, we use the filename itself to find the image. 695 """ 696 name = original_file.name.val 697 # Copy: '^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$' 698 match = MIASPlateAnalysisCtx.detail_regex.match(name) 699 if match: 700 well_num = int(match.group(1)) 701 return self.analysis_ctx.image_from_wellnumber(well_num) 702 else: 703 raise exceptions.Exception("Not a detail file")
704 705 ### 706 ### Abstract method implementations 707 ### 708
709 - def get_name(self, set_of_columns=None):
710 return self.original_file.name.val[:-4]
711
712 - def parse(self):
713 columns = None 714 for result_file in self.result_files: 715 print "Parsing: %s" % result_file.name.val 716 image = self.image_from_original_file(result_file) 717 provider = self.original_file_provider 718 data = provider.get_original_file_data(result_file) 719 try: 720 rows = list(csv.reader(data, delimiter='\t')) 721 finally: 722 data.close() 723 rows.reverse() 724 if columns is None: 725 columns = self.get_empty_columns(len(rows[0])) 726 for row in rows: 727 try: 728 for i, value in enumerate(row): 729 value = float(value) 730 columns[i + 2].values.append(value) 731 columns[self.IMAGE_COL].values.append(image.id.val) 732 except ValueError: 733 for i, value in enumerate(row): 734 columns[i + 2].name = value 735 break 736 print "Returning %d columns" % len(columns) 737 return MeasurementParsingResult([columns])
738
739 - def _parse_neo_roi(self, columns):
740 """Parses out ROI from OmeroTables columns for 'NEO' datasets.""" 741 print "Parsing %s NEO ROIs..." % (len(columns[0].values)) 742 image_ids = columns[self.IMAGE_COL].values 743 rois = list() 744 # Save our file annotation to the database so we can use an unloaded 745 # annotation for the saveAndReturnIds that will be triggered below. 746 self.file_annotation = \ 747 self.update_service.saveAndReturnObject(self.file_annotation) 748 unloaded_file_annotation = \ 749 FileAnnotationI(self.file_annotation.id.val, False) 750 for i, image_id in enumerate(image_ids): 751 unloaded_image = ImageI(image_id, False) 752 roi = RoiI() 753 shape = EllipseI() 754 values = columns[6].values 755 diameter = rdouble(float(values[i])) 756 shape.theZ = rint(0) 757 shape.theT = rint(0) 758 values = columns[4].values 759 shape.cx = rdouble(float(values[i])) 760 values = columns[3].values 761 shape.cy = rdouble(float(values[i])) 762 shape.rx = diameter 763 shape.ry = diameter 764 roi.addShape(shape) 765 roi.image = unloaded_image 766 roi.linkAnnotation(unloaded_file_annotation) 767 rois.append(roi) 768 if len(rois) == self.ROI_UPDATE_LIMIT: 769 self.update_rois(rois, columns[self.ROI_COL]) 770 rois = list() 771 self.update_rois(rois, columns[self.ROI_COL])
772
773 - def _parse_mnu_roi(self, columns):
774 """Parses out ROI from OmeroTables columns for 'MNU' datasets.""" 775 print "Parsing %s MNU ROIs..." % (len(columns[0].values)) 776 image_ids = columns[self.IMAGE_COL].values 777 rois = list() 778 # Save our file annotation to the database so we can use an unloaded 779 # annotation for the saveAndReturnIds that will be triggered below. 780 self.file_annotation = \ 781 self.update_service.saveAndReturnObject(self.file_annotation) 782 unloaded_file_annotation = \ 783 FileAnnotationI(self.file_annotation.id.val, False) 784 for i, image_id in enumerate(image_ids): 785 unloaded_image = ImageI(image_id, False) 786 roi = RoiI() 787 shape = PointI() 788 shape.theZ = rint(0) 789 shape.theT = rint(0) 790 values = columns[3].values 791 shape.cx = rdouble(float(values[i])) 792 values = columns[2].values 793 shape.cy = rdouble(float(values[i])) 794 roi.addShape(shape) 795 roi.image = unloaded_image 796 roi.linkAnnotation(unloaded_file_annotation) 797 rois.append(roi) 798 if len(rois) == self.ROI_UPDATE_LIMIT: 799 self.update_rois(rois, columns[self.ROI_COL]) 800 rois = list() 801 self.update_rois(rois, columns[self.ROI_COL])
802
803 - def parse_and_populate_roi(self, columns):
804 names = [column.name for column in columns] 805 neo = [name in self.NEO_EXPECTED for name in names] 806 mnu = [name in self.MNU_EXPECTED for name in names] 807 for name in names: 808 print "Column: %s" % name 809 if len(columns) == 9 and False not in neo: 810 self._parse_neo_roi(columns) 811 elif len(columns) == 5 and False not in mnu: 812 self._parse_mnu_roi(columns) 813 else: 814 print "WARNING: Unknown ROI type for MIAS dataset: %r" % names
815
816 - def populate(self, columns):
817 """ 818 Query performed:: 819 first_roi = columns[self.ROI_COL].values[0] 820 first_roi = self.query_service.findByQuery( 821 'select roi from Roi as roi ' \ 822 'join fetch roi.annotationLinks as link ' \ 823 'join fetch link.child ' \ 824 'where roi.id = %d' % first_roi, None) 825 self.file_annotation = first_roi.copyAnnotationLinks()[0].child 826 """ 827 self.update_table(columns)
828
829 -class FlexMeasurementCtx(AbstractMeasurementCtx):
830 """ 831 Flex measurements are located deep within a ".res" XML file container 832 and contain no ROI. 833 """ 834 835 # The XPath to the <Area> which aggregate an acquisition 836 AREA_XPATH = './/Areas/Area' 837 838 # The XPath to the an analysis <Parameter>; will become a column header 839 # and is below AREA_XPATH 840 PARAMETER_XPATH = './/Wells/ResultParameters/Parameter' 841 842 # The XPath to a <Well> which has had at least one acquisition event 843 # within and is below AREA_XPATH 844 WELL_XPATH = './/Wells/Well' 845 846 # The XPath to a <Result> for a given well and is below WELL_XPATH 847 RESULT_XPATH = './/Result' 848
849 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 850 original_file, result_files):
851 super(FlexMeasurementCtx, self).__init__( 852 analysis_ctx, service_factory, original_file_provider, 853 original_file, result_files)
854
855 - def get_empty_columns(self, headers):
856 """ 857 Retrieves a set of empty OmeroTables columns for the analysis results 858 prefixed by a WellColumn to handle linked object indexes. 859 """ 860 columns = {'Well': WellColumn('Well', '', list())} 861 for header in headers: 862 columns[header] = DoubleColumn(header, '', list()) 863 return columns
864 865 ### 866 ### Abstract method implementations 867 ### 868
869 - def get_name(self, set_of_columns=None):
870 return self.original_file.name.val[:-4]
871
872 - def parse(self):
873 print "Parsing: %s" % self.original_file.name.val 874 provider = self.original_file_provider 875 data = provider.get_original_file_data(self.original_file) 876 try: 877 et = ElementTree(file=data) 878 finally: 879 data.close() 880 root = et.getroot() 881 areas = root.findall(self.AREA_XPATH) 882 print "Area count: %d" % len(areas) 883 for i, area in enumerate(areas): 884 result_parameters = area.findall(self.PARAMETER_XPATH) 885 print "Area %d result children: %d" % (i, len(result_parameters)) 886 if len(result_parameters) == 0: 887 print "%s contains no analysis data." % self.get_name() 888 return 889 headers = list() 890 for result_parameter in result_parameters: 891 headers.append(result_parameter.text) 892 columns = self.get_empty_columns(headers) 893 wells = area.findall(self.WELL_XPATH) 894 for well in wells: 895 # Rows and columns are 1-indexed, OMERO wells are 0-indexed 896 row = int(well.get('row')) - 1 897 column = int(well.get('col')) - 1 898 try: 899 v = columns['Well'].values 900 wellobj, images = self.get_well_images(row, column) 901 if not wellobj: 902 continue 903 v.append(wellobj.id.val) 904 except: 905 log.exception("ERROR: Failed to get well images") 906 continue 907 results = well.findall(self.RESULT_XPATH) 908 for result in results: 909 name = result.get('name') 910 columns[name].values.append(float(result.text)) 911 return MeasurementParsingResult([columns.values()])
912
def parse_and_populate_roi(self, columns):
    """Intentionally a no-op for this measurement context."""
    return None
915
def populate(self, columns):
    """Persist the parsed columns by delegating to update_table()."""
    self.update_table(columns)
918
919 -class InCellMeasurementCtx(AbstractMeasurementCtx):
920 """ 921 InCell Analyzer measurements are located deep within an XML file container. 922 """ 923 924 # Cells expected centre of gravity columns 925 CELLS_CG_EXPECTED = ['Cell: cgX', 'Cell: cgY'] 926 927 # Nulcei expected centre of gravity columns 928 NUCLEI_CG_EXPECTED = ['Nucleus: cgX', 'Nucleus: cgY'] 929 930 # Expected source attribute value for cell data 931 CELLS_SOURCE = 'Cells' 932 933 # Expected source attribute value for nuclei data 934 NUCLEI_SOURCE = 'Nuclei' 935
936 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 937 original_file, result_files):
938 super(InCellMeasurementCtx, self).__init__( 939 analysis_ctx, service_factory, original_file_provider, 940 original_file, result_files)
941
942 - def check_sparse_data(self, columns):
943 """ 944 Checks a set of columns for sparse data (one column shorter than 945 the rest) and adds -1 where appropriate. 946 """ 947 length = None 948 for i, column in enumerate(columns): 949 if column.name == 'ROI': 950 # ROI are processed late so we don't care if this column 951 # is sparse or not. 952 continue 953 current_length = len(column.values) 954 if length is not None: 955 if current_length > length: 956 log.warn("%s length %d > %d modding previous column" % \ 957 (column.name, current_length, length)) 958 columns[i - 1].values.append(-1.0) 959 if current_length < length: 960 log.warn("%s length %d < %d modding current column" % \ 961 (column.name, current_length, length)) 962 column.values.append(-1.0) 963 length = len(column.values)
964 965 ### 966 ### Abstract method implementations 967 ### 968
969 - def get_name(self, set_of_columns=None):
970 if set_of_columns is None: 971 return self.original_file.name.val[:-4] 972 elif set_of_columns == 0: 973 return self.original_file.name.val[:-4] + ' Cells' 974 elif set_of_columns == 1: 975 return self.original_file.name.val[:-4] + ' Nuclei'
976
977 - def parse(self):
978 print "Parsing: %s" % self.original_file.name.val 979 provider = self.original_file_provider 980 data = provider.get_original_file_data(self.original_file) 981 try: 982 events = ('start', 'end') 983 well_data = None 984 n_roi = 0 985 n_measurements = 0 986 cells_columns = {'Image': ImageColumn('Image', '', list()), 987 'Cell': LongColumn('Cell', '', list()), 988 'ROI': RoiColumn('ROI', '', list()) 989 } 990 nuclei_columns = {'Image': ImageColumn('Image', '', list()), 991 'Cell': LongColumn('Cell', '', list()), 992 'ROI': RoiColumn('ROI', '', list()) 993 } 994 for event, element in iterparse(data, events=events): 995 if event == 'start' and element.tag == 'WellData' \ 996 and element.get('cell') != 'Summary': 997 row = int(element.get('row')) - 1 998 col = int(element.get('col')) - 1 999 i = int(element.get('field')) - 1 1000 try: 1001 well, images = self.get_well_images(row, col) 1002 if not images: 1003 continue 1004 image = images[i] 1005 except: 1006 log.exception("ERROR: Failed to get well images") 1007 continue 1008 self.check_sparse_data(cells_columns.values()) 1009 self.check_sparse_data(nuclei_columns.values()) 1010 cell = long(element.get('cell')) 1011 cells_columns['Cell'].values.append(cell) 1012 nuclei_columns['Cell'].values.append(cell) 1013 well_data = element 1014 cells_columns['Image'].values.append(image.id.val) 1015 nuclei_columns['Image'].values.append(image.id.val) 1016 elif well_data is not None and event == 'start' \ 1017 and element.tag == 'Measure': 1018 source = element.get('source') 1019 key = element.get('key') 1020 value = float(element.get('value')) 1021 if source == self.CELLS_SOURCE: 1022 if n_roi == 0: 1023 cells_columns[key] = DoubleColumn(key, '', list()) 1024 cells_columns[key].values.append(value) 1025 elif source == self.NUCLEI_SOURCE: 1026 if n_roi == 0: 1027 nuclei_columns[key] = DoubleColumn(key, '', list()) 1028 nuclei_columns[key].values.append(value) 1029 else: 1030 if n_roi == 0: 1031 cells_columns[key] = 
DoubleColumn(key, '', list()) 1032 nuclei_columns[key] = DoubleColumn(key, '', list()) 1033 cells_columns[key].values.append(value) 1034 nuclei_columns[key].values.append(value) 1035 n_measurements += 1 1036 elif event == 'end' and element.tag == 'WellData': 1037 if well_data is not None: 1038 n_roi += 1 1039 well_data.clear() 1040 well_data = None 1041 else: 1042 element.clear() 1043 print "Total ROI: %d" % n_roi 1044 print "Total measurements: %d" % n_measurements 1045 sets_of_columns = [cells_columns.values(), nuclei_columns.values()] 1046 return MeasurementParsingResult(sets_of_columns) 1047 finally: 1048 data.close()
1049
1050 - def parse_and_populate_roi(self, columns_as_list):
1051 # First sanity check our provided columns 1052 names = [column.name for column in columns_as_list] 1053 cells_expected = [name in names for name in self.CELLS_CG_EXPECTED] 1054 nuclei_expected = [name in names for name in self.NUCLEI_CG_EXPECTED] 1055 if (False in cells_expected) and (False in nuclei_expected): 1056 print "WARNING: Missing CGs for InCell dataset: %r" % names 1057 return 1058 # Reconstruct a column name to column map 1059 columns = dict() 1060 for column in columns_as_list: 1061 columns[column.name] = column 1062 image_ids = columns['Image'].values 1063 rois = list() 1064 # Save our file annotation to the database so we can use an unloaded 1065 # annotation for the saveAndReturnIds that will be triggered below. 1066 self.file_annotation = \ 1067 self.update_service.saveAndReturnObject(self.file_annotation) 1068 unloaded_file_annotation = \ 1069 FileAnnotationI(self.file_annotation.id.val, False) 1070 # Parse and append ROI 1071 for i, image_id in enumerate(image_ids): 1072 unloaded_image = ImageI(image_id, False) 1073 if False in nuclei_expected: 1074 # Cell centre of gravity 1075 roi = RoiI() 1076 shape = PointI() 1077 shape.theZ = rint(0) 1078 shape.theT = rint(0) 1079 shape.cx = rdouble(float(columns['Cell: cgX'].values[i])) 1080 shape.cy = rdouble(float(columns['Cell: cgY'].values[i])) 1081 roi.addShape(shape) 1082 roi.image = unloaded_image 1083 roi.linkAnnotation(unloaded_file_annotation) 1084 rois.append(roi) 1085 elif False in cells_expected: 1086 # Nucleus centre of gravity 1087 roi = RoiI() 1088 shape = PointI() 1089 shape.theZ = rint(0) 1090 shape.theT = rint(0) 1091 shape.cx = rdouble(float(columns['Nucleus: cgX'].values[i])) 1092 shape.cy = rdouble(float(columns['Nucleus: cgY'].values[i])) 1093 roi.addShape(shape) 1094 roi.image = unloaded_image 1095 roi.linkAnnotation(unloaded_file_annotation) 1096 rois.append(roi) 1097 else: 1098 raise MeasurementError('Not a nucleus or cell ROI') 1099 if len(rois) == self.ROI_UPDATE_LIMIT: 1100 
self.update_rois(rois, columns['ROI']) 1101 rois = list() 1102 self.update_rois(rois, columns['ROI'])
1103
1104 - def populate(self, columns):
1105 self.update_table(columns)
1106 1107 if __name__ == "__main__": 1108 try: 1109 options, args = getopt(sys.argv[1:], "s:p:u:m:k:id") 1110 except GetoptError, (msg, opt): 1111 usage(msg) 1112 1113 try: 1114 plate_id, = args 1115 plate_id = long(plate_id) 1116 except ValueError: 1117 usage("Plate ID must be a specified and a number!") 1118 1119 username = None 1120 hostname = None 1121 port = 4064 # SSL 1122 measurement = None 1123 info = False 1124 session_key = None 1125 logging_level = logging.WARN 1126 for option, argument in options: 1127 if option == "-u": 1128 username = argument 1129 if option == "-s": 1130 hostname = argument 1131 if option == "-p": 1132 port = int(argument) 1133 if option == "-m": 1134 measurement = int(argument) 1135 if option == "-i": 1136 info = True 1137 if option == "-k": 1138 session_key = argument 1139 if option == "-d": 1140 logging_level = logging.DEBUG 1141 if session_key is None and username is None: 1142 usage("Username must be specified!") 1143 if session_key is None and hostname is None: 1144 usage("Host name must be specified!") 1145 if session_key is None: 1146 password = getpass() 1147 1148 logging.basicConfig(level = logging_level) 1149 c = client(hostname, port) 1150 c.setAgent("OMERO.populate_roi") 1151 c.enableKeepAlive(60) 1152 try: 1153 if session_key is not None: 1154 service_factory = c.createSession(session_key) 1155 else: 1156 service_factory = c.createSession(username, password) 1157 1158 factory = PlateAnalysisCtxFactory(service_factory) 1159 analysis_ctx = factory.get_analysis_ctx(plate_id) 1160 n_measurements = analysis_ctx.get_measurement_count() 1161 if measurement is not None and measurement >= n_measurements: 1162 usage("measurement %d not a valid index!") 1163 if info: 1164 for i in range(n_measurements): 1165 n_result_files = analysis_ctx.get_result_file_count(i) 1166 print "Measurement %d has %d result files." 
% \ 1167 (i, n_result_files) 1168 sys.exit(0) 1169 if measurement is not None: 1170 measurement_ctx = analysis_ctx.get_measurement_ctx(measurement) 1171 measurement_ctx.parse_and_populate() 1172 else: 1173 for i in range(n_measurements): 1174 measurement_ctx = analysis_ctx.get_measurement_ctx(i) 1175 measurement_ctx.parse_and_populate() 1176 finally: 1177 c.closeSession() 1178