Package omero :: Package util :: Module populate_roi
[hide private]
[frames] | [no frames]

Source Code for Module omero.util.populate_roi

   1  #!/usr/bin/env python 
   2  # encoding: utf-8 
   3  """ 
   4  ... 
   5  """ 
   6   
   7  # 
   8  #  Copyright (C) 2009 University of Dundee. All rights reserved. 
   9  # 
  10  # 
  11  #  This program is free software; you can redistribute it and/or modify 
  12  #  it under the terms of the GNU General Public License as published by 
  13  #  the Free Software Foundation; either version 2 of the License, or 
  14  #  (at your option) any later version. 
  15  #  This program is distributed in the hope that it will be useful, 
  16  #  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  17  #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  18  #  GNU General Public License for more details. 
  19  # 
  20  #  You should have received a copy of the GNU General Public License along 
  21  #  with this program; if not, write to the Free Software Foundation, Inc., 
  22  #  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  23  # 
  24   
  25   
  26  import exceptions 
  27  import tempfile 
  28  import logging 
  29  import time 
  30  import sys 
  31  import csv 
  32  import re 
  33  from StringIO import StringIO 
  34  from getpass import getpass 
  35  from getopt import getopt, GetoptError 
  36   
  37  import omero.clients 
  38  from omero.rtypes import rdouble, rstring, rint 
  39  from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ImageI, \ 
  40                          FileAnnotationI, RoiI, EllipseI, PointI 
  41  from omero.grid import ImageColumn, WellColumn, RoiColumn, LongColumn, DoubleColumn 
  42  from omero.util.temp_files import create_path, remove_path 
  43  from omero import client 
  44   
  45  # Handle Python 2.5 built-in ElementTree 
  46  try: 
  47          from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  48  except ImportError: 
  49          from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  50   
  51  log = logging.getLogger("omero.util.populate_roi") 
  52   
53 -def usage(error):
54 """Prints usage so that we don't have to. :)""" 55 cmd = sys.argv[0] 56 print """%s 57 Usage: %s [-h hostname] [-u username | -k session_key] <-p port> [plate_id] 58 Runs measurement population code for a given plate. 59 60 Options: 61 -u OMERO username to use 62 -k OMERO session key to use 63 -h OMERO hostname to use 64 -p OMERO port to use [defaults to 4063] 65 -m Measurement index to populate 66 -i Dump measurement information and exit (no population) 67 -d Print debug statements 68 69 Examples: 70 %s -h localhost -p 4063 -u bob 27 71 72 Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd) 73 sys.exit(2)
74
class MeasurementError(Exception):
    """
    Raised by the analysis or measurement context when an error condition
    is reached.
    """
81
82 -class DownloadingOriginalFileProvider(object):
83 """ 84 Provides original file data by downloading it from an OMERO raw file store. 85 """ 86 87 # Default raw file store buffer size 88 BUFFER_SIZE = 1024 * 1024 # 1MB 89
90 - def __init__(self, service_factory):
91 self.service_factory = service_factory 92 self.raw_file_store = self.service_factory.createRawFileStore() 93 self.dir = create_path("populate_roi", "dir", folder = True)
94
95 - def get_original_file_data(self, original_file):
96 """ 97 Downloads an original file to a temporary file and returns an open 98 file handle to that temporary file seeked to zero. The caller is 99 responsible for closing the temporary file. 100 """ 101 print "Downloading original file: %d" % original_file.id.val 102 self.raw_file_store.setFileId(original_file.id.val) 103 temporary_file = tempfile.TemporaryFile(dir=str(self.dir)) 104 size = original_file.size.val 105 for i in range((size / self.BUFFER_SIZE) + 1): 106 index = i * self.BUFFER_SIZE 107 data = self.raw_file_store.read(index, self.BUFFER_SIZE) 108 temporary_file.write(data) 109 temporary_file.seek(0L) 110 temporary_file.truncate(size) 111 return temporary_file
112
113 - def __delete__(self):
114 self.raw_file_store.close()
115
class AbstractPlateAnalysisCtx(object):
    """
    Abstract class which aggregates and represents all measurement runs made on
    a given Plate.
    """

    # Original file provider used by concrete contexts to fetch file data.
    DEFAULT_ORIGINAL_FILE_PROVIDER = DownloadingOriginalFileProvider

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(AbstractPlateAnalysisCtx, self).__init__()
        self.images = images
        self.numcols, self.numrows = self.guess_geometry(self.images)
        self.original_files = original_files
        self.original_file_image_map = original_file_image_map
        self.plate_id = plate_id
        self.service_factory = service_factory
        self.log_files = dict()
        self.detail_files = dict()
        self.measurements = dict()

    def guess_geometry(self, images):
        """Infers the plate's (columns, rows) geometry from its images."""
        max_col = 0
        max_row = 0
        for image in images:
            # Only the first well sample link is considered.
            well = image.copyWellSamples()[0].well
            if well.column.val > max_col:
                max_col = well.column.val
            if well.row.val > max_row:
                max_row = well.row.val
        return (max_col + 1, max_row + 1)

    def colrow_from_wellnumber(self, width, wellnumber):
        """Converts a one-based well number into a zero-based (col, row)."""
        zero_based = wellnumber - 1
        return (zero_based % width, zero_based // width)

    def image_from_wellnumber(self, wellnumber):
        """Finds the image whose first well sample sits at the given well."""
        col, row = self.colrow_from_wellnumber(self.numcols, wellnumber)
        log.debug("Finding image for %s (%s,%s)..." % (wellnumber, col, row))
        for image in self.images:
            well = image.copyWellSamples()[0].well
            if well.column.val == col and well.row.val == row:
                return image
        raise exceptions.Exception(
            "Could not find image for (col,row)==(%s,%s)" % (col, row))

    ###
    ### Abstract methods
    ###

    @classmethod
    def is_this_type(klass):
        """
        Concrete implementations are to return True if the class pertinent
        for the original files associated with the plate.
        """
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_count(self):
        """Returns the number of recognized measurement runs."""
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_ctx(self, index):
        """Returns the measurement context for a given index."""
        raise Exception("To be implemented by concrete implementations.")

    def get_result_file_count(self, measurement_index):
        """
        Return the number of result files associated with a measurement run.
        """
        raise Exception("To be implemented by concrete implementations.")
187
class MIASPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    MIAS dataset concrete class implementation of an analysis context. MIAS
    measurements are aggregated based on a single "log" file. A result
    file is present for each stitched (of multiple fields) mosaic and
    contains the actual measured results and ROI.
    """

    # Python datetime format string of the log filename completion date/time
    datetime_format = '%Y-%m-%d-%Hh%Mm%Ss'

    # Regular expression matching a log filename (raw string so the \d
    # escapes are explicit; the compiled pattern is unchanged)
    log_regex = re.compile(r'.*log(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Regular expression matching a result filename
    detail_regex = re.compile(
        r'^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Companion file format
    companion_format = 'Companion/MIAS'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(MIASPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        self._populate_log_and_detail_files()
        self._populate_measurements()

    def _populate_log_and_detail_files(self):
        """
        Strips out erroneous files and collects the log and result original
        files based on regular expression matching.
        """
        for original_file in self.original_files:
            if original_file.format.value.val != self.companion_format:
                continue
            name = original_file.name.val
            match = self.log_regex.match(name)
            if match:
                d = time.strptime(match.group(1), self.datetime_format)
                self.log_files[d] = original_file
                continue
            match = self.detail_regex.match(name)
            if match:
                d = time.strptime(match.group(3), self.datetime_format)
                self.detail_files[d] = original_file
                continue

    def _populate_measurements(self):
        """
        Result original files are only recognizable as part of a given
        measurement (declared by a log file) based upon their parsed
        date/time of completion as encoded in the filename. This method
        collects result original files and groups them by collective
        parsed date/time of completion.
        """
        log_timestamps = list(self.log_files.keys())
        log_timestamps.sort()
        detail_timestamps = list(self.detail_files.keys())
        detail_timestamps.sort()
        for log_timestamp in log_timestamps:
            self.measurements[log_timestamp] = list()
        # Each detail file belongs to the measurement declared by the first
        # log file completed after it.
        for detail_timestamp in detail_timestamps:
            for log_timestamp in log_timestamps:
                if detail_timestamp < log_timestamp:
                    self.measurements[log_timestamp].append(
                        self.detail_files[detail_timestamp])
                    break

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for original_file in original_files:
            format = original_file.format.value.val
            if format == klass.companion_format \
               and klass.log_regex.match(original_file.name.val):
                return True
        # BUG FIX: previously fell through returning None; explicit False
        # for consistency with the sibling implementations.
        return False

    def get_measurement_count(self):
        return len(self.measurements.keys())

    def get_measurement_ctx(self, index):
        # BUG FIX: dict key order is arbitrary in Python 2, so indexing
        # unsorted keys() made the index --> measurement mapping
        # nondeterministic. Sorting matches _populate_measurements().
        key = sorted(self.log_files.keys())[index]
        sf = self.service_factory
        original_file = self.log_files[key]
        result_files = self.measurements[key]
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return MIASMeasurementCtx(self, sf, provider, original_file,
                                  result_files)

    def get_result_file_count(self, measurement_index):
        # Same deterministic ordering as get_measurement_ctx().
        key = sorted(self.log_files.keys())[measurement_index]
        return len(self.measurements[key])
class FlexPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    Flex dataset concrete class implementation of an analysis context. Flex
    measurements are aggregated in a single ".res" XML file and contain no
    ROI.
    """

    # Companion file format
    companion_format = 'Companion/Flex'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(FlexPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # Deduplicate companion ".res" files by path; each unique path is
        # one measurement.
        by_path = dict()
        for original_file in original_files:
            if original_file.format.value.val != self.companion_format:
                continue
            path = original_file.path.val
            if path.endswith('.res'):
                by_path[path] = original_file
        self.measurements = by_path.values()

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        """True when at least one Flex ".res" companion file is present."""
        for original_file in original_files:
            if original_file.format.value.val == klass.companion_format \
               and original_file.path.val.endswith('.res'):
                return True
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return FlexMeasurementCtx(self, sf, provider,
                                  self.measurements[index], [])

    def get_result_file_count(self, measurement_index):
        # A Flex measurement is always backed by exactly one ".res" file.
        return 1
335
class InCellPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    InCell dataset concrete class implementation of an analysis context.
    InCell measurements are from InCell Analyzer and are aggregated in a
    single gargantuan (often larger than 100MB per plate) XML file.
    """

    # Companion file format
    companion_format = 'Companion/InCell'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(InCellPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # Deduplicate companion ".xml" files by path; each unique path is
        # one measurement.
        by_path = dict()
        for original_file in original_files:
            if original_file.format.value.val != self.companion_format:
                continue
            path = original_file.path.val
            if path.endswith('.xml'):
                by_path[path] = original_file
        self.measurements = by_path.values()

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        """True when at least one InCell ".xml" companion file is present."""
        for original_file in original_files:
            if original_file.format.value.val == klass.companion_format \
               and original_file.path.val.endswith('.xml'):
                return True
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return InCellMeasurementCtx(self, sf, provider,
                                    self.measurements[index], [])

    def get_result_file_count(self, measurement_index):
        # A single XML file backs each InCell measurement.
        return 1
385
class PlateAnalysisCtxFactory(object):
    """
    The plate analysis context factory is responsible for detecting and
    returning a plate analysis context instance for a given plate.
    """

    # Detection order matters: first implementation to match wins.
    implementations = (FlexPlateAnalysisCtx, MIASPlateAnalysisCtx,
                       InCellPlateAnalysisCtx)

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.query_service = self.service_factory.getQueryService()

    def find_images_for_plate(self, plate_id):
        """
        Retrieves all the images associated with a given plate. Fetched
        are the Image's WellSample, the WellSample's Well, the annotation
        stack associated with the Image and each annotation's linked
        original file.
        """
        # The query that follows is doublely linked:
        #  * Image --> WellSample --> Well
        #  * Well --> WellSample --> Image
        # This is to facilitate later "ordered" access of fields/well
        # samples required by certain measurement contexts (notably InCell).
        log.debug("Loading image...")
        image_hql = (
            'select img from Image as img '
            'join fetch img.wellSamples as ws '
            'join fetch ws.well as w '
            'join fetch w.wellSamples as ws2 '
            'join w.plate as p '
            'left outer join fetch img.annotationLinks as ia_links '
            'left outer join fetch ia_links.child as ia '
            'left outer join fetch ia.file as i_o_file '
            'left outer join fetch i_o_file.format '
            'where p.id = %d' % plate_id)
        images = self.query_service.findAllByQuery(image_hql, None)
        log.debug("Loading plate...")
        plate_hql = (
            'select p from Plate p '
            'left outer join fetch p.annotationLinks as pa_links '
            'left outer join fetch pa_links.child as pa '
            'left outer join fetch pa.file as p_o_file '
            'left outer join fetch p_o_file.format '
            'where p.id = %d' % plate_id)
        plate = self.query_service.findByQuery(plate_hql, None)
        log.debug("Linking plate and images...")
        for image in images:
            for ws in image.copyWellSamples():
                ws.well.plate = plate
        return images

    def gather_original_files(self, obj, original_files,
                              original_file_obj_map):
        """Collects original files from obj's FileAnnotation links."""
        for annotation_link in obj.copyAnnotationLinks():
            annotation = annotation_link.child
            if not isinstance(annotation, FileAnnotationI):
                continue
            f = annotation.file
            original_files.add(f)
            if original_file_obj_map is not None:
                original_file_obj_map[f.id.val] = obj

    def get_analysis_ctx(self, plate_id):
        """Retrieves a plate analysis context for a given plate."""
        # Using a set since 1) no one was using the image.id key and 2)
        # we are now also collecting original files from plates (MIAS)
        # for which there's no clear key. Since all the files are loaded
        # in a single shot, double linking should not cause a problem.
        plates = set()
        original_files = set()
        original_file_image_map = dict()
        images = self.find_images_for_plate(plate_id)
        for image in images:
            for ws in image.copyWellSamples():
                plate = ws.well.plate
                if plate not in plates:
                    plates.add(plate)
                    self.gather_original_files(plate, original_files, None)
            self.gather_original_files(image, original_files,
                                       original_file_image_map)
        for klass in self.implementations:
            if klass.is_this_type(original_files):
                return klass(images, original_files,
                             original_file_image_map,
                             plate_id, self.service_factory)
        raise MeasurementError(
            "Unable to find suitable analysis context for plate: %d" %
            plate_id)
471
class MeasurementParsingResult(object):
    """
    Holds the results of a measurement parsing event.
    """

    def __init__(self, sets_of_columns=None):
        # Build a fresh list when none is given (avoids the shared mutable
        # default argument pitfall).
        self.sets_of_columns = \
            list() if sets_of_columns is None else sets_of_columns

    def append_columns(self, columns):
        """Adds a set of columns to the parsing result."""
        self.sets_of_columns.append(columns)
485
486 -class AbstractMeasurementCtx(object):
487 """ 488 Abstract class which aggregates and represents all the results produced 489 from a given measurement run. It also provides a scaffold for interacting 490 with the OmeroTables infrastructure. 491 """ 492 493 # The number of ROI to have parsed before streaming them to the server 494 ROI_UPDATE_LIMIT = 1000 495
496 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 497 original_file, result_files):
498 super(AbstractMeasurementCtx, self).__init__() 499 self.analysis_ctx = analysis_ctx 500 self.service_factory = service_factory 501 self.original_file_provider = original_file_provider 502 self.query_service = self.service_factory.getQueryService() 503 self.update_service = self.service_factory.getUpdateService() 504 self.original_file = original_file 505 self.result_files = result_files 506 507 # Establish the rest of our initial state 508 self.wellimages = dict() 509 for image in self.analysis_ctx.images: 510 for well_sample in image.copyWellSamples(): 511 well = well_sample.well 512 idx = well.copyWellSamples().index(well_sample) 513 row = well.row.val 514 column = well.column.val 515 if row not in self.wellimages: 516 self.wellimages[row] = dict() 517 if column not in self.wellimages[row]: 518 self.wellimages[row][column] = [] 519 # Now we save the image at it's proper index 520 l = self.wellimages[row][column] 521 for x in range(idx - len(l) + 1): 522 l.append(None) 523 l[idx] = image
524
525 - def get_well_images(self, row, col):
526 """ 527 Takes a row and a col index and returns a tuple 528 of Well and image. Either might be None. Uses the 529 first image found to find the Well and therefore 530 must be loaded (image->wellSample->well) 531 """ 532 try: 533 images = self.wellimages[row][col] 534 if not images: 535 return (None, None) 536 image = images[0] 537 well = image.copyWellSamples()[0].well 538 return (well, images) 539 except KeyError: 540 # This has the potential to happen alot with the 541 # datasets we have given the split machine acquisition 542 # ".flex" file storage. 543 print "WARNING: Missing data for row %d column %d" % \ 544 (row, col) 545 return (None, None)
546
547 - def update_table(self, columns):
548 """Updates the OmeroTables instance backing our results.""" 549 # Create a new OMERO table to store our measurement results 550 sr = self.service_factory.sharedResources() 551 name = self.get_name() 552 self.table = sr.newTable(1, '/%s.r5' % name) 553 if self.table is None: 554 raise MeasurementError( 555 "Unable to create table: %s" % name) 556 557 # Retrieve the original file corresponding to the table for the 558 # measurement, link it to the file annotation representing the 559 # umbrella measurement run, link the annotation to the plate from 560 # which it belongs and save the file annotation. 561 table_original_file = self.table.getOriginalFile() 562 table_original_file_id = table_original_file.id.val 563 print "Created new table: %d" % table_original_file_id 564 unloaded_o_file = OriginalFileI(table_original_file_id, False) 565 self.file_annotation.file = unloaded_o_file 566 unloaded_plate = PlateI(self.analysis_ctx.plate_id, False) 567 plate_annotation_link = PlateAnnotationLinkI() 568 plate_annotation_link.parent = unloaded_plate 569 plate_annotation_link.child = self.file_annotation 570 plate_annotation_link = \ 571 self.update_service.saveAndReturnObject(plate_annotation_link) 572 self.file_annotation = plate_annotation_link.child 573 574 t0 = int(time.time() * 1000) 575 self.table.initialize(columns) 576 print "Table init took %sms" % (int(time.time() * 1000) - t0) 577 t0 = int(time.time() * 1000) 578 self.table.addData(columns) 579 print "Table update took %sms" % (int(time.time() * 1000) - t0)
580
581 - def create_file_annotation(self, set_of_columns):
582 """ 583 Creates a file annotation to represent a set of columns from our 584 measurment. 585 """ 586 self.file_annotation = FileAnnotationI() 587 self.file_annotation.ns = \ 588 rstring('openmicroscopy.org/omero/measurement') 589 name = self.get_name(set_of_columns) 590 self.file_annotation.description = rstring(name)
591
592 - def update_rois(self, rois, column):
593 """ 594 Updates a set of ROI inserting the updated IDs back into a given 595 column. 596 """ 597 print "Saving %d ROI at %d" % (len(rois), len(column.values)) 598 t0 = int(time.time() * 1000) 599 roi_ids = self.update_service.saveAndReturnIds(rois) 600 print "ROI update took %sms" % (int(time.time() * 1000) - t0) 601 column.values += roi_ids 602 print "Total ROI saved: %d" % (len(column.values))
603
604 - def image_from_original_file(self, original_file):
605 """Returns the image from which an original file has originated.""" 606 m = self.analysis_ctx.original_file_image_map 607 return m[original_file.id.val]
608
609 - def parse_and_populate(self):
610 """ 611 Calls parse and populate, updating the OmeroTables instance backing 612 our results and the OMERO database itself. 613 """ 614 result = self.parse() 615 if result is None: 616 return 617 for i, columns in enumerate(result.sets_of_columns): 618 self.create_file_annotation(i) 619 self.parse_and_populate_roi(columns) 620 self.populate(columns)
621 622 ### 623 ### Abstract methods 624 ### 625
626 - def get_name(self, set_of_columns=None):
627 """Returns the name of the measurement, and a set of columns.""" 628 raise Exception("To be implemented by concrete implementations.")
629
630 - def parse(self):
631 """Parses result files, returning a MeasurementParsingResult.""" 632 raise Exception("To be implemented by concrete implementations.")
633
634 - def parse_and_populate_roi(self, columns):
635 """ 636 Parses and populates ROI from column data in the OMERO database. 637 """ 638 raise Exception("To be implemented by concrete implementations.")
639
640 - def populate(self, columns):
641 """ 642 Populates an OmeroTables instance backing our results and ROI 643 linkages. 644 """ 645 raise Exception("To be implemented by concrete implementations.")
646
647 -class MIASMeasurementCtx(AbstractMeasurementCtx):
648 """ 649 MIAS measurements are a set of tab delimited text files per well. Each 650 TSV file's content is prefixed by the analysis parameters. 651 """ 652 653 # The OmeroTable ImageColumn index 654 IMAGE_COL = 0 655 656 # The OmeroTable RoiColumn index 657 ROI_COL = 1 658 659 # Expected columns in NEO datasets 660 NEO_EXPECTED = ('Image', 'ROI', 'Label', 'Row', 'Col', 'Nucleus Area', 661 'Cell Diam.', 'Cell Type', 'Mean Nucleus Intens.') 662 663 # Expected columns in MNU datasets 664 MNU_EXPECTED = ('Image', 'ROI', 'row', 'col', 'type') 665
666 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 667 original_file, result_files):
668 super(MIASMeasurementCtx, self).__init__( 669 analysis_ctx, service_factory, original_file_provider, 670 original_file, result_files)
671
672 - def get_empty_columns(self, n_columns):
673 """ 674 Retrieves a set of empty OmeroTables columns for the analysis results 675 prefixed by an ImageColumn and RoiColumn to handle these linked 676 object indexes. 677 """ 678 columns = [ImageColumn('Image', '', list()), 679 RoiColumn('ROI', '', list())] 680 for i in range(n_columns): 681 columns.append(DoubleColumn('', '', list())) 682 return columns
683 684 ### 685 ### Overriding abstract implementation 686 ### 687
688 - def image_from_original_file(self, original_file):
689 """ 690 Overriding the abstract implementation since the companion 691 files are no longer attached to the images, but only to the plate 692 for MIAS. Instead, we use the filename itself to find the image. 693 """ 694 name = original_file.name.val 695 # Copy: '^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$' 696 match = MIASPlateAnalysisCtx.detail_regex.match(name) 697 if match: 698 well_num = int(match.group(1)) 699 return self.analysis_ctx.image_from_wellnumber(well_num) 700 else: 701 raise exceptions.Exception("Not a detail file")
702 703 ### 704 ### Abstract method implementations 705 ### 706
707 - def get_name(self, set_of_columns=None):
708 return self.original_file.name.val[:-4]
709
710 - def parse(self):
711 columns = None 712 for result_file in self.result_files: 713 print "Parsing: %s" % result_file.name.val 714 image = self.image_from_original_file(result_file) 715 provider = self.original_file_provider 716 data = provider.get_original_file_data(result_file) 717 try: 718 rows = list(csv.reader(data, delimiter='\t')) 719 finally: 720 data.close() 721 rows.reverse() 722 if columns is None: 723 columns = self.get_empty_columns(len(rows[0])) 724 for row in rows: 725 try: 726 for i, value in enumerate(row): 727 value = float(value) 728 columns[i + 2].values.append(value) 729 columns[self.IMAGE_COL].values.append(image.id.val) 730 except ValueError: 731 for i, value in enumerate(row): 732 columns[i + 2].name = value 733 break 734 print "Returning %d columns" % len(columns) 735 return MeasurementParsingResult([columns])
736
737 - def _parse_neo_roi(self, columns):
738 """Parses out ROI from OmeroTables columns for 'NEO' datasets.""" 739 print "Parsing %s NEO ROIs..." % (len(columns[0].values)) 740 image_ids = columns[self.IMAGE_COL].values 741 rois = list() 742 # Save our file annotation to the database so we can use an unloaded 743 # annotation for the saveAndReturnIds that will be triggered below. 744 self.file_annotation = \ 745 self.update_service.saveAndReturnObject(self.file_annotation) 746 unloaded_file_annotation = \ 747 FileAnnotationI(self.file_annotation.id.val, False) 748 for i, image_id in enumerate(image_ids): 749 unloaded_image = ImageI(image_id, False) 750 roi = RoiI() 751 shape = EllipseI() 752 values = columns[6].values 753 diameter = rdouble(float(values[i])) 754 shape.theZ = rint(0) 755 shape.theT = rint(0) 756 values = columns[4].values 757 shape.cx = rdouble(float(values[i])) 758 values = columns[3].values 759 shape.cy = rdouble(float(values[i])) 760 shape.rx = diameter 761 shape.ry = diameter 762 roi.addShape(shape) 763 roi.image = unloaded_image 764 roi.linkAnnotation(unloaded_file_annotation) 765 rois.append(roi) 766 if len(rois) == self.ROI_UPDATE_LIMIT: 767 self.update_rois(rois, columns[self.ROI_COL]) 768 rois = list() 769 self.update_rois(rois, columns[self.ROI_COL])
770
771 - def _parse_mnu_roi(self, columns):
772 """Parses out ROI from OmeroTables columns for 'MNU' datasets.""" 773 print "Parsing %s MNU ROIs..." % (len(columns[0].values)) 774 image_ids = columns[self.IMAGE_COL].values 775 rois = list() 776 # Save our file annotation to the database so we can use an unloaded 777 # annotation for the saveAndReturnIds that will be triggered below. 778 self.file_annotation = \ 779 self.update_service.saveAndReturnObject(self.file_annotation) 780 unloaded_file_annotation = \ 781 FileAnnotationI(self.file_annotation.id.val, False) 782 for i, image_id in enumerate(image_ids): 783 unloaded_image = ImageI(image_id, False) 784 roi = RoiI() 785 shape = PointI() 786 shape.theZ = rint(0) 787 shape.theT = rint(0) 788 values = columns[3].values 789 shape.cx = rdouble(float(values[i])) 790 values = columns[2].values 791 shape.cy = rdouble(float(values[i])) 792 roi.addShape(shape) 793 roi.image = unloaded_image 794 roi.linkAnnotation(unloaded_file_annotation) 795 rois.append(roi) 796 if len(rois) == self.ROI_UPDATE_LIMIT: 797 self.update_rois(rois, columns[self.ROI_COL]) 798 rois = list() 799 self.update_rois(rois, columns[self.ROI_COL])
800
801 - def parse_and_populate_roi(self, columns):
802 names = [column.name for column in columns] 803 neo = [name in self.NEO_EXPECTED for name in names] 804 mnu = [name in self.MNU_EXPECTED for name in names] 805 for name in names: 806 print "Column: %s" % name 807 if len(columns) == 9 and False not in neo: 808 self._parse_neo_roi(columns) 809 elif len(columns) == 5 and False not in mnu: 810 self._parse_mnu_roi(columns) 811 else: 812 print "WARNING: Unknown ROI type for MIAS dataset: %r" % names
813
814 - def populate(self, columns):
815 """ 816 Query performed:: 817 first_roi = columns[self.ROI_COL].values[0] 818 first_roi = self.query_service.findByQuery( 819 'select roi from Roi as roi ' \ 820 'join fetch roi.annotationLinks as link ' \ 821 'join fetch link.child ' \ 822 'where roi.id = %d' % first_roi, None) 823 self.file_annotation = first_roi.copyAnnotationLinks()[0].child 824 """ 825 self.update_table(columns)
826
827 -class FlexMeasurementCtx(AbstractMeasurementCtx):
828 """ 829 Flex measurements are located deep within a ".res" XML file container 830 and contain no ROI. 831 """ 832 833 # The XPath to the <Area> which aggregate an acquisition 834 AREA_XPATH = './/Areas/Area' 835 836 # The XPath to the an analysis <Parameter>; will become a column header 837 # and is below AREA_XPATH 838 PARAMETER_XPATH = './/Wells/ResultParameters/Parameter' 839 840 # The XPath to a <Well> which has had at least one acquisition event 841 # within and is below AREA_XPATH 842 WELL_XPATH = './/Wells/Well' 843 844 # The XPath to a <Result> for a given well and is below WELL_XPATH 845 RESULT_XPATH = './/Result' 846
847 - def __init__(self, analysis_ctx, service_factory, original_file_provider, 848 original_file, result_files):
849 super(FlexMeasurementCtx, self).__init__( 850 analysis_ctx, service_factory, original_file_provider, 851 original_file, result_files)
852
853 - def get_empty_columns(self, headers):
854 """ 855 Retrieves a set of empty OmeroTables columns for the analysis results 856 prefixed by a WellColumn to handle linked object indexes. 857 """ 858 columns = {'Well': WellColumn('Well', '', list())} 859 for header in headers: 860 columns[header] = DoubleColumn(header, '', list()) 861 return columns
862 863 ### 864 ### Abstract method implementations 865 ### 866
867 - def get_name(self, set_of_columns=None):
868 return self.original_file.name.val[:-4]
869
870 - def parse(self):
871 print "Parsing: %s" % self.original_file.name.val 872 provider = self.original_file_provider 873 data = provider.get_original_file_data(self.original_file) 874 try: 875 et = ElementTree(file=data) 876 finally: 877 data.close() 878 root = et.getroot() 879 areas = root.findall(self.AREA_XPATH) 880 print "Area count: %d" % len(areas) 881 for i, area in enumerate(areas): 882 result_parameters = area.findall(self.PARAMETER_XPATH) 883 print "Area %d result children: %d" % (i, len(result_parameters)) 884 if len(result_parameters) == 0: 885 print "%s contains no analysis data." % self.get_name() 886 return 887 headers = list() 888 for result_parameter in result_parameters: 889 headers.append(result_parameter.text) 890 columns = self.get_empty_columns(headers) 891 wells = area.findall(self.WELL_XPATH) 892 for well in wells: 893 # Rows and columns are 1-indexed, OMERO wells are 0-indexed 894 row = int(well.get('row')) - 1 895 column = int(well.get('col')) - 1 896 try: 897 v = columns['Well'].values 898 wellobj, images = self.get_well_images(row, column) 899 if not wellobj: 900 continue 901 v.append(wellobj.id.val) 902 except: 903 log.exception("ERROR: Failed to get well images") 904 continue 905 results = well.findall(self.RESULT_XPATH) 906 for result in results: 907 name = result.get('name') 908 columns[name].values.append(float(result.text)) 909 return MeasurementParsingResult([columns.values()])
910
    def parse_and_populate_roi(self, columns):
        """
        Abstract method implementation; intentionally a no-op.  No ROI are
        created for this measurement type (presumably Flex results carry no
        region data -- confirm against the file format).
        """
        pass
913
    def populate(self, columns):
        """
        Abstract method implementation: delegates to update_table() to store
        the parsed measurement columns.
        """
        self.update_table(columns)
916
class InCellMeasurementCtx(AbstractMeasurementCtx):
    """
    InCell Analyzer measurements are located deep within an XML file container.

    Parsing streams the XML with iterparse and builds two parallel sets of
    columns (cells and nuclei); centre-of-gravity measurements are later
    turned into point ROI linked to the measurement's file annotation.
    """

    # Cells expected centre of gravity columns
    CELLS_CG_EXPECTED = ['Cell: cgX', 'Cell: cgY']

    # Nuclei expected centre of gravity columns
    NUCLEI_CG_EXPECTED = ['Nucleus: cgX', 'Nucleus: cgY']

    # Expected source attribute value for cell data
    CELLS_SOURCE = 'Cells'

    # Expected source attribute value for nuclei data
    NUCLEI_SOURCE = 'Nuclei'

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        # No InCell-specific state; all setup is in the abstract base class.
        super(InCellMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)

    def check_sparse_data(self, columns):
        """
        Checks a set of columns for sparse data (one column shorter than
        the rest) and adds -1 where appropriate.

        Compares each column's length against the previous non-ROI column
        and pads whichever side is short with -1.0 so all value columns
        stay the same length.
        """
        length = None
        for i, column in enumerate(columns):
            if column.name == 'ROI':
                # ROI are processed late so we don't care if this column
                # is sparse or not.
                continue
            current_length = len(column.values)
            if length is not None:
                if current_length > length:
                    log.warn("%s length %d > %d modding previous column" % \
                             (column.name, current_length, length))
                    # Pad the previously visited column to catch up.
                    columns[i - 1].values.append(-1.0)
                if current_length < length:
                    log.warn("%s length %d < %d modding current column" % \
                             (column.name, current_length, length))
                    # Pad the current column to catch up.
                    column.values.append(-1.0)
            length = len(column.values)

    ###
    ### Abstract method implementations
    ###

    def get_name(self, set_of_columns=None):
        """
        Returns the measurement name (file name minus 4-character extension),
        suffixed with ' Cells' or ' Nuclei' for column sets 0 and 1
        respectively.  Returns None for any other set_of_columns value.
        """
        if set_of_columns is None:
            return self.original_file.name.val[:-4]
        elif set_of_columns == 0:
            return self.original_file.name.val[:-4] + ' Cells'
        elif set_of_columns == 1:
            return self.original_file.name.val[:-4] + ' Nuclei'

    def parse(self):
        """
        Streams the InCell XML file with iterparse, building parallel cells
        and nuclei column sets.

        Each non-summary <WellData> start event begins one ROI's worth of
        rows; nested <Measure> elements are routed to the cells and/or
        nuclei columns by their 'source' attribute.  New measurement columns
        are only created while processing the first ROI (n_roi == 0);
        afterwards values are appended to the existing columns.  Elements
        are cleared as they complete to bound memory use during streaming.
        """
        print "Parsing: %s" % self.original_file.name.val
        provider = self.original_file_provider
        data = provider.get_original_file_data(self.original_file)
        try:
            events = ('start', 'end')
            well_data = None
            n_roi = 0
            n_measurements = 0
            cells_columns = {'Image': ImageColumn('Image', '', list()),
                             'Cell': LongColumn('Cell', '', list()),
                             'ROI': RoiColumn('ROI', '', list())
                             }
            nuclei_columns = {'Image': ImageColumn('Image', '', list()),
                              'Cell': LongColumn('Cell', '', list()),
                              'ROI': RoiColumn('ROI', '', list())
                              }
            for event, element in iterparse(data, events=events):
                if event == 'start' and element.tag == 'WellData' \
                   and element.get('cell') != 'Summary':
                    # Rows/columns/fields are 1-indexed in the XML,
                    # 0-indexed in OMERO.
                    row = int(element.get('row')) - 1
                    col = int(element.get('col')) - 1
                    i = int(element.get('field')) - 1
                    try:
                        well, images = self.get_well_images(row, col)
                        if not images:
                            continue
                        image = images[i]
                    except:
                        # NOTE(review): bare except also traps SystemExit /
                        # KeyboardInterrupt -- consider "except Exception".
                        log.exception("ERROR: Failed to get well images")
                        continue
                    # Re-align column lengths before starting a new row.
                    self.check_sparse_data(cells_columns.values())
                    self.check_sparse_data(nuclei_columns.values())
                    cell = long(element.get('cell'))
                    cells_columns['Cell'].values.append(cell)
                    nuclei_columns['Cell'].values.append(cell)
                    well_data = element
                    cells_columns['Image'].values.append(image.id.val)
                    nuclei_columns['Image'].values.append(image.id.val)
                elif well_data is not None and event == 'start' \
                     and element.tag == 'Measure':
                    source = element.get('source')
                    key = element.get('key')
                    value = float(element.get('value'))
                    if source == self.CELLS_SOURCE:
                        # Columns are only created for the first ROI.
                        if n_roi == 0:
                            cells_columns[key] = DoubleColumn(key, '', list())
                        cells_columns[key].values.append(value)
                    elif source == self.NUCLEI_SOURCE:
                        if n_roi == 0:
                            nuclei_columns[key] = DoubleColumn(key, '', list())
                        nuclei_columns[key].values.append(value)
                    else:
                        # Unrecognised source: record the value in both sets.
                        if n_roi == 0:
                            cells_columns[key] = DoubleColumn(key, '', list())
                            nuclei_columns[key] = DoubleColumn(key, '', list())
                        cells_columns[key].values.append(value)
                        nuclei_columns[key].values.append(value)
                    n_measurements += 1
                elif event == 'end' and element.tag == 'WellData':
                    if well_data is not None:
                        n_roi += 1
                        # Free the finished subtree to keep memory bounded.
                        well_data.clear()
                        well_data = None
                else:
                    element.clear()
            print "Total ROI: %d" % n_roi
            print "Total measurements: %d" % n_measurements
            sets_of_columns = [cells_columns.values(), nuclei_columns.values()]
            return MeasurementParsingResult(sets_of_columns)
        finally:
            data.close()

    def parse_and_populate_roi(self, columns_as_list):
        """
        Creates point ROI from the centre-of-gravity columns and saves them,
        recording the resulting ROI IDs in the 'ROI' column.

        The provided columns are either the cells set or the nuclei set:
        whichever CG pair is present decides the shape source.  ROI are
        saved in batches of ROI_UPDATE_LIMIT via update_rois().
        """
        # First sanity check our provided columns
        names = [column.name for column in columns_as_list]
        cells_expected = [name in names for name in self.CELLS_CG_EXPECTED]
        nuclei_expected = [name in names for name in self.NUCLEI_CG_EXPECTED]
        if (False in cells_expected) and (False in nuclei_expected):
            print "WARNING: Missing CGs for InCell dataset: %r" % names
            return
        # Reconstruct a column name to column map
        columns = dict()
        for column in columns_as_list:
            columns[column.name] = column
        image_ids = columns['Image'].values
        rois = list()
        # Save our file annotation to the database so we can use an unloaded
        # annotation for the saveAndReturnIds that will be triggered below.
        self.file_annotation = \
            self.update_service.saveAndReturnObject(self.file_annotation)
        unloaded_file_annotation = \
            FileAnnotationI(self.file_annotation.id.val, False)
        # Parse and append ROI
        for i, image_id in enumerate(image_ids):
            unloaded_image = ImageI(image_id, False)
            if False in nuclei_expected:
                # Cell centre of gravity
                roi = RoiI()
                shape = PointI()
                shape.theZ = rint(0)
                shape.theT = rint(0)
                shape.cx = rdouble(float(columns['Cell: cgX'].values[i]))
                shape.cy = rdouble(float(columns['Cell: cgY'].values[i]))
                roi.addShape(shape)
                roi.image = unloaded_image
                roi.linkAnnotation(unloaded_file_annotation)
                rois.append(roi)
            elif False in cells_expected:
                # Nucleus centre of gravity
                roi = RoiI()
                shape = PointI()
                shape.theZ = rint(0)
                shape.theT = rint(0)
                shape.cx = rdouble(float(columns['Nucleus: cgX'].values[i]))
                shape.cy = rdouble(float(columns['Nucleus: cgY'].values[i]))
                roi.addShape(shape)
                roi.image = unloaded_image
                roi.linkAnnotation(unloaded_file_annotation)
                rois.append(roi)
            else:
                raise MeasurementError('Not a nucleus or cell ROI')
            # Flush a full batch to the server.
            if len(rois) == self.ROI_UPDATE_LIMIT:
                self.update_rois(rois, columns['ROI'])
                rois = list()
        # Flush any remaining partial batch.
        self.update_rois(rois, columns['ROI'])

    def populate(self, columns):
        """
        Abstract method implementation: delegates to update_table() to store
        the parsed measurement columns.
        """
        self.update_table(columns)
1104 1105 if __name__ == "__main__": 1106 try: 1107 options, args = getopt(sys.argv[1:], "h:p:u:m:k:id") 1108 except GetoptError, (msg, opt): 1109 usage(msg) 1110 1111 try: 1112 plate_id, = args 1113 plate_id = long(plate_id) 1114 except ValueError: 1115 usage("Plate ID must be a specified and a number!") 1116 1117 username = None 1118 hostname = None 1119 port = 4063 1120 measurement = None 1121 info = False 1122 session_key = None 1123 logging_level = logging.WARN 1124 for option, argument in options: 1125 if option == "-u": 1126 username = argument 1127 if option == "-h": 1128 hostname = argument 1129 if option == "-p": 1130 port = int(argument) 1131 if option == "-m": 1132 measurement = int(argument) 1133 if option == "-i": 1134 info = True 1135 if option == "-k": 1136 session_key = argument 1137 if option == "-d": 1138 logging_level = logging.DEBUG 1139 if session_key is None and username is None: 1140 usage("Username must be specified!") 1141 if session_key is None and hostname is None: 1142 usage("Host name must be specified!") 1143 if session_key is None: 1144 password = getpass() 1145 1146 logging.basicConfig(level = logging_level) 1147 c = client(hostname, port) 1148 c.enableKeepAlive(60) 1149 try: 1150 if session_key is not None: 1151 service_factory = c.createSession(session_key) 1152 else: 1153 service_factory = c.createSession(username, password) 1154 1155 factory = PlateAnalysisCtxFactory(service_factory) 1156 analysis_ctx = factory.get_analysis_ctx(plate_id) 1157 n_measurements = analysis_ctx.get_measurement_count() 1158 if measurement is not None and measurement >= n_measurements: 1159 usage("measurement %d not a valid index!") 1160 if info: 1161 for i in range(n_measurements): 1162 n_result_files = analysis_ctx.get_result_file_count(i) 1163 print "Measurement %d has %d result files." 
% \ 1164 (i, n_result_files) 1165 sys.exit(0) 1166 if measurement is not None: 1167 measurement_ctx = analysis_ctx.get_measurement_ctx(measurement) 1168 measurement_ctx.parse_and_populate() 1169 else: 1170 for i in range(n_measurements): 1171 measurement_ctx = analysis_ctx.get_measurement_ctx(i) 1172 measurement_ctx.parse_and_populate() 1173 finally: 1174 c.closeSession() 1175