Package omero :: Package util :: Module populate_roi
[hide private]
[frames] | [no frames]

Source Code for Module omero.util.populate_roi

   1  #!/usr/bin/env python 
   2  # encoding: utf-8 
   3  """ 
   4  ... 
   5  """ 
   6   
   7  # 
   8  #  Copyright (C) 2009 University of Dundee. All rights reserved. 
   9  # 
  10  # 
  11  #  This program is free software; you can redistribute it and/or modify 
  12  #  it under the terms of the GNU General Public License as published by 
  13  #  the Free Software Foundation; either version 2 of the License, or 
  14  #  (at your option) any later version. 
  15  #  This program is distributed in the hope that it will be useful, 
  16  #  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  17  #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  18  #  GNU General Public License for more details. 
  19  # 
  20  #  You should have received a copy of the GNU General Public License along 
  21  #  with this program; if not, write to the Free Software Foundation, Inc., 
  22  #  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
  23  # 
  24   
  25   
  26  import tempfile 
  27  import time 
  28  import sys 
  29  import csv 
  30  import re 
  31  from StringIO import StringIO 
  32  from getpass import getpass 
  33  from getopt import getopt, GetoptError 
  34   
  35  import omero.clients 
  36  from omero.rtypes import rdouble, rstring, rint 
  37  from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ImageI, \ 
  38                          FileAnnotationI, RoiI, EllipseI, PointI 
  39  from omero.grid import ImageColumn, WellColumn, RoiColumn, LongColumn, DoubleColumn 
  40  from omero.util.temp_files import create_path, remove_path 
  41  from omero import client 
  42   
  43  # Handle Python 2.5 built-in ElementTree 
  44  try: 
  45          from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  46  except ImportError: 
  47          from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse 
  48   
def usage(error):
    """Print the usage message (prefixed by *error*) and exit with status 2."""
    cmd = sys.argv[0]
    print("""%s
Usage: %s [-h hostname] [-u username | -k session_key] <-p port> [plate_id]
Runs measurement population code for a given plate.

Options:
  -u    OMERO username to use
  -k    OMERO session key to use
  -h    OMERO hostname to use
  -p    OMERO port to use [defaults to 4063]
  -m    Measurement index to populate
  -i    Dump measurement information and exit (no population)

Examples:
  %s -h localhost -p 4063 -u bob 27

Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd))
    sys.exit(2)
69
class MeasurementError(Exception):
    """Error raised when an analysis or measurement context cannot proceed."""
76
class DownloadingOriginalFileProvider(object):
    """
    Provides original file data by downloading it from an OMERO raw file
    store into local temporary files.
    """

    # Default raw file store buffer size (bytes per read)
    BUFFER_SIZE = 1024 * 1024  # 1MB

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.raw_file_store = self.service_factory.createRawFileStore()
        # Scratch directory under which all temporary files are created
        self.dir = create_path("populate_roi", "dir", folder = True)

    def get_original_file_data(self, original_file):
        """
        Downloads an original file to a temporary file and returns an open
        file handle to that temporary file seeked to zero. The caller is
        responsible for closing the temporary file.
        """
        print("Downloading original file: %d" % original_file.id.val)
        self.raw_file_store.setFileId(original_file.id.val)
        temporary_file = tempfile.TemporaryFile(dir=str(self.dir))
        size = original_file.size.val
        # Pull the file down one buffer-sized chunk at a time; the final
        # truncate() trims any over-read past the declared size.
        for i in range((size // self.BUFFER_SIZE) + 1):
            index = i * self.BUFFER_SIZE
            data = self.raw_file_store.read(index, self.BUFFER_SIZE)
            temporary_file.write(data)
        temporary_file.seek(0)
        temporary_file.truncate(size)
        return temporary_file

    def __del__(self):
        # BUG FIX: this was named __delete__, which is the *descriptor*
        # protocol slot and is never invoked at object finalization, so the
        # raw file store was leaked. __del__ is the finalizer. The getattr
        # guard avoids AttributeError noise if __init__ failed part-way.
        store = getattr(self, 'raw_file_store', None)
        if store is not None:
            store.close()
110
class AbstractPlateAnalysisCtx(object):
    """
    Abstract class which aggregates and represents all measurement runs made
    on a given Plate.
    """

    # Provider subclasses use to fetch result file contents
    DEFAULT_ORIGINAL_FILE_PROVIDER = DownloadingOriginalFileProvider

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(AbstractPlateAnalysisCtx, self).__init__()
        self.images = images
        self.original_files = original_files
        self.original_file_image_map = original_file_image_map
        self.plate_id = plate_id
        self.service_factory = service_factory
        # Per-run bookkeeping populated by concrete subclasses
        self.log_files = dict()
        self.detail_files = dict()
        self.measurements = dict()

    ###
    ### Abstract methods
    ###

    @classmethod
    def is_this_type(klass):
        """
        Concrete implementations are to return True if the class pertinent
        for the original files associated with the plate.
        """
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_count(self):
        """Returns the number of recognized measurement runs."""
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_ctx(self, index):
        """Returns the measurement context for a given index."""
        raise Exception("To be implemented by concrete implementations.")

    def get_result_file_count(self, measurement_index):
        """
        Return the number of result files associated with a measurement run.
        """
        raise Exception("To be implemented by concrete implementations.")
class MIASPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    MIAS dataset concrete class implementation of an analysis context. MIAS
    measurements are aggregated based on a single "log" file. A result
    file is present for each stitched (of multiple fields) mosaic and
    contains the actual measured results and ROI.
    """

    # Python datetime format string of the log filename completion date/time
    datetime_format = '%Y-%m-%d-%Hh%Mm%Ss'

    # Regular expression matching a log filename
    log_regex = re.compile(r'.*log(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Regular expression matching a result filename
    detail_regex = re.compile(
        r'^Well\d+_.*_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Companion file format
    companion_format = 'Companion/MIAS'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(MIASPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        self._populate_log_and_detail_files()
        self._populate_measurements()

    def _populate_log_and_detail_files(self):
        """
        Strips out erroneous files and collects the log and result original
        files based on regular expression matching, keyed by the parsed
        completion timestamp embedded in each filename.
        """
        for original_file in self.original_files.values():
            if original_file.format.value.val != self.companion_format:
                continue
            name = original_file.name.val
            match = self.log_regex.match(name)
            if match:
                d = time.strptime(match.group(1), self.datetime_format)
                self.log_files[d] = original_file
                continue
            match = self.detail_regex.match(name)
            if match:
                d = time.strptime(match.group(1), self.datetime_format)
                self.detail_files[d] = original_file
                continue

    def _populate_measurements(self):
        """
        Result original files are only recognizable as part of a given
        measurement (declared by a log file) based upon their parsed
        date/time of completion as encoded in the filename. This method
        collects result original files and groups them by collective
        parsed date/time of completion: each detail file is attached to
        the earliest log file whose timestamp follows it.
        """
        log_timestamps = list(self.log_files.keys())
        log_timestamps.sort()
        detail_timestamps = list(self.detail_files.keys())
        detail_timestamps.sort()
        for log_timestamp in log_timestamps:
            self.measurements[log_timestamp] = list()
        for detail_timestamp in detail_timestamps:
            for log_timestamp in log_timestamps:
                if detail_timestamp < log_timestamp:
                    self.measurements[log_timestamp].append(
                        self.detail_files[detail_timestamp])
                    break

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for original_file in original_files.values():
            format = original_file.format.value.val
            if format == klass.companion_format \
               and klass.log_regex.match(original_file.name.val):
                return True
        # BUG FIX: previously fell off the end and returned None; return
        # False explicitly for consistency with the sibling implementations
        # (FlexPlateAnalysisCtx, InCellPlateAnalysisCtx).
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        # list(...) keeps the Python 2 keys()-indexing behavior explicit
        key = list(self.log_files.keys())[index]
        sf = self.service_factory
        original_file = self.log_files[key]
        result_files = self.measurements[key]
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return MIASMeasurementCtx(self, sf, provider, original_file,
                                  result_files)

    def get_result_file_count(self, measurement_index):
        key = list(self.log_files.keys())[measurement_index]
        return len(self.measurements[key])
class FlexPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    Flex dataset concrete class implementation of an analysis context. Flex
    measurements are aggregated in a single ".res" XML file and contain no
    ROI.
    """

    # Companion file format
    companion_format = 'Companion/Flex'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(FlexPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # De-duplicate ".res" companion files by path; each surviving file
        # is one measurement run.
        res_by_path = dict()
        for candidate in original_files.values():
            if candidate.format.value.val == self.companion_format \
               and candidate.path.val.endswith('.res'):
                res_by_path[candidate.path.val] = candidate
        self.measurements = res_by_path.values()

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for candidate in original_files.values():
            if candidate.format.value.val == klass.companion_format \
               and candidate.path.val.endswith('.res'):
                return True
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return FlexMeasurementCtx(self, sf, provider,
                                  self.measurements[index], [])

    def get_result_file_count(self, measurement_index):
        # One aggregated ".res" file per measurement run
        return 1
class InCellPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    InCell dataset concrete class implementation of an analysis context.
    InCell measurements are from InCell Analyzer and are aggregated in a
    single gargantuan (often larger than 100MB per plate) XML file.
    """

    # Companion file format
    companion_format = 'Companion/InCell'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(InCellPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # De-duplicate ".xml" companion files by path; each surviving file
        # is one measurement run.
        xml_by_path = dict()
        for candidate in original_files.values():
            if candidate.format.value.val == self.companion_format \
               and candidate.path.val.endswith('.xml'):
                xml_by_path[candidate.path.val] = candidate
        self.measurements = xml_by_path.values()

    ###
    ### Abstract method implementations
    ###

    @classmethod
    def is_this_type(klass, original_files):
        for candidate in original_files.values():
            if candidate.format.value.val == klass.companion_format \
               and candidate.path.val.endswith('.xml'):
                return True
        return False

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return InCellMeasurementCtx(self, sf, provider,
                                    self.measurements[index], [])

    def get_result_file_count(self, measurement_index):
        # One aggregated XML file per measurement run
        return 1
class PlateAnalysisCtxFactory(object):
    """
    The plate analysis context factory is responsible for detecting and
    returning a plate analysis context instance for a given plate.
    """

    # Known concrete analysis contexts, probed in this order
    implementations = (FlexPlateAnalysisCtx, MIASPlateAnalysisCtx,
                       InCellPlateAnalysisCtx)

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.query_service = self.service_factory.getQueryService()

    def find_images_for_plate(self, plate_id):
        """
        Retrieves all the images associated with a given plate. Fetched
        are the Image's WellSample, the WellSample's Well, the annotation
        stack associated with the Image and each annotation's linked
        original file.
        """
        # The query that follows is doubly linked:
        #  * Image --> WellSample --> Well
        #  * Well --> WellSample --> Image
        # This facilitates later "ordered" access of fields/well samples
        # required by certain measurement contexts (notably InCell).
        return self.query_service.findAllByQuery(
            'select img from Image as img ' \
            'left outer join fetch img.annotationLinks as a_links ' \
            'join fetch img.wellSamples as ws ' \
            'join fetch ws.well as w ' \
            'join fetch w.wellSamples as w_ws ' \
            'join fetch w_ws.image ' \
            'join w.plate as p ' \
            'join fetch a_links.child as a ' \
            'join fetch a.file as o_file ' \
            'join fetch o_file.format ' \
            'where p.id = %d' % plate_id, None)

    def get_analysis_ctx(self, plate_id):
        """Retrieves a plate analysis context for a given plate."""
        original_files = dict()
        original_file_image_map = dict()
        images = self.find_images_for_plate(plate_id)
        # Index every file-annotation's file by id, remembering which image
        # it hangs off of.
        for image in images:
            for annotation_link in image.copyAnnotationLinks():
                annotation = annotation_link.child
                if not isinstance(annotation, FileAnnotationI):
                    continue
                f = annotation.file
                original_files[f.id.val] = f
                original_file_image_map[f.id.val] = image
        for ctx_class in self.implementations:
            if ctx_class.is_this_type(original_files):
                return ctx_class(images, original_files,
                                 original_file_image_map,
                                 plate_id, self.service_factory)
        raise MeasurementError(
            "Unable to find suitable analysis context for plate: %d" % \
            plate_id)
class MeasurementParsingResult(object):
    """
    Holds the results of a measurement parsing event.
    """

    def __init__(self, sets_of_columns=None):
        # A fresh list per instance avoids the shared-mutable-default trap.
        self.sets_of_columns = list() if sets_of_columns is None \
            else sets_of_columns

    def append_columns(self, columns):
        """Adds a set of columns to the parsing result."""
        self.sets_of_columns.append(columns)
class AbstractMeasurementCtx(object):
    """
    Abstract class which aggregates and represents all the results produced
    from a given measurement run. It also provides a scaffold for interacting
    with the OmeroTables infrastructure.
    """

    # The number of ROI to have parsed before streaming them to the server
    ROI_UPDATE_LIMIT = 1000

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        super(AbstractMeasurementCtx, self).__init__()
        self.analysis_ctx = analysis_ctx
        self.service_factory = service_factory
        self.original_file_provider = original_file_provider
        self.query_service = self.service_factory.getQueryService()
        self.update_service = self.service_factory.getUpdateService()
        self.original_file = original_file
        self.result_files = result_files
        # Establish the rest of our initial state
        self.n_columns = None

    def update_table(self, columns):
        """Updates the OmeroTables instance backing our results."""
        # Create a new OMERO table to store our measurement results
        shared_resources = self.service_factory.sharedResources()
        name = self.get_name()
        self.table = shared_resources.newTable(1, '/%s.r5' % name)
        if self.table is None:
            raise MeasurementError(
                "Unable to create table: %s" % name)

        # Retrieve the original file corresponding to the table for the
        # measurement, link it to the file annotation representing the
        # umbrella measurement run, link the annotation to the plate from
        # which it belongs and save the file annotation.
        table_original_file = self.table.getOriginalFile()
        table_original_file_id = table_original_file.id.val
        print("Created new table: %d" % table_original_file_id)
        unloaded_o_file = OriginalFileI(table_original_file_id, False)
        self.file_annotation.file = unloaded_o_file
        unloaded_plate = PlateI(self.analysis_ctx.plate_id, False)
        link = PlateAnnotationLinkI()
        link.parent = unloaded_plate
        link.child = self.file_annotation
        link = self.update_service.saveAndReturnObject(link)
        self.file_annotation = link.child

        started = int(time.time() * 1000)
        self.table.initialize(columns)
        print("Table init took %sms" % (int(time.time() * 1000) - started))
        started = int(time.time() * 1000)
        self.table.addData(columns)
        print("Table update took %sms" % (int(time.time() * 1000) - started))

    def create_file_annotation(self, set_of_columns):
        """
        Creates a file annotation to represent a set of columns from our
        measurement.
        """
        self.file_annotation = FileAnnotationI()
        self.file_annotation.ns = \
            rstring('openmicroscopy.org/omero/measurement')
        self.file_annotation.description = rstring(
            self.get_name(set_of_columns))

    def update_rois(self, rois, column):
        """
        Updates a set of ROI inserting the updated IDs back into a given
        column.
        """
        print("Saving %d ROI at %d" % (len(rois), len(column.values)))
        started = int(time.time() * 1000)
        roi_ids = self.update_service.saveAndReturnIds(rois)
        print("ROI update took %sms" % (int(time.time() * 1000) - started))
        column.values += roi_ids
        print("Total ROI saved: %d" % (len(column.values)))

    def image_from_original_file(self, original_file):
        """Returns the image from which an original file has originated."""
        mapping = self.analysis_ctx.original_file_image_map
        return mapping[original_file.id.val]

    def parse_and_populate(self):
        """
        Calls parse and populate, updating the OmeroTables instance backing
        our results and the OMERO database itself.
        """
        result = self.parse()
        if result is None:
            return
        for index, columns in enumerate(result.sets_of_columns):
            self.create_file_annotation(index)
            self.parse_and_populate_roi(columns)
            self.populate(columns)

    ###
    ### Abstract methods
    ###

    def get_name(self, set_of_columns=None):
        """Returns the measurement name for a given set of columns."""
        raise Exception("To be implemented by concrete implementations.")

    def parse(self):
        """Parses result files, returning a MeasurementParsingResult."""
        raise Exception("To be implemented by concrete implementations.")

    def parse_and_populate_roi(self, columns):
        """
        Parses and populates ROI from column data in the OMERO database.
        """
        raise Exception("To be implemented by concrete implementations.")

    def populate(self, columns):
        """
        Populates an OmeroTables instance backing our results and ROI
        linkages.
        """
        raise Exception("To be implemented by concrete implementations.")
class MIASMeasurementCtx(AbstractMeasurementCtx):
    """
    MIAS measurements are a set of tab delimited text files per well. Each
    TSV file's content is prefixed by the analysis parameters.
    """

    # The OmeroTable ImageColumn index
    IMAGE_COL = 0

    # The OmeroTable RoiColumn index
    ROI_COL = 1

    # Expected columns in NEO datasets
    NEO_EXPECTED = ('Image', 'ROI', 'Label', 'Row', 'Col', 'Nucleus Area',
                    'Cell Diam.', 'Cell Type', 'Mean Nucleus Intens.')

    # Expected columns in MNU datasets
    MNU_EXPECTED = ('Image', 'ROI', 'row', 'col', 'type')

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        super(MIASMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)

    def get_empty_columns(self, n_columns):
        """
        Retrieves a set of empty OmeroTables columns for the analysis results
        prefixed by an ImageColumn and RoiColumn to handle these linked
        object indexes.
        """
        columns = [ImageColumn('Image', '', list()),
                   RoiColumn('ROI', '', list())]
        for i in range(n_columns):
            # Data column names are filled in later by parse() when the
            # header row is encountered.
            columns.append(DoubleColumn('', '', list()))
        return columns

    ###
    ### Abstract method implementations
    ###

    def get_name(self, set_of_columns=None):
        # Measurement name is the log filename minus its ".txt" extension
        return self.original_file.name.val[:-4]

    def parse(self):
        """
        Parses every result (detail) file into one shared set of columns;
        returns a MeasurementParsingResult wrapping that single column set.
        """
        columns = None
        for result_file in self.result_files:
            print "Parsing: %s" % result_file.name.val
            image = self.image_from_original_file(result_file)
            provider = self.original_file_provider
            data = provider.get_original_file_data(result_file)
            try:
                rows = list(csv.reader(data, delimiter='\t'))
            finally:
                data.close()
            # why: the file ends with numeric data rows and is prefixed by
            # parameter/header text. Reversing consumes data rows first; the
            # first row that fails float() conversion is taken to be the
            # column-header row, whose cells name the data columns, and
            # parsing of this file stops there.
            rows.reverse()
            if columns is None:
                columns = self.get_empty_columns(len(rows[0]))
            for row in rows:
                try:
                    for i, value in enumerate(row):
                        value = float(value)
                        # +2 skips the Image and ROI prefix columns
                        columns[i + 2].values.append(value)
                    columns[self.IMAGE_COL].values.append(image.id.val)
                except ValueError:
                    for i, value in enumerate(row):
                        columns[i + 2].name = value
                    break
        print "Returning %d columns" % len(columns)
        return MeasurementParsingResult([columns])

    def _parse_neo_roi(self, columns):
        """Parses out ROI from OmeroTables columns for 'NEO' datasets."""
        print "Parsing %s NEO ROIs..." % (len(columns[0].values))
        image_ids = columns[self.IMAGE_COL].values
        rois = list()
        # Save our file annotation to the database so we can use an unloaded
        # annotation for the saveAndReturnIds that will be triggered below.
        self.file_annotation = \
            self.update_service.saveAndReturnObject(self.file_annotation)
        unloaded_file_annotation = \
            FileAnnotationI(self.file_annotation.id.val, False)
        for i, image_id in enumerate(image_ids):
            unloaded_image = ImageI(image_id, False)
            roi = RoiI()
            # One ellipse per row: column 6 is 'Cell Diam.', columns 4/3 are
            # 'Col'/'Row' (per NEO_EXPECTED, offset by the 2 prefix columns).
            shape = EllipseI()
            values = columns[6].values
            diameter = rdouble(float(values[i]))
            shape.theZ = rint(0)
            shape.theT = rint(0)
            values = columns[4].values
            shape.cx = rdouble(float(values[i]))
            values = columns[3].values
            shape.cy = rdouble(float(values[i]))
            # NOTE(review): rx/ry are set to the full diameter, not the
            # radius — confirm against the MIAS file semantics.
            shape.rx = diameter
            shape.ry = diameter
            roi.addShape(shape)
            roi.image = unloaded_image
            roi.linkAnnotation(unloaded_file_annotation)
            rois.append(roi)
            # Stream ROI to the server in batches to bound memory use
            if len(rois) == self.ROI_UPDATE_LIMIT:
                self.update_rois(rois, columns[self.ROI_COL])
                rois = list()
        self.update_rois(rois, columns[self.ROI_COL])

    def _parse_mnu_roi(self, columns):
        """Parses out ROI from OmeroTables columns for 'MNU' datasets."""
        print "Parsing %s MNU ROIs..." % (len(columns[0].values))
        image_ids = columns[self.IMAGE_COL].values
        rois = list()
        # Save our file annotation to the database so we can use an unloaded
        # annotation for the saveAndReturnIds that will be triggered below.
        self.file_annotation = \
            self.update_service.saveAndReturnObject(self.file_annotation)
        unloaded_file_annotation = \
            FileAnnotationI(self.file_annotation.id.val, False)
        for i, image_id in enumerate(image_ids):
            unloaded_image = ImageI(image_id, False)
            roi = RoiI()
            # One point per row: columns 3/2 are 'col'/'row' (per
            # MNU_EXPECTED, offset by the 2 prefix columns).
            shape = PointI()
            shape.theZ = rint(0)
            shape.theT = rint(0)
            values = columns[3].values
            shape.cx = rdouble(float(values[i]))
            values = columns[2].values
            shape.cy = rdouble(float(values[i]))
            roi.addShape(shape)
            roi.image = unloaded_image
            roi.linkAnnotation(unloaded_file_annotation)
            rois.append(roi)
            # Stream ROI to the server in batches to bound memory use
            if len(rois) == self.ROI_UPDATE_LIMIT:
                self.update_rois(rois, columns[self.ROI_COL])
                rois = list()
        self.update_rois(rois, columns[self.ROI_COL])

    def parse_and_populate_roi(self, columns):
        # Decide NEO vs MNU by matching the parsed column names against the
        # expected name sets; unknown layouts are skipped with a warning.
        names = [column.name for column in columns]
        neo = [name in self.NEO_EXPECTED for name in names]
        mnu = [name in self.MNU_EXPECTED for name in names]
        for name in names:
            print "Column: %s" % name
        if len(columns) == 9 and False not in neo:
            self._parse_neo_roi(columns)
        elif len(columns) == 5 and False not in mnu:
            self._parse_mnu_roi(columns)
        else:
            print "WARNING: Unknown ROI type for MIAS dataset: %r" % names

    def populate(self, columns):
        """
        Writes the columns to the backing OmeroTable.

        Query performed::
            first_roi = columns[self.ROI_COL].values[0]
            first_roi = self.query_service.findByQuery(
                'select roi from Roi as roi ' \
                'join fetch roi.annotationLinks as link ' \
                'join fetch link.child ' \
                'where roi.id = %d' % first_roi, None)
            self.file_annotation = first_roi.copyAnnotationLinks()[0].child
        """
        self.update_table(columns)
class FlexMeasurementCtx(AbstractMeasurementCtx):
    """
    Flex measurements are located deep within a ".res" XML file container
    and contain no ROI.
    """

    # The XPath to the <Area> which aggregate an acquisition
    AREA_XPATH = './/Areas/Area'

    # The XPath to the an analysis <Parameter>; will become a column header
    # and is below AREA_XPATH
    PARAMETER_XPATH = './/Wells/ResultParameters/Parameter'

    # The XPath to a <Well> which has had at least one acquisition event
    # within and is below AREA_XPATH
    WELL_XPATH = './/Wells/Well'

    # The XPath to a <Result> for a given well and is below WELL_XPATH
    RESULT_XPATH = './/Result'

    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        super(FlexMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)
        # Build a row -> column -> Well lookup so XML (row, col) coordinates
        # can be resolved to OMERO wells during parse().
        self.wells = dict()
        for image in self.analysis_ctx.images:
            for well_sample in image.copyWellSamples():
                well = well_sample.well
                row = well.row.val
                column = well.column.val
                if row not in self.wells.keys():
                    self.wells[row] = dict()
                self.wells[row][column] = well

    def get_empty_columns(self, headers):
        """
        Retrieves a set of empty OmeroTables columns for the analysis results
        prefixed by a WellColumn to handle linked object indexes.
        """
        columns = {'Well': WellColumn('Well', '', list())}
        for header in headers:
            columns[header] = DoubleColumn(header, '', list())
        return columns

    ###
    ### Abstract method implementations
    ###

    def get_name(self, set_of_columns=None):
        # Measurement name is the ".res" filename minus its extension
        return self.original_file.name.val[:-4]

    def parse(self):
        print "Parsing: %s" % self.original_file.name.val
        image = self.image_from_original_file(self.original_file)
        # NOTE(review): unloaded_image is never used below — confirm whether
        # it is dead code or a vestige of removed ROI handling.
        unloaded_image = ImageI(image.id.val, False)
        provider = self.original_file_provider
        data = provider.get_original_file_data(self.original_file)
        try:
            et = ElementTree(file=data)
        finally:
            data.close()
        root = et.getroot()
        areas = root.findall(self.AREA_XPATH)
        print "Area count: %d" % len(areas)
        for i, area in enumerate(areas):
            result_parameters = area.findall(self.PARAMETER_XPATH)
            print "Area %d result children: %d" % (i, len(result_parameters))
            if len(result_parameters) == 0:
                # NOTE(review): returning None here aborts the whole parse
                # (parse_and_populate treats None as "nothing to do") even if
                # other areas contain data — confirm this is intended.
                print "%s contains no analysis data." % self.get_name()
                return
            headers = list()
            for result_parameter in result_parameters:
                headers.append(result_parameter.text)
            # NOTE(review): columns is re-created on every area iteration, so
            # only the last area's columns reach the return — confirm .res
            # files seen in practice contain a single area.
            columns = self.get_empty_columns(headers)
            wells = area.findall(self.WELL_XPATH)
            for well in wells:
                # Rows and columns are 1-indexed, OMERO wells are 0-indexed
                row = int(well.get('row')) - 1
                column = int(well.get('col')) - 1
                try:
                    v = columns['Well'].values
                    v.append(self.wells[row][column].id.val)
                except KeyError:
                    # This has the potential to happen alot with the
                    # datasets we have given the split machine acquisition
                    # ".flex" file storage.
                    print "WARNING: Missing data for row %d column %d" % \
                        (row, column)
                    continue
                results = well.findall(self.RESULT_XPATH)
                for result in results:
                    name = result.get('name')
                    columns[name].values.append(float(result.text))
        return MeasurementParsingResult([columns.values()])

    def parse_and_populate_roi(self, columns):
        # Flex datasets carry no ROI, so there is nothing to populate.
        pass

    def populate(self, columns):
        self.update_table(columns)
815 -class InCellMeasurementCtx(AbstractMeasurementCtx):
816 """ 817 InCell Analyzer measurements are located deep within an XML file container. 818 """ 819 820 # Cells expected centre of gravity columns 821 CELLS_CG_EXPECTED = ['Cell: cgX', 'Cell: cgY'] 822 823 # Nulcei expected centre of gravity columns 824 NUCLEI_CG_EXPECTED = ['Nucleus: cgX', 'Nucleus: cgY'] 825 826 # Expected source attribute value for cell data 827 CELLS_SOURCE = 'Cells' 828 829 # Expected source attribute value for nuclei data 830 NUCLEI_SOURCE = 'Nuclei' 831
    def __init__(self, analysis_ctx, service_factory, original_file_provider,
                 original_file, result_files):
        super(InCellMeasurementCtx, self).__init__(
            analysis_ctx, service_factory, original_file_provider,
            original_file, result_files)
        # Build a row -> column -> Well lookup so XML (row, col) coordinates
        # can be resolved to OMERO wells during parse(). (Same indexing
        # scheme as FlexMeasurementCtx.__init__.)
        self.wells = dict()
        for image in self.analysis_ctx.images:
            for well_sample in image.copyWellSamples():
                well = well_sample.well
                row = well.row.val
                column = well.column.val
                if row not in self.wells.keys():
                    self.wells[row] = dict()
                self.wells[row][column] = well
846 847 ### 848 ### Abstract method implementations 849 ### 850
851 - def get_name(self, set_of_columns=None):
852 if set_of_columns is None: 853 return self.original_file.name.val[:-4] 854 elif set_of_columns == 0: 855 return self.original_file.name.val[:-4] + ' Cells' 856 elif set_of_columns == 1: 857 return self.original_file.name.val[:-4] + ' Nuclei'
858
    def parse(self):
        """
        Stream-parses the InCell Analyzer XML with iterparse, producing two
        column sets (cells and nuclei) wrapped in a MeasurementParsingResult.
        """
        print "Parsing: %s" % self.original_file.name.val
        image = self.image_from_original_file(self.original_file)
        # NOTE(review): unloaded_image is never used below — confirm whether
        # it is dead code or a vestige of removed ROI handling.
        unloaded_image = ImageI(image.id.val, False)
        provider = self.original_file_provider
        data = provider.get_original_file_data(self.original_file)
        try:
            events = ('start', 'end')
            # well_data holds the currently open non-summary <WellData>
            # element; None means measures encountered now are ignored.
            well_data = None
            n_roi = 0
            n_measurements = 0
            cells_columns = {'Image': ImageColumn('Image', '', list()),
                             'Cell': LongColumn('Cell', '', list()),
                             'ROI': RoiColumn('ROI', '', list())
                             }
            nuclei_columns = {'Image': ImageColumn('Image', '', list()),
                              'Cell': LongColumn('Cell', '', list()),
                              'ROI': RoiColumn('ROI', '', list())
                              }
            for event, element in iterparse(data, events=events):
                if event == 'start' and element.tag == 'WellData' \
                   and element.get('cell') != 'Summary':
                    # XML coordinates are 1-indexed; OMERO's are 0-indexed
                    row = int(element.get('row')) - 1
                    col = int(element.get('col')) - 1
                    i = int(element.get('field')) - 1
                    try:
                        image = self.wells[row][col].copyWellSamples()[i].image
                    except KeyError:
                        # This has the potential to happen alot with the
                        # datasets we have been given.
                        print "WARNING: Missing data for row %d column %d" % \
                            (row, col)
                        continue
                    # NOTE(review): element.get('cell') is a string appended
                    # to a LongColumn — presumably coerced downstream;
                    # confirm against the OMERO.tables column contract.
                    cells_columns['Cell'].values.append(element.get('cell'))
                    nuclei_columns['Cell'].values.append(element.get('cell'))
                    well_data = element
                    cells_columns['Image'].values.append(image.id.val)
                    nuclei_columns['Image'].values.append(image.id.val)
                elif well_data is not None and event == 'start' \
                     and element.tag == 'Measure':
                    source = element.get('source')
                    key = element.get('key')
                    value = float(element.get('value'))
                    # New data columns are only declared while parsing the
                    # first ROI (n_roi == 0); later ROI reuse them.
                    if source == self.CELLS_SOURCE:
                        if n_roi == 0:
                            cells_columns[key] = DoubleColumn(key, '', list())
                        cells_columns[key].values.append(value)
                    elif source == self.NUCLEI_SOURCE:
                        if n_roi == 0:
                            nuclei_columns[key] = DoubleColumn(key, '', list())
                        nuclei_columns[key].values.append(value)
                    else:
                        # Measures with any other source are recorded in
                        # both column sets.
                        if n_roi == 0:
                            cells_columns[key] = DoubleColumn(key, '', list())
                            nuclei_columns[key] = DoubleColumn(key, '', list())
                        cells_columns[key].values.append(value)
                        nuclei_columns[key].values.append(value)
                    n_measurements += 1
                elif event == 'end' and element.tag == 'WellData':
                    if well_data is not None:
                        n_roi += 1
                        # clear() frees the element's children to keep the
                        # memory footprint of the huge XML file bounded.
                        well_data.clear()
                        well_data = None
                else:
                    element.clear()
            print "Total ROI: %d" % n_roi
            print "Total measurements: %d" % n_measurements
            sets_of_columns = [cells_columns.values(), nuclei_columns.values()]
            return MeasurementParsingResult(sets_of_columns)
        finally:
            data.close()
930
931 - def parse_and_populate_roi(self, columns_as_list):
932 # First sanity check our provided columns 933 names = [column.name for column in columns_as_list] 934 cells_expected = [name in names for name in self.CELLS_CG_EXPECTED] 935 nuclei_expected = [name in names for name in self.NUCLEI_CG_EXPECTED] 936 if (False in cells_expected) and (False in nuclei_expected): 937 print "WARNING: Missing CGs for InCell dataset: %r" % names 938 return 939 # Reconstruct a column name to column map 940 columns = dict() 941 for column in columns_as_list: 942 columns[column.name] = column 943 image_ids = columns['Image'].values 944 rois = list() 945 # Save our file annotation to the database so we can use an unloaded 946 # annotation for the saveAndReturnIds that will be triggered below. 947 self.file_annotation = \ 948 self.update_service.saveAndReturnObject(self.file_annotation) 949 unloaded_file_annotation = \ 950 FileAnnotationI(self.file_annotation.id.val, False) 951 # Parse and append ROI 952 for i, image_id in enumerate(image_ids): 953 unloaded_image = ImageI(image_id, False) 954 if False in nuclei_expected: 955 # Cell centre of gravity 956 roi = RoiI() 957 shape = PointI() 958 shape.theZ = rint(0) 959 shape.theT = rint(0) 960 shape.cx = rdouble(float(columns['Cell: cgX'].values[i])) 961 shape.cy = rdouble(float(columns['Cell: cgY'].values[i])) 962 roi.addShape(shape) 963 roi.image = unloaded_image 964 roi.linkAnnotation(unloaded_file_annotation) 965 rois.append(roi) 966 elif False in cells_expected: 967 # Nucleus centre of gravity 968 roi = RoiI() 969 shape = PointI() 970 shape.theZ = rint(0) 971 shape.theT = rint(0) 972 shape.cx = rdouble(float(columns['Nucleus: cgX'].values[i])) 973 shape.cy = rdouble(float(columns['Nucleus: cgY'].values[i])) 974 roi.addShape(shape) 975 roi.image = unloaded_image 976 roi.linkAnnotation(unloaded_file_annotation) 977 rois.append(roi) 978 else: 979 raise MeasurementError('Not a nucleus or cell ROI') 980 if len(rois) == self.ROI_UPDATE_LIMIT: 981 self.update_rois(rois, columns['ROI']) 982 rois = 
list() 983 self.update_rois(rois, columns['ROI'])
984
    def populate(self, columns):
        """
        Populate the OMERO table for this measurement with the parsed
        columns; delegates directly to the inherited update_table().
        """
        self.update_table(columns)
987 988 if __name__ == "__main__": 989 try: 990 options, args = getopt(sys.argv[1:], "h:p:u:m:k:i") 991 except GetoptError, (msg, opt): 992 usage(msg) 993 994 try: 995 plate_id, = args 996 plate_id = long(plate_id) 997 except ValueError: 998 usage("Plate ID must be a specified and a number!") 999 1000 username = None 1001 hostname = None 1002 port = 4063 1003 measurement = None 1004 info = False 1005 session_key = None 1006 for option, argument in options: 1007 if option == "-u": 1008 username = argument 1009 if option == "-h": 1010 hostname = argument 1011 if option == "-p": 1012 port = int(argument) 1013 if option == "-m": 1014 measurement = int(argument) 1015 if option == "-i": 1016 info = True 1017 if option == "-k": 1018 session_key = argument 1019 if session_key is None and username is None: 1020 usage("Username must be specified!") 1021 if session_key is None and hostname is None: 1022 usage("Host name must be specified!") 1023 if session_key is None: 1024 password = getpass() 1025 1026 c = client(hostname, port) 1027 c.enableKeepAlive(60) 1028 try: 1029 if session_key is not None: 1030 service_factory = c.createSession(session_key) 1031 else: 1032 service_factory = c.createSession(username, password) 1033 1034 factory = PlateAnalysisCtxFactory(service_factory) 1035 analysis_ctx = factory.get_analysis_ctx(plate_id) 1036 n_measurements = analysis_ctx.get_measurement_count() 1037 if measurement is not None and measurement >= n_measurements: 1038 usage("measurement %d not a valid index!") 1039 if info: 1040 for i in range(n_measurements): 1041 n_result_files = analysis_ctx.get_result_file_count(i) 1042 print "Measurement %d has %d result files." 
% \ 1043 (i, n_result_files) 1044 sys.exit(0) 1045 if measurement is not None: 1046 measurement_ctx = analysis_ctx.get_measurement_ctx(measurement) 1047 measurement_ctx.parse_and_populate() 1048 else: 1049 for i in range(n_measurements): 1050 measurement_ctx = analysis_ctx.get_measurement_ctx(i) 1051 measurement_ctx.parse_and_populate() 1052 finally: 1053 c.closeSession() 1054