1
2
3 """
4 ...
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 import exceptions
27 import tempfile
28 import logging
29 import time
30 import sys
31 import csv
32 import re
33 from StringIO import StringIO
34 from getpass import getpass
35 from getopt import getopt, GetoptError
36
37 import omero.clients
38 from omero.rtypes import rdouble, rstring, rint
39 from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ImageI, \
40 FileAnnotationI, RoiI, EllipseI, PointI
41 from omero.grid import ImageColumn, WellColumn, RoiColumn, LongColumn, DoubleColumn
42 from omero.util.temp_files import create_path, remove_path
43 from omero import client
44
45
46 try:
47 from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
48 except ImportError:
49 from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
50
51 log = logging.getLogger("omero.util.populate_roi")
52
54 """Prints usage so that we don't have to. :)"""
55 cmd = sys.argv[0]
56 print """%s
57 Usage: %s [-s hostname] [-u username | -k session_key] <-p port> [plate_id]
58 Runs measurement population code for a given plate.
59
60 Options:
61 -s OMERO hostname to use
62 -p OMERO port to use [defaults to 4064]
63 -u OMERO username to use
64 -k OMERO session key to use
65 -m Measurement index to populate
66 -i Dump measurement information and exit (no population)
67 -d Print debug statements
68
69 Examples:
70 %s -s localhost -p 4063 -u bob 27
71
72 Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd)
73 sys.exit(2)
74
class MeasurementError(exceptions.Exception):
    """
    Raised by the analysis or measurement context when an error condition
    is reached.
    """
    pass
83 """
84 Provides original file data by downloading it from an OMERO raw file store.
85 """
86
87
88 BUFFER_SIZE = 1024 * 1024
89
91 self.service_factory = service_factory
92 self.raw_file_store = self.service_factory.createRawFileStore()
93 self.dir = create_path("populate_roi", "dir", folder = True)
94
96 """
97 Downloads an original file to a temporary file and returns an open
98 file handle to that temporary file seeked to zero. The caller is
99 responsible for closing the temporary file.
100 """
101 print "Downloading original file: %d" % original_file.id.val
102 self.raw_file_store.setFileId(original_file.id.val)
103 temporary_file = tempfile.TemporaryFile(dir=str(self.dir))
104 size = original_file.size.val
105 for i in range((size / self.BUFFER_SIZE) + 1):
106 index = i * self.BUFFER_SIZE
107 data = self.raw_file_store.read(index, self.BUFFER_SIZE)
108 temporary_file.write(data)
109 temporary_file.seek(0L)
110 temporary_file.truncate(size)
111 return temporary_file
112
114 self.raw_file_store.close()
115
class AbstractPlateAnalysisCtx(object):
    """
    Abstract class which aggregates and represents all measurement runs made on
    a given Plate.
    """

    # Provider used by subclasses to download result files from the server.
    DEFAULT_ORIGINAL_FILE_PROVIDER = DownloadingOriginalFileProvider

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(AbstractPlateAnalysisCtx, self).__init__()
        self.images = images
        self.numcols, self.numrows = self.guess_geometry(self.images)
        self.original_files = original_files
        self.original_file_image_map = original_file_image_map
        self.plate_id = plate_id
        self.service_factory = service_factory
        self.log_files = dict()
        self.detail_files = dict()
        self.measurements = dict()

    def guess_geometry(self, images):
        """
        Guesses the (columns, rows) geometry of the plate from the maximum
        well column/row index seen across all images.
        """
        max_col = 0
        max_row = 0
        for image in images:
            # Only the first well sample is inspected; all well samples of
            # an image are expected to share the same well.
            ws = image.copyWellSamples()[0]
            well = ws.well
            max_col = max(max_col, well.column.val)
            max_row = max(max_row, well.row.val)
        return (max_col + 1, max_row + 1)

    def colrow_from_wellnumber(self, width, wellnumber):
        """Converts a 1-based well number into a 0-based (col, row) tuple."""
        x = wellnumber - 1
        col = x % width
        row = x / width
        return (col, row)

    def image_from_wellnumber(self, wellnumber):
        """Returns the image whose well matches a given 1-based well number."""
        col, row = self.colrow_from_wellnumber(self.numcols, wellnumber)
        log.debug("Finding image for %s (%s,%s)..." % (wellnumber, col, row))
        for image in self.images:
            well = image.copyWellSamples()[0].well
            if well.column.val == col and well.row.val == row:
                return image
        raise exceptions.Exception("Could not find image for (col,row)==(%s,%s)" % (col,row))

    ###
    ### Abstract methods
    ###

    def is_this_type(klass, original_files):
        """
        Concrete implementations are to return True if the class pertinent
        for the original files associated with the plate.
        """
        raise Exception("To be implemented by concrete implementations.")
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        """Returns the number of recognized measurement runs."""
        raise Exception("To be implemented by concrete implementations.")

    def get_measurement_ctx(self, index):
        """Returns the measurement context for a given index."""
        raise Exception("To be implemented by concrete implementations.")

    def get_result_file_count(self, measurement_index):
        """
        Return the number of result files associated with a measurement run.
        """
        raise Exception("To be implemented by concrete implementations.")
class MIASPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    MIAS dataset concrete class implementation of an analysis context. MIAS
    measurements are aggregated based on a single "log" file. A result
    file is present for each stitched (of multiple fields) mosaic and
    contains the actual measured results and ROI.
    """

    # strptime format of the date/time embedded in MIAS file names.
    datetime_format = '%Y-%m-%d-%Hh%Mm%Ss'

    # Regular expression matching a MIAS log file name.
    log_regex = re.compile('.*log(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Regular expression matching a MIAS result ("detail") file name.
    detail_regex = re.compile(
        '^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')

    # Companion file mimetype for MIAS datasets.
    companion_format = 'Companion/MIAS'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        # NOTE(review): this body was stripped from this copy; restored from
        # the upstream populate_roi.py -- confirm against the repository.
        super(MIASPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        self._populate_log_and_detail_files()
        self._populate_measurements()

    def _populate_log_and_detail_files(self):
        """
        Strips out erroneous files and collects the log and result original
        files based on regular expression matching.
        """
        for original_file in self.original_files:
            if original_file.mimetype.val != self.companion_format:
                continue
            name = original_file.name.val
            match = self.log_regex.match(name)
            if match:
                d = time.strptime(match.group(1), self.datetime_format)
                self.log_files[d] = original_file
                continue
            match = self.detail_regex.match(name)
            if match:
                d = time.strptime(match.group(3), self.datetime_format)
                self.detail_files[d] = original_file
                continue

    def _populate_measurements(self):
        """
        Result original files are only recognizable as part of a given
        measurement (declared by a log file) based upon their parsed
        date/time of completion as encoded in the filename. This method
        collects result original files and groups them by collective
        parsed date/time of completion.
        """
        log_timestamps = list(self.log_files.keys())
        log_timestamps.sort()
        detail_timestamps = list(self.detail_files.keys())
        detail_timestamps.sort()
        for log_timestamp in log_timestamps:
            self.measurements[log_timestamp] = list()
        # Each detail file belongs to the first log file whose timestamp
        # follows it.
        for detail_timestamp in detail_timestamps:
            for log_timestamp in log_timestamps:
                if detail_timestamp < log_timestamp:
                    self.measurements[log_timestamp].append(
                        self.detail_files[detail_timestamp])
                    break

    ###
    ### Abstract method implementations
    ###

    def is_this_type(klass, original_files):
        for original_file in original_files:
            format = original_file.mimetype.val
            if format == klass.companion_format \
               and klass.log_regex.match(original_file.name.val):
                return True
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        return len(self.measurements.keys())

    def get_measurement_ctx(self, index):
        key = self.log_files.keys()[index]
        sf = self.service_factory
        original_file = self.log_files[key]
        result_files = self.measurements[key]
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return MIASMeasurementCtx(self, sf, provider, original_file,
                                  result_files)

    def get_result_file_count(self, measurement_index):
        key = self.log_files.keys()[measurement_index]
        return len(self.measurements[key])
class FlexPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    Flex dataset concrete class implementation of an analysis context. Flex
    measurements are aggregated in a single ".res" XML file and contain no
    ROI.
    """

    # Companion file mimetype for Flex datasets.
    companion_format = 'Companion/Flex'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(FlexPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # De-duplicate ".res" companion files by path.
        path_original_file_map = dict()
        for original_file in original_files:
            path = original_file.path.val
            name = original_file.name.val
            format = original_file.mimetype.val
            if format == self.companion_format and name.endswith('.res'):
                path_original_file_map[path] = original_file
        self.measurements = path_original_file_map.values()

    ###
    ### Abstract method implementations
    ###

    def is_this_type(klass, original_files):
        for original_file in original_files:
            path = original_file.path.val
            format = original_file.mimetype.val
            name = original_file.name.val
            if format == klass.companion_format and name.endswith('.res'):
                return True
        return False
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        original_file = self.measurements[index]
        result_files = []
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return FlexMeasurementCtx(self, sf, provider, original_file,
                                  result_files)

    def get_result_file_count(self, measurement_index):
        # A Flex measurement is wholly contained in its single ".res" file.
        return 1
class InCellPlateAnalysisCtx(AbstractPlateAnalysisCtx):
    """
    InCell dataset concrete class implementation of an analysis context.
    InCell measurements are from InCell Analyzer and are aggregated in a
    single gargantuan (often larger than 100MB per plate) XML file.
    """

    # Companion file mimetype for InCell datasets.
    companion_format = 'Companion/InCell'

    def __init__(self, images, original_files, original_file_image_map,
                 plate_id, service_factory):
        super(InCellPlateAnalysisCtx, self).__init__(
            images, original_files, original_file_image_map, plate_id,
            service_factory)
        # De-duplicate ".xml" companion files by path.
        path_original_file_map = dict()
        for original_file in original_files:
            path = original_file.path.val
            name = original_file.name.val
            format = original_file.mimetype.val
            if format == self.companion_format and name.endswith('.xml'):
                path_original_file_map[path] = original_file
        self.measurements = path_original_file_map.values()

    ###
    ### Abstract method implementations
    ###

    def is_this_type(klass, original_files):
        for original_file in original_files:
            path = original_file.path.val
            format = original_file.mimetype.val
            name = original_file.name.val
            if format == klass.companion_format and name.endswith('.xml'):
                return True
        return False
    is_this_type = classmethod(is_this_type)

    def get_measurement_count(self):
        return len(self.measurements)

    def get_measurement_ctx(self, index):
        sf = self.service_factory
        original_file = self.measurements[index]
        result_files = []
        provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
        return InCellMeasurementCtx(self, sf, provider, original_file,
                                    result_files)

    def get_result_file_count(self, measurement_index):
        # An InCell measurement is wholly contained in its single XML file.
        return 1
class PlateAnalysisCtxFactory(object):
    """
    The plate analysis context factory is responsible for detecting and
    returning a plate analysis context instance for a given plate.
    """

    # Known analysis context implementations, tried in order.
    implementations = (FlexPlateAnalysisCtx, MIASPlateAnalysisCtx,
                       InCellPlateAnalysisCtx)

    def __init__(self, service_factory):
        self.service_factory = service_factory
        self.query_service = self.service_factory.getQueryService()

    def find_images_for_plate(self, plate_id):
        """
        Retrieves all the images associated with a given plate. Fetched
        are the Image's WellSample, the WellSample's Well, the annotation
        stack associated with the Image and each annotation's linked
        original file.
        """
        # The query is split in two (images, then the plate) to keep the
        # fetched graph manageable; the two halves are linked below.
        log.debug("Loading image...")
        images = self.query_service.findAllByQuery(
            'select img from Image as img ' \
            'join fetch img.wellSamples as ws ' \
            'join fetch ws.well as w ' \
            'join fetch w.wellSamples as ws2 ' \
            'join w.plate as p ' \
            'left outer join fetch img.annotationLinks as ia_links ' \
            'left outer join fetch ia_links.child as ia ' \
            'left outer join fetch ia.file as i_o_file ' \
            'where p.id = %d' % plate_id, None)
        log.debug("Loading plate...")
        plate = self.query_service.findByQuery(
            'select p from Plate p ' \
            'left outer join fetch p.annotationLinks as pa_links ' \
            'left outer join fetch pa_links.child as pa ' \
            'left outer join fetch pa.file as p_o_file ' \
            'where p.id = %d' % plate_id, None)
        log.debug("Linking plate and images...")
        for image in images:
            for ws in image.copyWellSamples():
                ws.well.plate = plate
        return images

    def gather_original_files(self, obj, original_files, original_file_obj_map):
        """
        Adds the file annotations linked to obj into original_files and,
        when a map is supplied, records which object each file came from.
        """
        for annotation_link in obj.copyAnnotationLinks():
            annotation = annotation_link.child
            if isinstance(annotation, FileAnnotationI):
                f = annotation.file
                original_files.add(f)
                if original_file_obj_map is not None:
                    original_file_obj_map[f.id.val] = obj

    def get_analysis_ctx(self, plate_id):
        """Retrieves a plate analysis context for a given plate."""
        # Gather all the original files attached to the plate and to each
        # image, then ask each implementation whether it recognizes them.
        plates = set()
        original_files = set()
        original_file_image_map = dict()
        images = self.find_images_for_plate(plate_id)
        for i, image in enumerate(images):
            for ws in image.copyWellSamples():
                plate = ws.well.plate
                if plate not in plates:
                    plates.add(plate)
                    self.gather_original_files(plate, original_files, None)
            self.gather_original_files(image, original_files, original_file_image_map)
        for klass in self.implementations:
            if klass.is_this_type(original_files):
                return klass(images, original_files,
                             original_file_image_map,
                             plate_id, self.service_factory)
        raise MeasurementError(
            "Unable to find suitable analysis context for plate: %d" % \
            plate_id)
class MeasurementParsingResult(object):
    """
    Holds the results of a measurement parsing event.
    """

    def __init__(self, sets_of_columns=None):
        # None default avoids sharing a mutable default list across calls.
        if sets_of_columns is None:
            self.sets_of_columns = list()
        else:
            self.sets_of_columns = sets_of_columns

    def append_columns(self, columns):
        """Adds a set of columns to the parsing result."""
        self.sets_of_columns.append(columns)
489 """
490 Abstract class which aggregates and represents all the results produced
491 from a given measurement run. It also provides a scaffold for interacting
492 with the OmeroTables infrastructure.
493 """
494
495
496 ROI_UPDATE_LIMIT = 1000
497
498 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
499 original_file, result_files):
500 super(AbstractMeasurementCtx, self).__init__()
501 self.analysis_ctx = analysis_ctx
502 self.service_factory = service_factory
503 self.original_file_provider = original_file_provider
504 self.query_service = self.service_factory.getQueryService()
505 self.update_service = self.service_factory.getUpdateService()
506 self.original_file = original_file
507 self.result_files = result_files
508
509
510 self.wellimages = dict()
511 for image in self.analysis_ctx.images:
512 for well_sample in image.copyWellSamples():
513 well = well_sample.well
514 idx = well.copyWellSamples().index(well_sample)
515 row = well.row.val
516 column = well.column.val
517 if row not in self.wellimages:
518 self.wellimages[row] = dict()
519 if column not in self.wellimages[row]:
520 self.wellimages[row][column] = []
521
522 l = self.wellimages[row][column]
523 for x in range(idx - len(l) + 1):
524 l.append(None)
525 l[idx] = image
526
528 """
529 Takes a row and a col index and returns a tuple
530 of Well and image. Either might be None. Uses the
531 first image found to find the Well and therefore
532 must be loaded (image->wellSample->well)
533 """
534 try:
535 images = self.wellimages[row][col]
536 if not images:
537 return (None, None)
538 image = images[0]
539 well = image.copyWellSamples()[0].well
540 return (well, images)
541 except KeyError:
542
543
544
545 print "WARNING: Missing data for row %d column %d" % \
546 (row, col)
547 return (None, None)
548
550 """Updates the OmeroTables instance backing our results."""
551
552 sr = self.service_factory.sharedResources()
553 name = self.get_name()
554 self.table = sr.newTable(1, '/%s.r5' % name)
555 if self.table is None:
556 raise MeasurementError(
557 "Unable to create table: %s" % name)
558
559
560
561
562
563 table_original_file = self.table.getOriginalFile()
564 table_original_file_id = table_original_file.id.val
565 print "Created new table: %d" % table_original_file_id
566 unloaded_o_file = OriginalFileI(table_original_file_id, False)
567 self.file_annotation.file = unloaded_o_file
568 unloaded_plate = PlateI(self.analysis_ctx.plate_id, False)
569 plate_annotation_link = PlateAnnotationLinkI()
570 plate_annotation_link.parent = unloaded_plate
571 plate_annotation_link.child = self.file_annotation
572 plate_annotation_link = \
573 self.update_service.saveAndReturnObject(plate_annotation_link)
574 self.file_annotation = plate_annotation_link.child
575
576 t0 = int(time.time() * 1000)
577 self.table.initialize(columns)
578 print "Table init took %sms" % (int(time.time() * 1000) - t0)
579 t0 = int(time.time() * 1000)
580 self.table.addData(columns)
581 print "Table update took %sms" % (int(time.time() * 1000) - t0)
582
584 """
585 Creates a file annotation to represent a set of columns from our
586 measurment.
587 """
588 self.file_annotation = FileAnnotationI()
589 self.file_annotation.ns = \
590 rstring('openmicroscopy.org/omero/measurement')
591 name = self.get_name(set_of_columns)
592 self.file_annotation.description = rstring(name)
593
595 """
596 Updates a set of ROI inserting the updated IDs back into a given
597 column.
598 """
599 print "Saving %d ROI at %d" % (len(rois), len(column.values))
600 t0 = int(time.time() * 1000)
601 roi_ids = self.update_service.saveAndReturnIds(rois)
602 print "ROI update took %sms" % (int(time.time() * 1000) - t0)
603 column.values += roi_ids
604 print "Total ROI saved: %d" % (len(column.values))
605
607 """Returns the image from which an original file has originated."""
608 m = self.analysis_ctx.original_file_image_map
609 return m[original_file.id.val]
610
623
624
625
626
627
628 - def get_name(self, set_of_columns=None):
629 """Returns the name of the measurement, and a set of columns."""
630 raise Exception("To be implemented by concrete implementations.")
631
633 """Parses result files, returning a MeasurementParsingResult."""
634 raise Exception("To be implemented by concrete implementations.")
635
637 """
638 Parses and populates ROI from column data in the OMERO database.
639 """
640 raise Exception("To be implemented by concrete implementations.")
641
643 """
644 Populates an OmeroTables instance backing our results and ROI
645 linkages.
646 """
647 raise Exception("To be implemented by concrete implementations.")
648
650 """
651 MIAS measurements are a set of tab delimited text files per well. Each
652 TSV file's content is prefixed by the analysis parameters.
653 """
654
655
656 IMAGE_COL = 0
657
658
659 ROI_COL = 1
660
661
662 NEO_EXPECTED = ('Image', 'ROI', 'Label', 'Row', 'Col', 'Nucleus Area',
663 'Cell Diam.', 'Cell Type', 'Mean Nucleus Intens.')
664
665
666 MNU_EXPECTED = ('Image', 'ROI', 'row', 'col', 'type')
667
668 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
669 original_file, result_files):
670 super(MIASMeasurementCtx, self).__init__(
671 analysis_ctx, service_factory, original_file_provider,
672 original_file, result_files)
673
675 """
676 Retrieves a set of empty OmeroTables columns for the analysis results
677 prefixed by an ImageColumn and RoiColumn to handle these linked
678 object indexes.
679 """
680 columns = [ImageColumn('Image', '', list()),
681 RoiColumn('ROI', '', list())]
682 for i in range(n_columns):
683 columns.append(DoubleColumn('', '', list()))
684 return columns
685
686
687
688
689
691 """
692 Overriding the abstract implementation since the companion
693 files are no longer attached to the images, but only to the plate
694 for MIAS. Instead, we use the filename itself to find the image.
695 """
696 name = original_file.name.val
697
698 match = MIASPlateAnalysisCtx.detail_regex.match(name)
699 if match:
700 well_num = int(match.group(1))
701 return self.analysis_ctx.image_from_wellnumber(well_num)
702 else:
703 raise exceptions.Exception("Not a detail file")
704
705
706
707
708
709 - def get_name(self, set_of_columns=None):
710 return self.original_file.name.val[:-4]
711
738
740 """Parses out ROI from OmeroTables columns for 'NEO' datasets."""
741 print "Parsing %s NEO ROIs..." % (len(columns[0].values))
742 image_ids = columns[self.IMAGE_COL].values
743 rois = list()
744
745
746 self.file_annotation = \
747 self.update_service.saveAndReturnObject(self.file_annotation)
748 unloaded_file_annotation = \
749 FileAnnotationI(self.file_annotation.id.val, False)
750 for i, image_id in enumerate(image_ids):
751 unloaded_image = ImageI(image_id, False)
752 roi = RoiI()
753 shape = EllipseI()
754 values = columns[6].values
755 diameter = rdouble(float(values[i]))
756 shape.theZ = rint(0)
757 shape.theT = rint(0)
758 values = columns[4].values
759 shape.cx = rdouble(float(values[i]))
760 values = columns[3].values
761 shape.cy = rdouble(float(values[i]))
762 shape.rx = diameter
763 shape.ry = diameter
764 roi.addShape(shape)
765 roi.image = unloaded_image
766 roi.linkAnnotation(unloaded_file_annotation)
767 rois.append(roi)
768 if len(rois) == self.ROI_UPDATE_LIMIT:
769 self.update_rois(rois, columns[self.ROI_COL])
770 rois = list()
771 self.update_rois(rois, columns[self.ROI_COL])
772
774 """Parses out ROI from OmeroTables columns for 'MNU' datasets."""
775 print "Parsing %s MNU ROIs..." % (len(columns[0].values))
776 image_ids = columns[self.IMAGE_COL].values
777 rois = list()
778
779
780 self.file_annotation = \
781 self.update_service.saveAndReturnObject(self.file_annotation)
782 unloaded_file_annotation = \
783 FileAnnotationI(self.file_annotation.id.val, False)
784 for i, image_id in enumerate(image_ids):
785 unloaded_image = ImageI(image_id, False)
786 roi = RoiI()
787 shape = PointI()
788 shape.theZ = rint(0)
789 shape.theT = rint(0)
790 values = columns[3].values
791 shape.cx = rdouble(float(values[i]))
792 values = columns[2].values
793 shape.cy = rdouble(float(values[i]))
794 roi.addShape(shape)
795 roi.image = unloaded_image
796 roi.linkAnnotation(unloaded_file_annotation)
797 rois.append(roi)
798 if len(rois) == self.ROI_UPDATE_LIMIT:
799 self.update_rois(rois, columns[self.ROI_COL])
800 rois = list()
801 self.update_rois(rois, columns[self.ROI_COL])
802
815
817 """
818 Query performed::
819 first_roi = columns[self.ROI_COL].values[0]
820 first_roi = self.query_service.findByQuery(
821 'select roi from Roi as roi ' \
822 'join fetch roi.annotationLinks as link ' \
823 'join fetch link.child ' \
824 'where roi.id = %d' % first_roi, None)
825 self.file_annotation = first_roi.copyAnnotationLinks()[0].child
826 """
827 self.update_table(columns)
828
830 """
831 Flex measurements are located deep within a ".res" XML file container
832 and contain no ROI.
833 """
834
835
836 AREA_XPATH = './/Areas/Area'
837
838
839
840 PARAMETER_XPATH = './/Wells/ResultParameters/Parameter'
841
842
843
844 WELL_XPATH = './/Wells/Well'
845
846
847 RESULT_XPATH = './/Result'
848
849 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
850 original_file, result_files):
851 super(FlexMeasurementCtx, self).__init__(
852 analysis_ctx, service_factory, original_file_provider,
853 original_file, result_files)
854
856 """
857 Retrieves a set of empty OmeroTables columns for the analysis results
858 prefixed by a WellColumn to handle linked object indexes.
859 """
860 columns = {'Well': WellColumn('Well', '', list())}
861 for header in headers:
862 columns[header] = DoubleColumn(header, '', list())
863 return columns
864
865
866
867
868
869 - def get_name(self, set_of_columns=None):
870 return self.original_file.name.val[:-4]
871
873 print "Parsing: %s" % self.original_file.name.val
874 provider = self.original_file_provider
875 data = provider.get_original_file_data(self.original_file)
876 try:
877 et = ElementTree(file=data)
878 finally:
879 data.close()
880 root = et.getroot()
881 areas = root.findall(self.AREA_XPATH)
882 print "Area count: %d" % len(areas)
883 for i, area in enumerate(areas):
884 result_parameters = area.findall(self.PARAMETER_XPATH)
885 print "Area %d result children: %d" % (i, len(result_parameters))
886 if len(result_parameters) == 0:
887 print "%s contains no analysis data." % self.get_name()
888 return
889 headers = list()
890 for result_parameter in result_parameters:
891 headers.append(result_parameter.text)
892 columns = self.get_empty_columns(headers)
893 wells = area.findall(self.WELL_XPATH)
894 for well in wells:
895
896 row = int(well.get('row')) - 1
897 column = int(well.get('col')) - 1
898 try:
899 v = columns['Well'].values
900 wellobj, images = self.get_well_images(row, column)
901 if not wellobj:
902 continue
903 v.append(wellobj.id.val)
904 except:
905 log.exception("ERROR: Failed to get well images")
906 continue
907 results = well.findall(self.RESULT_XPATH)
908 for result in results:
909 name = result.get('name')
910 columns[name].values.append(float(result.text))
911 return MeasurementParsingResult([columns.values()])
912
915
918
920 """
921 InCell Analyzer measurements are located deep within an XML file container.
922 """
923
924
925 CELLS_CG_EXPECTED = ['Cell: cgX', 'Cell: cgY']
926
927
928 NUCLEI_CG_EXPECTED = ['Nucleus: cgX', 'Nucleus: cgY']
929
930
931 CELLS_SOURCE = 'Cells'
932
933
934 NUCLEI_SOURCE = 'Nuclei'
935
936 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
937 original_file, result_files):
941
943 """
944 Checks a set of columns for sparse data (one column shorter than
945 the rest) and adds -1 where appropriate.
946 """
947 length = None
948 for i, column in enumerate(columns):
949 if column.name == 'ROI':
950
951
952 continue
953 current_length = len(column.values)
954 if length is not None:
955 if current_length > length:
956 log.warn("%s length %d > %d modding previous column" % \
957 (column.name, current_length, length))
958 columns[i - 1].values.append(-1.0)
959 if current_length < length:
960 log.warn("%s length %d < %d modding current column" % \
961 (column.name, current_length, length))
962 column.values.append(-1.0)
963 length = len(column.values)
964
965
966
967
968
969 - def get_name(self, set_of_columns=None):
970 if set_of_columns is None:
971 return self.original_file.name.val[:-4]
972 elif set_of_columns == 0:
973 return self.original_file.name.val[:-4] + ' Cells'
974 elif set_of_columns == 1:
975 return self.original_file.name.val[:-4] + ' Nuclei'
976
978 print "Parsing: %s" % self.original_file.name.val
979 provider = self.original_file_provider
980 data = provider.get_original_file_data(self.original_file)
981 try:
982 events = ('start', 'end')
983 well_data = None
984 n_roi = 0
985 n_measurements = 0
986 cells_columns = {'Image': ImageColumn('Image', '', list()),
987 'Cell': LongColumn('Cell', '', list()),
988 'ROI': RoiColumn('ROI', '', list())
989 }
990 nuclei_columns = {'Image': ImageColumn('Image', '', list()),
991 'Cell': LongColumn('Cell', '', list()),
992 'ROI': RoiColumn('ROI', '', list())
993 }
994 for event, element in iterparse(data, events=events):
995 if event == 'start' and element.tag == 'WellData' \
996 and element.get('cell') != 'Summary':
997 row = int(element.get('row')) - 1
998 col = int(element.get('col')) - 1
999 i = int(element.get('field')) - 1
1000 try:
1001 well, images = self.get_well_images(row, col)
1002 if not images:
1003 continue
1004 image = images[i]
1005 except:
1006 log.exception("ERROR: Failed to get well images")
1007 continue
1008 self.check_sparse_data(cells_columns.values())
1009 self.check_sparse_data(nuclei_columns.values())
1010 cell = long(element.get('cell'))
1011 cells_columns['Cell'].values.append(cell)
1012 nuclei_columns['Cell'].values.append(cell)
1013 well_data = element
1014 cells_columns['Image'].values.append(image.id.val)
1015 nuclei_columns['Image'].values.append(image.id.val)
1016 elif well_data is not None and event == 'start' \
1017 and element.tag == 'Measure':
1018 source = element.get('source')
1019 key = element.get('key')
1020 value = float(element.get('value'))
1021 if source == self.CELLS_SOURCE:
1022 if n_roi == 0:
1023 cells_columns[key] = DoubleColumn(key, '', list())
1024 cells_columns[key].values.append(value)
1025 elif source == self.NUCLEI_SOURCE:
1026 if n_roi == 0:
1027 nuclei_columns[key] = DoubleColumn(key, '', list())
1028 nuclei_columns[key].values.append(value)
1029 else:
1030 if n_roi == 0:
1031 cells_columns[key] = DoubleColumn(key, '', list())
1032 nuclei_columns[key] = DoubleColumn(key, '', list())
1033 cells_columns[key].values.append(value)
1034 nuclei_columns[key].values.append(value)
1035 n_measurements += 1
1036 elif event == 'end' and element.tag == 'WellData':
1037 if well_data is not None:
1038 n_roi += 1
1039 well_data.clear()
1040 well_data = None
1041 else:
1042 element.clear()
1043 print "Total ROI: %d" % n_roi
1044 print "Total measurements: %d" % n_measurements
1045 sets_of_columns = [cells_columns.values(), nuclei_columns.values()]
1046 return MeasurementParsingResult(sets_of_columns)
1047 finally:
1048 data.close()
1049
1051
1052 names = [column.name for column in columns_as_list]
1053 cells_expected = [name in names for name in self.CELLS_CG_EXPECTED]
1054 nuclei_expected = [name in names for name in self.NUCLEI_CG_EXPECTED]
1055 if (False in cells_expected) and (False in nuclei_expected):
1056 print "WARNING: Missing CGs for InCell dataset: %r" % names
1057 return
1058
1059 columns = dict()
1060 for column in columns_as_list:
1061 columns[column.name] = column
1062 image_ids = columns['Image'].values
1063 rois = list()
1064
1065
1066 self.file_annotation = \
1067 self.update_service.saveAndReturnObject(self.file_annotation)
1068 unloaded_file_annotation = \
1069 FileAnnotationI(self.file_annotation.id.val, False)
1070
1071 for i, image_id in enumerate(image_ids):
1072 unloaded_image = ImageI(image_id, False)
1073 if False in nuclei_expected:
1074
1075 roi = RoiI()
1076 shape = PointI()
1077 shape.theZ = rint(0)
1078 shape.theT = rint(0)
1079 shape.cx = rdouble(float(columns['Cell: cgX'].values[i]))
1080 shape.cy = rdouble(float(columns['Cell: cgY'].values[i]))
1081 roi.addShape(shape)
1082 roi.image = unloaded_image
1083 roi.linkAnnotation(unloaded_file_annotation)
1084 rois.append(roi)
1085 elif False in cells_expected:
1086
1087 roi = RoiI()
1088 shape = PointI()
1089 shape.theZ = rint(0)
1090 shape.theT = rint(0)
1091 shape.cx = rdouble(float(columns['Nucleus: cgX'].values[i]))
1092 shape.cy = rdouble(float(columns['Nucleus: cgY'].values[i]))
1093 roi.addShape(shape)
1094 roi.image = unloaded_image
1095 roi.linkAnnotation(unloaded_file_annotation)
1096 rois.append(roi)
1097 else:
1098 raise MeasurementError('Not a nucleus or cell ROI')
1099 if len(rois) == self.ROI_UPDATE_LIMIT:
1100 self.update_rois(rois, columns['ROI'])
1101 rois = list()
1102 self.update_rois(rois, columns['ROI'])
1103
1106
1107 if __name__ == "__main__":
1108 try:
1109 options, args = getopt(sys.argv[1:], "s:p:u:m:k:id")
1110 except GetoptError, (msg, opt):
1111 usage(msg)
1112
1113 try:
1114 plate_id, = args
1115 plate_id = long(plate_id)
1116 except ValueError:
1117 usage("Plate ID must be a specified and a number!")
1118
1119 username = None
1120 hostname = None
1121 port = 4064
1122 measurement = None
1123 info = False
1124 session_key = None
1125 logging_level = logging.WARN
1126 for option, argument in options:
1127 if option == "-u":
1128 username = argument
1129 if option == "-s":
1130 hostname = argument
1131 if option == "-p":
1132 port = int(argument)
1133 if option == "-m":
1134 measurement = int(argument)
1135 if option == "-i":
1136 info = True
1137 if option == "-k":
1138 session_key = argument
1139 if option == "-d":
1140 logging_level = logging.DEBUG
1141 if session_key is None and username is None:
1142 usage("Username must be specified!")
1143 if session_key is None and hostname is None:
1144 usage("Host name must be specified!")
1145 if session_key is None:
1146 password = getpass()
1147
1148 logging.basicConfig(level = logging_level)
1149 c = client(hostname, port)
1150 c.setAgent("OMERO.populate_roi")
1151 c.enableKeepAlive(60)
1152 try:
1153 if session_key is not None:
1154 service_factory = c.createSession(session_key)
1155 else:
1156 service_factory = c.createSession(username, password)
1157
1158 factory = PlateAnalysisCtxFactory(service_factory)
1159 analysis_ctx = factory.get_analysis_ctx(plate_id)
1160 n_measurements = analysis_ctx.get_measurement_count()
1161 if measurement is not None and measurement >= n_measurements:
1162 usage("measurement %d not a valid index!")
1163 if info:
1164 for i in range(n_measurements):
1165 n_result_files = analysis_ctx.get_result_file_count(i)
1166 print "Measurement %d has %d result files." % \
1167 (i, n_result_files)
1168 sys.exit(0)
1169 if measurement is not None:
1170 measurement_ctx = analysis_ctx.get_measurement_ctx(measurement)
1171 measurement_ctx.parse_and_populate()
1172 else:
1173 for i in range(n_measurements):
1174 measurement_ctx = analysis_ctx.get_measurement_ctx(i)
1175 measurement_ctx.parse_and_populate()
1176 finally:
1177 c.closeSession()
1178