1
2
3 """
4 ...
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 import exceptions
27 import tempfile
28 import logging
29 import time
30 import sys
31 import csv
32 import re
33 from StringIO import StringIO
34 from getpass import getpass
35 from getopt import getopt, GetoptError
36
37 import omero.clients
38 from omero.rtypes import rdouble, rstring, rint
39 from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ImageI, \
40 FileAnnotationI, RoiI, EllipseI, PointI
41 from omero.grid import ImageColumn, WellColumn, RoiColumn, LongColumn, DoubleColumn
42 from omero.util.temp_files import create_path, remove_path
43 from omero import client
44
45
46 try:
47 from xml.etree.cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
48 except ImportError:
49 from cElementTree import XML, Element, SubElement, ElementTree, dump, iterparse
50
51 log = logging.getLogger("omero.util.populate_roi")
52
54 """Prints usage so that we don't have to. :)"""
55 cmd = sys.argv[0]
56 print """%s
57 Usage: %s [-h hostname] [-u username | -k session_key] <-p port> [plate_id]
58 Runs measurement population code for a given plate.
59
60 Options:
61 -u OMERO username to use
62 -k OMERO session key to use
63 -h OMERO hostname to use
64 -p OMERO port to use [defaults to 4063]
65 -m Measurement index to populate
66 -i Dump measurement information and exit (no population)
67 -d Print debug statements
68
69 Examples:
70 %s -h localhost -p 4063 -u bob 27
71
72 Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % (error, cmd, cmd)
73 sys.exit(2)
74
76 """
77 Raised by the analysis or measurement context when an error condition
78 is reached.
79 """
80 pass
81
83 """
84 Provides original file data by downloading it from an OMERO raw file store.
85 """
86
87
88 BUFFER_SIZE = 1024 * 1024
89
91 self.service_factory = service_factory
92 self.raw_file_store = self.service_factory.createRawFileStore()
93 self.dir = create_path("populate_roi", "dir", folder = True)
94
96 """
97 Downloads an original file to a temporary file and returns an open
98 file handle to that temporary file seeked to zero. The caller is
99 responsible for closing the temporary file.
100 """
101 print "Downloading original file: %d" % original_file.id.val
102 self.raw_file_store.setFileId(original_file.id.val)
103 temporary_file = tempfile.TemporaryFile(dir=str(self.dir))
104 size = original_file.size.val
105 for i in range((size / self.BUFFER_SIZE) + 1):
106 index = i * self.BUFFER_SIZE
107 data = self.raw_file_store.read(index, self.BUFFER_SIZE)
108 temporary_file.write(data)
109 temporary_file.seek(0L)
110 temporary_file.truncate(size)
111 return temporary_file
112
114 self.raw_file_store.close()
115
117 """
118 Abstract class which aggregates and represents all measurement runs made on
119 a given Plate.
120 """
121
122 DEFAULT_ORIGINAL_FILE_PROVIDER = DownloadingOriginalFileProvider
123
124 - def __init__(self, images, original_files, original_file_image_map,
125 plate_id, service_factory):
126 super(AbstractPlateAnalysisCtx, self).__init__()
127 self.images = images
128 self.numcols, self.numrows = self.guess_geometry(self.images)
129 self.original_files = original_files
130 self.original_file_image_map = original_file_image_map
131 self.plate_id = plate_id
132 self.service_factory = service_factory
133 self.log_files = dict()
134 self.detail_files = dict()
135 self.measurements = dict()
136
138 max_col = 0
139 max_row = 0
140 for image in images:
141 ws = image.copyWellSamples()[0]
142 well = ws.well
143 max_col = max(max_col, well.column.val)
144 max_row = max(max_row, well.row.val)
145 return (max_col+1, max_row+1)
146
148 x = wellnumber - 1
149 col = x % width
150 row = x / width
151 return (col,row)
152
        # (image_from_wellnumber body; its "def" line is absent from this
        # listing) -- finds the image whose first well sample sits at the
        # (col, row) derived from a 1-based well number.
        col, row = self.colrow_from_wellnumber(self.numcols, wellnumber)
        log.debug("Finding image for %s (%s,%s)..." % (wellnumber, col, row))
        for image in self.images:
            well = image.copyWellSamples()[0].well
            if well.column.val == col and well.row.val == row:
                return image
        raise exceptions.Exception("Could not find image for (col,row)==(%s,%s)" % (col,row))

    # --- Abstract interface: implemented by the per-vendor subclasses ---

        """
        Concrete implementations are to return True if the class pertinent
        for the original files associated with the plate.
        """
        raise Exception("To be implemented by concrete implementations.")
    is_this_type = classmethod(is_this_type)

        """Returns the number of recognized measurement runs."""
        raise Exception("To be implemented by concrete implementations.")

        """Returns the measurement context for a given index."""
        raise Exception("To be implemented by concrete implementations.")

        """
        Return the number of result files associated with a measurement run.
        """
        raise Exception("To be implemented by concrete implementations.")
187
189 """
190 MIAS dataset concrete class implementation of an analysis context. MIAS
191 measurements are aggregated based on a single "log" file. A result
192 file is present for each stitched (of multiple fields) mosaic and
193 contains the actual measured results and ROI.
194 """
195
196
197 datetime_format = '%Y-%m-%d-%Hh%Mm%Ss'
198
199
200 log_regex = re.compile('.*log(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')
201
202
203 detail_regex = re.compile(
204 '^Well(\d+)_(.*)_detail_(\d+-\d+-\d+-\d+h\d+m\d+s).txt$')
205
206
207 companion_format = 'Companion/MIAS'
208
209 - def __init__(self, images, original_files, original_file_image_map,
210 plate_id, service_factory):
216
218 """
219 Strips out erroneous files and collects the log and result original
220 files based on regular expression matching.
221 """
222 for original_file in self.original_files:
223 if original_file.format.value.val != self.companion_format:
224 continue
225 name = original_file.name.val
226 match = self.log_regex.match(name)
227 if match:
228 d = time.strptime(match.group(1), self.datetime_format)
229 self.log_files[d] = original_file
230 continue
231 match = self.detail_regex.match(name)
232 if match:
233 d = time.strptime(match.group(3), self.datetime_format)
234 self.detail_files[d] = original_file
235 continue
236
238 """
239 Result original files are only recognizable as part of a given
240 measurement (declared by a log file) based upon their parsed
241 date/time of completion as encoded in the filename. This method
242 collects result original files and groups them by collective
243 parsed date/time of completion.
244 """
245 log_timestamps = list(self.log_files.keys())
246 log_timestamps.sort()
247 detail_timestamps = list(self.detail_files.keys())
248 detail_timestamps.sort()
249 for log_timestamp in log_timestamps:
250 self.measurements[log_timestamp] = list()
251 for detail_timestamp in detail_timestamps:
252 for log_timestamp in log_timestamps:
253 if detail_timestamp < log_timestamp:
254 self.measurements[log_timestamp].append(
255 self.detail_files[detail_timestamp])
256 break
257
258
259
260
261
263 for original_file in original_files:
264 format = original_file.format.value.val
265 if format == klass.companion_format \
266 and klass.log_regex.match(original_file.name.val):
267 return True
268 is_this_type = classmethod(is_this_type)
269
271 return len(self.measurements.keys())
272
274 key = self.log_files.keys()[index]
275 sf = self.service_factory
276 original_file = self.log_files[key]
277 result_files = self.measurements[key]
278 provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
279 return MIASMeasurementCtx(self, sf, provider, original_file,
280 result_files)
281
283 key = self.log_files.keys()[measurement_index]
284 return len(self.measurements[key])
285
287 """
288 Flex dataset concrete class implementation of an analysis context. Flex
289 measurements are aggregated in a single ".res" XML file and contain no
290 ROI.
291 """
292
293
294 companion_format = 'Companion/Flex'
295
296 - def __init__(self, images, original_files, original_file_image_map,
297 plate_id, service_factory):
298 super(FlexPlateAnalysisCtx, self).__init__(
299 images, original_files, original_file_image_map, plate_id,
300 service_factory)
301 path_original_file_map = dict()
302 for original_file in original_files:
303 path = original_file.path.val
304 format = original_file.format.value.val
305 if format == self.companion_format and path.endswith('.res'):
306 path_original_file_map[path] = original_file
307 self.measurements = path_original_file_map.values()
308
309
310
311
312
314 for original_file in original_files:
315 path = original_file.path.val
316 format = original_file.format.value.val
317 if format == klass.companion_format and path.endswith('.res'):
318 return True
319 return False
320 is_this_type = classmethod(is_this_type)
321
323 return len(self.measurements)
324
326 sf = self.service_factory
327 original_file = self.measurements[index]
328 result_files = []
329 provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
330 return FlexMeasurementCtx(self, sf, provider, original_file,
331 result_files)
332
335
337 """
338 InCell dataset concrete class implementation of an analysis context.
339 InCell measurements are from InCell Analyzer and are aggregated in a
340 single gargantuan (often larger than 100MB per plate) XML file.
341 """
342
343
344 companion_format = 'Companion/InCell'
345
346 - def __init__(self, images, original_files, original_file_image_map,
347 plate_id, service_factory):
348 super(InCellPlateAnalysisCtx, self).__init__(
349 images, original_files, original_file_image_map, plate_id,
350 service_factory)
351 path_original_file_map = dict()
352 for original_file in original_files:
353 path = original_file.path.val
354 format = original_file.format.value.val
355 if format == self.companion_format and path.endswith('.xml'):
356 path_original_file_map[path] = original_file
357 self.measurements = path_original_file_map.values()
358
359
360
361
362
364 for original_file in original_files:
365 path = original_file.path.val
366 format = original_file.format.value.val
367 if format == klass.companion_format and path.endswith('.xml'):
368 return True
369 return False
370 is_this_type = classmethod(is_this_type)
371
373 return len(self.measurements)
374
376 sf = self.service_factory
377 original_file = self.measurements[index]
378 result_files = []
379 provider = self.DEFAULT_ORIGINAL_FILE_PROVIDER(sf)
380 return InCellMeasurementCtx(self, sf, provider, original_file,
381 result_files)
382
385
387 """
388 The plate analysis context factory is responsible for detecting and
389 returning a plate analysis context instance for a given plate.
390 """
391
392 implementations = (FlexPlateAnalysisCtx, MIASPlateAnalysisCtx,
393 InCellPlateAnalysisCtx)
394
396 self.service_factory = service_factory
397 self.query_service = self.service_factory.getQueryService()
398
400 """
401 Retrieves all the images associated with a given plate. Fetched
402 are the Image's WellSample, the WellSample's Well, the annotation
403 stack associated with the Image and each annotation's linked
404 original file.
405 """
406
407
408
409
410
411 log.debug("Loading image...")
412 images = self.query_service.findAllByQuery(
413 'select img from Image as img ' \
414 'join fetch img.wellSamples as ws ' \
415 'join fetch ws.well as w ' \
416 'join fetch w.wellSamples as ws2 ' \
417 'join w.plate as p ' \
418 'left outer join fetch img.annotationLinks as ia_links ' \
419 'left outer join fetch ia_links.child as ia ' \
420 'left outer join fetch ia.file as i_o_file ' \
421 'left outer join fetch i_o_file.format ' \
422 'where p.id = %d' % plate_id, None)
423 log.debug("Loading plate...")
424 plate = self.query_service.findByQuery(
425 'select p from Plate p ' \
426 'left outer join fetch p.annotationLinks as pa_links ' \
427 'left outer join fetch pa_links.child as pa ' \
428 'left outer join fetch pa.file as p_o_file ' \
429 'left outer join fetch p_o_file.format ' \
430 'where p.id = %d' % plate_id, None)
431 log.debug("Linking plate and images...")
432 for image in images:
433 for ws in image.copyWellSamples():
434 ws.well.plate = plate
435 return images
436
438 for annotation_link in obj.copyAnnotationLinks():
439 annotation = annotation_link.child
440 if isinstance(annotation, FileAnnotationI):
441 f = annotation.file
442 original_files.add(f)
443 if original_file_obj_map is not None:
444 original_file_obj_map[f.id.val] = obj
445
447 """Retrieves a plate analysis context for a given plate."""
448
449
450
451
452 plates = set()
453 original_files = set()
454 original_file_image_map = dict()
455 images = self.find_images_for_plate(plate_id)
456 for i, image in enumerate(images):
457 for ws in image.copyWellSamples():
458 plate = ws.well.plate
459 if plate not in plates:
460 plates.add(plate)
461 self.gather_original_files(plate, original_files, None)
462 self.gather_original_files(image, original_files, original_file_image_map)
463 for klass in self.implementations:
464 if klass.is_this_type(original_files):
465 return klass(images, original_files,
466 original_file_image_map,
467 plate_id, self.service_factory)
468 raise MeasurementError(
469 "Unable to find suitable analysis context for plate: %d" % \
470 plate_id)
471
473 """
474 Holds the results of a measurement parsing event.
475 """
476 - def __init__(self, sets_of_columns=None):
477 if sets_of_columns is None:
478 self.sets_of_columns = list()
479 else:
480 self.sets_of_columns = sets_of_columns
481
483 """Adds a set of columns to the parsing result."""
484 self.sets_of_columns.append(columns)
485
487 """
488 Abstract class which aggregates and represents all the results produced
489 from a given measurement run. It also provides a scaffold for interacting
490 with the OmeroTables infrastructure.
491 """
492
493
494 ROI_UPDATE_LIMIT = 1000
495
496 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
497 original_file, result_files):
498 super(AbstractMeasurementCtx, self).__init__()
499 self.analysis_ctx = analysis_ctx
500 self.service_factory = service_factory
501 self.original_file_provider = original_file_provider
502 self.query_service = self.service_factory.getQueryService()
503 self.update_service = self.service_factory.getUpdateService()
504 self.original_file = original_file
505 self.result_files = result_files
506
507
508 self.wellimages = dict()
509 for image in self.analysis_ctx.images:
510 for well_sample in image.copyWellSamples():
511 well = well_sample.well
512 idx = well.copyWellSamples().index(well_sample)
513 row = well.row.val
514 column = well.column.val
515 if row not in self.wellimages:
516 self.wellimages[row] = dict()
517 if column not in self.wellimages[row]:
518 self.wellimages[row][column] = []
519
520 l = self.wellimages[row][column]
521 for x in range(idx - len(l) + 1):
522 l.append(None)
523 l[idx] = image
524
526 """
527 Takes a row and a col index and returns a tuple
528 of Well and image. Either might be None. Uses the
529 first image found to find the Well and therefore
530 must be loaded (image->wellSample->well)
531 """
532 try:
533 images = self.wellimages[row][col]
534 if not images:
535 return (None, None)
536 image = images[0]
537 well = image.copyWellSamples()[0].well
538 return (well, images)
539 except KeyError:
540
541
542
543 print "WARNING: Missing data for row %d column %d" % \
544 (row, col)
545 return (None, None)
546
548 """Updates the OmeroTables instance backing our results."""
549
550 sr = self.service_factory.sharedResources()
551 name = self.get_name()
552 self.table = sr.newTable(1, '/%s.r5' % name)
553 if self.table is None:
554 raise MeasurementError(
555 "Unable to create table: %s" % name)
556
557
558
559
560
561 table_original_file = self.table.getOriginalFile()
562 table_original_file_id = table_original_file.id.val
563 print "Created new table: %d" % table_original_file_id
564 unloaded_o_file = OriginalFileI(table_original_file_id, False)
565 self.file_annotation.file = unloaded_o_file
566 unloaded_plate = PlateI(self.analysis_ctx.plate_id, False)
567 plate_annotation_link = PlateAnnotationLinkI()
568 plate_annotation_link.parent = unloaded_plate
569 plate_annotation_link.child = self.file_annotation
570 plate_annotation_link = \
571 self.update_service.saveAndReturnObject(plate_annotation_link)
572 self.file_annotation = plate_annotation_link.child
573
574 t0 = int(time.time() * 1000)
575 self.table.initialize(columns)
576 print "Table init took %sms" % (int(time.time() * 1000) - t0)
577 t0 = int(time.time() * 1000)
578 self.table.addData(columns)
579 print "Table update took %sms" % (int(time.time() * 1000) - t0)
580
582 """
583 Creates a file annotation to represent a set of columns from our
584 measurment.
585 """
586 self.file_annotation = FileAnnotationI()
587 self.file_annotation.ns = \
588 rstring('openmicroscopy.org/omero/measurement')
589 name = self.get_name(set_of_columns)
590 self.file_annotation.description = rstring(name)
591
593 """
594 Updates a set of ROI inserting the updated IDs back into a given
595 column.
596 """
597 print "Saving %d ROI at %d" % (len(rois), len(column.values))
598 t0 = int(time.time() * 1000)
599 roi_ids = self.update_service.saveAndReturnIds(rois)
600 print "ROI update took %sms" % (int(time.time() * 1000) - t0)
601 column.values += roi_ids
602 print "Total ROI saved: %d" % (len(column.values))
603
605 """Returns the image from which an original file has originated."""
606 m = self.analysis_ctx.original_file_image_map
607 return m[original_file.id.val]
608
621
622
623
624
625
626 - def get_name(self, set_of_columns=None):
627 """Returns the name of the measurement, and a set of columns."""
628 raise Exception("To be implemented by concrete implementations.")
629
631 """Parses result files, returning a MeasurementParsingResult."""
632 raise Exception("To be implemented by concrete implementations.")
633
635 """
636 Parses and populates ROI from column data in the OMERO database.
637 """
638 raise Exception("To be implemented by concrete implementations.")
639
641 """
642 Populates an OmeroTables instance backing our results and ROI
643 linkages.
644 """
645 raise Exception("To be implemented by concrete implementations.")
646
648 """
649 MIAS measurements are a set of tab delimited text files per well. Each
650 TSV file's content is prefixed by the analysis parameters.
651 """
652
653
654 IMAGE_COL = 0
655
656
657 ROI_COL = 1
658
659
660 NEO_EXPECTED = ('Image', 'ROI', 'Label', 'Row', 'Col', 'Nucleus Area',
661 'Cell Diam.', 'Cell Type', 'Mean Nucleus Intens.')
662
663
664 MNU_EXPECTED = ('Image', 'ROI', 'row', 'col', 'type')
665
666 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
667 original_file, result_files):
668 super(MIASMeasurementCtx, self).__init__(
669 analysis_ctx, service_factory, original_file_provider,
670 original_file, result_files)
671
673 """
674 Retrieves a set of empty OmeroTables columns for the analysis results
675 prefixed by an ImageColumn and RoiColumn to handle these linked
676 object indexes.
677 """
678 columns = [ImageColumn('Image', '', list()),
679 RoiColumn('ROI', '', list())]
680 for i in range(n_columns):
681 columns.append(DoubleColumn('', '', list()))
682 return columns
683
684
685
686
687
689 """
690 Overriding the abstract implementation since the companion
691 files are no longer attached to the images, but only to the plate
692 for MIAS. Instead, we use the filename itself to find the image.
693 """
694 name = original_file.name.val
695
696 match = MIASPlateAnalysisCtx.detail_regex.match(name)
697 if match:
698 well_num = int(match.group(1))
699 return self.analysis_ctx.image_from_wellnumber(well_num)
700 else:
701 raise exceptions.Exception("Not a detail file")
702
703
704
705
706
707 - def get_name(self, set_of_columns=None):
708 return self.original_file.name.val[:-4]
709
736
738 """Parses out ROI from OmeroTables columns for 'NEO' datasets."""
739 print "Parsing %s NEO ROIs..." % (len(columns[0].values))
740 image_ids = columns[self.IMAGE_COL].values
741 rois = list()
742
743
744 self.file_annotation = \
745 self.update_service.saveAndReturnObject(self.file_annotation)
746 unloaded_file_annotation = \
747 FileAnnotationI(self.file_annotation.id.val, False)
748 for i, image_id in enumerate(image_ids):
749 unloaded_image = ImageI(image_id, False)
750 roi = RoiI()
751 shape = EllipseI()
752 values = columns[6].values
753 diameter = rdouble(float(values[i]))
754 shape.theZ = rint(0)
755 shape.theT = rint(0)
756 values = columns[4].values
757 shape.cx = rdouble(float(values[i]))
758 values = columns[3].values
759 shape.cy = rdouble(float(values[i]))
760 shape.rx = diameter
761 shape.ry = diameter
762 roi.addShape(shape)
763 roi.image = unloaded_image
764 roi.linkAnnotation(unloaded_file_annotation)
765 rois.append(roi)
766 if len(rois) == self.ROI_UPDATE_LIMIT:
767 self.update_rois(rois, columns[self.ROI_COL])
768 rois = list()
769 self.update_rois(rois, columns[self.ROI_COL])
770
772 """Parses out ROI from OmeroTables columns for 'MNU' datasets."""
773 print "Parsing %s MNU ROIs..." % (len(columns[0].values))
774 image_ids = columns[self.IMAGE_COL].values
775 rois = list()
776
777
778 self.file_annotation = \
779 self.update_service.saveAndReturnObject(self.file_annotation)
780 unloaded_file_annotation = \
781 FileAnnotationI(self.file_annotation.id.val, False)
782 for i, image_id in enumerate(image_ids):
783 unloaded_image = ImageI(image_id, False)
784 roi = RoiI()
785 shape = PointI()
786 shape.theZ = rint(0)
787 shape.theT = rint(0)
788 values = columns[3].values
789 shape.cx = rdouble(float(values[i]))
790 values = columns[2].values
791 shape.cy = rdouble(float(values[i]))
792 roi.addShape(shape)
793 roi.image = unloaded_image
794 roi.linkAnnotation(unloaded_file_annotation)
795 rois.append(roi)
796 if len(rois) == self.ROI_UPDATE_LIMIT:
797 self.update_rois(rois, columns[self.ROI_COL])
798 rois = list()
799 self.update_rois(rois, columns[self.ROI_COL])
800
813
815 """
816 Query performed::
817 first_roi = columns[self.ROI_COL].values[0]
818 first_roi = self.query_service.findByQuery(
819 'select roi from Roi as roi ' \
820 'join fetch roi.annotationLinks as link ' \
821 'join fetch link.child ' \
822 'where roi.id = %d' % first_roi, None)
823 self.file_annotation = first_roi.copyAnnotationLinks()[0].child
824 """
825 self.update_table(columns)
826
828 """
829 Flex measurements are located deep within a ".res" XML file container
830 and contain no ROI.
831 """
832
833
834 AREA_XPATH = './/Areas/Area'
835
836
837
838 PARAMETER_XPATH = './/Wells/ResultParameters/Parameter'
839
840
841
842 WELL_XPATH = './/Wells/Well'
843
844
845 RESULT_XPATH = './/Result'
846
847 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
848 original_file, result_files):
849 super(FlexMeasurementCtx, self).__init__(
850 analysis_ctx, service_factory, original_file_provider,
851 original_file, result_files)
852
854 """
855 Retrieves a set of empty OmeroTables columns for the analysis results
856 prefixed by a WellColumn to handle linked object indexes.
857 """
858 columns = {'Well': WellColumn('Well', '', list())}
859 for header in headers:
860 columns[header] = DoubleColumn(header, '', list())
861 return columns
862
863
864
865
866
867 - def get_name(self, set_of_columns=None):
868 return self.original_file.name.val[:-4]
869
871 print "Parsing: %s" % self.original_file.name.val
872 provider = self.original_file_provider
873 data = provider.get_original_file_data(self.original_file)
874 try:
875 et = ElementTree(file=data)
876 finally:
877 data.close()
878 root = et.getroot()
879 areas = root.findall(self.AREA_XPATH)
880 print "Area count: %d" % len(areas)
881 for i, area in enumerate(areas):
882 result_parameters = area.findall(self.PARAMETER_XPATH)
883 print "Area %d result children: %d" % (i, len(result_parameters))
884 if len(result_parameters) == 0:
885 print "%s contains no analysis data." % self.get_name()
886 return
887 headers = list()
888 for result_parameter in result_parameters:
889 headers.append(result_parameter.text)
890 columns = self.get_empty_columns(headers)
891 wells = area.findall(self.WELL_XPATH)
892 for well in wells:
893
894 row = int(well.get('row')) - 1
895 column = int(well.get('col')) - 1
896 try:
897 v = columns['Well'].values
898 wellobj, images = self.get_well_images(row, column)
899 if not wellobj:
900 continue
901 v.append(wellobj.id.val)
902 except:
903 log.exception("ERROR: Failed to get well images")
904 continue
905 results = well.findall(self.RESULT_XPATH)
906 for result in results:
907 name = result.get('name')
908 columns[name].values.append(float(result.text))
909 return MeasurementParsingResult([columns.values()])
910
913
916
918 """
919 InCell Analyzer measurements are located deep within an XML file container.
920 """
921
922
923 CELLS_CG_EXPECTED = ['Cell: cgX', 'Cell: cgY']
924
925
926 NUCLEI_CG_EXPECTED = ['Nucleus: cgX', 'Nucleus: cgY']
927
928
929 CELLS_SOURCE = 'Cells'
930
931
932 NUCLEI_SOURCE = 'Nuclei'
933
934 - def __init__(self, analysis_ctx, service_factory, original_file_provider,
935 original_file, result_files):
939
941 """
942 Checks a set of columns for sparse data (one column shorter than
943 the rest) and adds -1 where appropriate.
944 """
945 length = None
946 for i, column in enumerate(columns):
947 if column.name == 'ROI':
948
949
950 continue
951 current_length = len(column.values)
952 if length is not None:
953 if current_length > length:
954 log.warn("%s length %d > %d modding previous column" % \
955 (column.name, current_length, length))
956 columns[i - 1].values.append(-1.0)
957 if current_length < length:
958 log.warn("%s length %d < %d modding current column" % \
959 (column.name, current_length, length))
960 column.values.append(-1.0)
961 length = len(column.values)
962
963
964
965
966
967 - def get_name(self, set_of_columns=None):
968 if set_of_columns is None:
969 return self.original_file.name.val[:-4]
970 elif set_of_columns == 0:
971 return self.original_file.name.val[:-4] + ' Cells'
972 elif set_of_columns == 1:
973 return self.original_file.name.val[:-4] + ' Nuclei'
974
976 print "Parsing: %s" % self.original_file.name.val
977 provider = self.original_file_provider
978 data = provider.get_original_file_data(self.original_file)
979 try:
980 events = ('start', 'end')
981 well_data = None
982 n_roi = 0
983 n_measurements = 0
984 cells_columns = {'Image': ImageColumn('Image', '', list()),
985 'Cell': LongColumn('Cell', '', list()),
986 'ROI': RoiColumn('ROI', '', list())
987 }
988 nuclei_columns = {'Image': ImageColumn('Image', '', list()),
989 'Cell': LongColumn('Cell', '', list()),
990 'ROI': RoiColumn('ROI', '', list())
991 }
992 for event, element in iterparse(data, events=events):
993 if event == 'start' and element.tag == 'WellData' \
994 and element.get('cell') != 'Summary':
995 row = int(element.get('row')) - 1
996 col = int(element.get('col')) - 1
997 i = int(element.get('field')) - 1
998 try:
999 well, images = self.get_well_images(row, col)
1000 if not images:
1001 continue
1002 image = images[i]
1003 except:
1004 log.exception("ERROR: Failed to get well images")
1005 continue
1006 self.check_sparse_data(cells_columns.values())
1007 self.check_sparse_data(nuclei_columns.values())
1008 cell = long(element.get('cell'))
1009 cells_columns['Cell'].values.append(cell)
1010 nuclei_columns['Cell'].values.append(cell)
1011 well_data = element
1012 cells_columns['Image'].values.append(image.id.val)
1013 nuclei_columns['Image'].values.append(image.id.val)
1014 elif well_data is not None and event == 'start' \
1015 and element.tag == 'Measure':
1016 source = element.get('source')
1017 key = element.get('key')
1018 value = float(element.get('value'))
1019 if source == self.CELLS_SOURCE:
1020 if n_roi == 0:
1021 cells_columns[key] = DoubleColumn(key, '', list())
1022 cells_columns[key].values.append(value)
1023 elif source == self.NUCLEI_SOURCE:
1024 if n_roi == 0:
1025 nuclei_columns[key] = DoubleColumn(key, '', list())
1026 nuclei_columns[key].values.append(value)
1027 else:
1028 if n_roi == 0:
1029 cells_columns[key] = DoubleColumn(key, '', list())
1030 nuclei_columns[key] = DoubleColumn(key, '', list())
1031 cells_columns[key].values.append(value)
1032 nuclei_columns[key].values.append(value)
1033 n_measurements += 1
1034 elif event == 'end' and element.tag == 'WellData':
1035 if well_data is not None:
1036 n_roi += 1
1037 well_data.clear()
1038 well_data = None
1039 else:
1040 element.clear()
1041 print "Total ROI: %d" % n_roi
1042 print "Total measurements: %d" % n_measurements
1043 sets_of_columns = [cells_columns.values(), nuclei_columns.values()]
1044 return MeasurementParsingResult(sets_of_columns)
1045 finally:
1046 data.close()
1047
1049
1050 names = [column.name for column in columns_as_list]
1051 cells_expected = [name in names for name in self.CELLS_CG_EXPECTED]
1052 nuclei_expected = [name in names for name in self.NUCLEI_CG_EXPECTED]
1053 if (False in cells_expected) and (False in nuclei_expected):
1054 print "WARNING: Missing CGs for InCell dataset: %r" % names
1055 return
1056
1057 columns = dict()
1058 for column in columns_as_list:
1059 columns[column.name] = column
1060 image_ids = columns['Image'].values
1061 rois = list()
1062
1063
1064 self.file_annotation = \
1065 self.update_service.saveAndReturnObject(self.file_annotation)
1066 unloaded_file_annotation = \
1067 FileAnnotationI(self.file_annotation.id.val, False)
1068
1069 for i, image_id in enumerate(image_ids):
1070 unloaded_image = ImageI(image_id, False)
1071 if False in nuclei_expected:
1072
1073 roi = RoiI()
1074 shape = PointI()
1075 shape.theZ = rint(0)
1076 shape.theT = rint(0)
1077 shape.cx = rdouble(float(columns['Cell: cgX'].values[i]))
1078 shape.cy = rdouble(float(columns['Cell: cgY'].values[i]))
1079 roi.addShape(shape)
1080 roi.image = unloaded_image
1081 roi.linkAnnotation(unloaded_file_annotation)
1082 rois.append(roi)
1083 elif False in cells_expected:
1084
1085 roi = RoiI()
1086 shape = PointI()
1087 shape.theZ = rint(0)
1088 shape.theT = rint(0)
1089 shape.cx = rdouble(float(columns['Nucleus: cgX'].values[i]))
1090 shape.cy = rdouble(float(columns['Nucleus: cgY'].values[i]))
1091 roi.addShape(shape)
1092 roi.image = unloaded_image
1093 roi.linkAnnotation(unloaded_file_annotation)
1094 rois.append(roi)
1095 else:
1096 raise MeasurementError('Not a nucleus or cell ROI')
1097 if len(rois) == self.ROI_UPDATE_LIMIT:
1098 self.update_rois(rois, columns['ROI'])
1099 rois = list()
1100 self.update_rois(rois, columns['ROI'])
1101
1104
1105 if __name__ == "__main__":
1106 try:
1107 options, args = getopt(sys.argv[1:], "h:p:u:m:k:id")
1108 except GetoptError, (msg, opt):
1109 usage(msg)
1110
1111 try:
1112 plate_id, = args
1113 plate_id = long(plate_id)
1114 except ValueError:
1115 usage("Plate ID must be a specified and a number!")
1116
1117 username = None
1118 hostname = None
1119 port = 4063
1120 measurement = None
1121 info = False
1122 session_key = None
1123 logging_level = logging.WARN
1124 for option, argument in options:
1125 if option == "-u":
1126 username = argument
1127 if option == "-h":
1128 hostname = argument
1129 if option == "-p":
1130 port = int(argument)
1131 if option == "-m":
1132 measurement = int(argument)
1133 if option == "-i":
1134 info = True
1135 if option == "-k":
1136 session_key = argument
1137 if option == "-d":
1138 logging_level = logging.DEBUG
1139 if session_key is None and username is None:
1140 usage("Username must be specified!")
1141 if session_key is None and hostname is None:
1142 usage("Host name must be specified!")
1143 if session_key is None:
1144 password = getpass()
1145
1146 logging.basicConfig(level = logging_level)
1147 c = client(hostname, port)
1148 c.enableKeepAlive(60)
1149 try:
1150 if session_key is not None:
1151 service_factory = c.createSession(session_key)
1152 else:
1153 service_factory = c.createSession(username, password)
1154
1155 factory = PlateAnalysisCtxFactory(service_factory)
1156 analysis_ctx = factory.get_analysis_ctx(plate_id)
1157 n_measurements = analysis_ctx.get_measurement_count()
1158 if measurement is not None and measurement >= n_measurements:
1159 usage("measurement %d not a valid index!")
1160 if info:
1161 for i in range(n_measurements):
1162 n_result_files = analysis_ctx.get_result_file_count(i)
1163 print "Measurement %d has %d result files." % \
1164 (i, n_result_files)
1165 sys.exit(0)
1166 if measurement is not None:
1167 measurement_ctx = analysis_ctx.get_measurement_ctx(measurement)
1168 measurement_ctx.parse_and_populate()
1169 else:
1170 for i in range(n_measurements):
1171 measurement_ctx = analysis_ctx.get_measurement_ctx(i)
1172 measurement_ctx.parse_and_populate()
1173 finally:
1174 c.closeSession()
1175