Package omero :: Module tables

Source Code for Module omero.tables

#!/usr/bin/env python
#
# OMERO Tables Interface
# Copyright 2009 Glencoe Software, Inc.  All Rights Reserved.
# Use is subject to license terms supplied in LICENSE.txt
#

import os
import Ice
import time
import numpy
import signal
import logging
import threading
import traceback
import subprocess
import exceptions
import portalocker # Third-party

from path import path


import omero # Do we need both??
import omero.clients

# For ease of use
from omero.columns import *
from omero.rtypes import *
from omero.util.decorators import remoted, locked, perf
from omero_ext.functional import wraps


sys = __import__("sys") # Python sys
tables = __import__("tables") # Pytables

def slen(rv):
    """
    Returns the length of the argument or None
    if the argument is None
    """
    if rv is None:
        return None
    return len(rv)

def stamped(func, update = False):
    """
    Decorator which takes the first argument after "self" and compares
    that to the last modification time. If the stamp is older, then the
    method call will throw an omero.OptimisticLockException. Otherwise,
    execution will complete normally. If update is True, then the
    last modification time will be updated after the method call if it
    is successful.

    Note: stamped implies locked
    """
    def check_and_update_stamp(*args, **kwargs):
        self = args[0]
        stamp = args[1]
        if stamp < self._stamp:
            raise omero.OptimisticLockException(None, None, "Resource modified by another thread")

        try:
            return func(*args, **kwargs)
        finally:
            if update:
                self._stamp = time.time()
    check_and_update_stamp = wraps(func)(check_and_update_stamp)
    return locked(check_and_update_stamp)
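
# Example (hypothetical usage): the stamped methods on HdfStorage below all
# follow this pattern -- the caller passes its last-seen modification time as
# the first argument after self, and a stale stamp raises
# omero.OptimisticLockException before the body runs:
#
#     @stamped
#     def update(self, stamp, data):
#         ...
#
#     storage.update(last_seen_stamp, data)  # last_seen_stamp from a prior call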

class HdfList(object):
    """
    Since two calls to tables.openFile() return non-equal files
    with equal fileno's, portalocker cannot be used to prevent
    the creation of two HdfStorage instances from the same
    Python process.
    """

    def __init__(self):
        self._lock = threading.RLock()
        self.__filenos = {}
        self.__paths = {}

    @locked
    def addOrThrow(self, hdfpath, hdffile, hdfstorage, action):
        fileno = hdffile.fileno()
        if fileno in self.__filenos.keys():
            raise omero.LockTimeout(None, None, "File already opened by process: %s" % hdfpath, 0)
        else:
            self.__filenos[fileno] = hdfstorage
            self.__paths[hdfpath] = hdfstorage
            action()

    @locked
    def getOrCreate(self, hdfpath):
        try:
            return self.__paths[hdfpath]
        except KeyError:
            return HdfStorage(hdfpath) # Adds itself.

    @locked
    def remove(self, hdfpath, hdffile):
        del self.__filenos[hdffile.fileno()]
        del self.__paths[hdfpath]

# Global object for maintaining files
HDFLIST = HdfList()
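
# Example (hypothetical path): servants are expected to share one HdfStorage
# per physical file by going through this registry rather than constructing
# HdfStorage directly:
#
#     storage = HDFLIST.getOrCreate("/OMERO/Files/123.h5")
#     ...
#     storage.cleanup()   # flushes, unlocks and removes the entry again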

class HdfStorage(object):
    """
    Provides HDF-storage for measurement results. At most a single
    instance will be available for any given physical HDF5 file.
    """

    def __init__(self, file_path):
        """
        file_path should be the path to a file in a valid directory where
        this HDF instance can be stored (Not None or Empty). Once this
        method is finished, self.__hdf_file is guaranteed to be a PyTables HDF
        file, but not necessarily initialized.
        """

        if file_path is None or str(file_path) == "":
            raise omero.ValidationException(None, None, "Invalid file_path")

        self.logger = logging.getLogger("omero.tables.HdfStorage")
        self.__hdf_path = path(file_path)
        self.__hdf_file = self.__openfile("a")
        self.__tables = []

        self._lock = threading.RLock()
        self._stamp = time.time()

        # These are what we'd like to have
        self.__mea = None
        self.__ome = None

        # Now we try to lock the file, if this fails, we rollback
        # any previous initialization (opening the file)
        try:
            fileno = self.__hdf_file.fileno()
            HDFLIST.addOrThrow(self.__hdf_path, self.__hdf_file, self,
                               lambda: portalocker.lock(self.__hdf_file, portalocker.LOCK_NB|portalocker.LOCK_EX))
        except portalocker.LockException, le:
            self.cleanup()
            raise omero.LockTimeout(None, None, "Cannot acquire exclusive lock on: %s" % self.__hdf_path, 0)

        try:
            self.__ome = self.__hdf_file.root.OME
            self.__mea = self.__ome.Measurements
            self.__types = self.__ome.ColumnTypes[:]
            self.__descriptions = self.__ome.ColumnDescriptions[:]
            self.__initialized = True
        except tables.NoSuchNodeError:
            self.__initialized = False

    #
    # Non-locked methods
    #

    def __openfile(self, mode):
        try:
            if self.__hdf_path.exists() and self.__hdf_path.size == 0:
                mode = "w"
            return tables.openFile(self.__hdf_path, mode=mode, title="OMERO HDF Measurement Storage", rootUEP="/")
        except IOError, io:
            msg = "HDFStorage initialized with bad path: %s" % self.__hdf_path
            self.logger.error(msg)
            raise omero.ValidationException(None, None, msg)

    def __initcheck(self):
        if not self.__initialized:
            raise omero.ApiUsageException(None, None, "Not yet initialized")

    def __width(self):
        return len(self.__types)

    def __length(self):
        return self.__mea.nrows

    def __sizecheck(self, colNumbers, rowNumbers):
        if colNumbers is not None:
            if len(colNumbers) > 0:
                maxcol = max(colNumbers)
                totcol = self.__width()
                if maxcol >= totcol:
                    raise omero.ApiUsageException(None, None, "Column overflow: %s >= %s" % (maxcol, totcol))
            else:
                raise omero.ApiUsageException(None, None, "Columns not specified: %s" % colNumbers)

        if rowNumbers is not None:
            if len(rowNumbers) > 0:
                maxrow = max(rowNumbers)
                totrow = self.__length()
                if maxrow >= totrow:
                    raise omero.ApiUsageException(None, None, "Row overflow: %s >= %s" % (maxrow, totrow))
            else:
                raise omero.ApiUsageException(None, None, "Rows not specified: %s" % rowNumbers)

    #
    # Locked methods
    #

    @locked
    def initialize(self, cols, metadata = {}):
        """
        Creates the OME group, the Measurements table and the column
        type/description arrays from the given columns. May only be
        called once per file.
        """

        if self.__initialized:
            raise omero.ValidationException(None, None, "Already initialized.")

        if not cols:
            raise omero.ApiUsageException(None, None, "No columns provided")

        for c in cols:
            if not c.name:
                raise omero.ApiUsageException(None, None, "Column unnamed: %s" % c)

        self.__definition = columns2definition(cols)
        self.__ome = self.__hdf_file.createGroup("/", "OME")
        self.__mea = self.__hdf_file.createTable(self.__ome, "Measurements", self.__definition)

        self.__types = [ x.ice_staticId() for x in cols ]
        self.__descriptions = [ (x.description != None) and x.description or "" for x in cols ]
        self.__hdf_file.createArray(self.__ome, "ColumnTypes", self.__types)
        self.__hdf_file.createArray(self.__ome, "ColumnDescriptions", self.__descriptions)

        self.__mea.attrs.version = "v1"
        self.__mea.attrs.initialized = time.time()
        if metadata:
            for k, v in metadata.items():
                self.__mea.attrs[k] = v
            # See attrs._f_list("user") to retrieve these.

        self.__mea.flush()
        self.__hdf_file.flush()
        self.__initialized = True
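
    # Example (hypothetical columns and metadata): initialize() expects column
    # instances such as those imported from omero.columns, plus an optional
    # dict of scalar metadata stored as HDF attributes:
    #
    #     cols = [LongColumnI("roi", "Region id", []),
    #             DoubleColumnI("area", "Area in pixels", [])]
    #     storage.initialize(cols, {"analysis": "demo"})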

    @locked
    def incr(self, table):
        sz = len(self.__tables)
        self.logger.info("Size: %s - Attaching %s to %s" % (sz, table, self.__hdf_path))
        if table in self.__tables:
            self.logger.warn("Already added")
            raise omero.ApiUsageException(None, None, "Already added")
        self.__tables.append(table)
        return sz + 1

    @locked
    def decr(self, table):
        sz = len(self.__tables)
        self.logger.info("Size: %s - Detaching %s from %s", sz, table, self.__hdf_path)
        if not (table in self.__tables):
            self.logger.warn("Unknown table")
            raise omero.ApiUsageException(None, None, "Unknown table")
        self.__tables.remove(table)
        if sz <= 1:
            self.cleanup()
        return sz - 1

    @locked
    def uptodate(self, stamp):
        return self._stamp <= stamp

    @locked
    def rows(self):
        self.__initcheck()
        return self.__mea.nrows

    @locked
    def cols(self, size, current):
        self.__initcheck()
        ic = current.adapter.getCommunicator()
        types = self.__types
        names = self.__mea.colnames
        cols = []
        for i in range(len(types)):
            t = types[i]
            n = names[i]
            try:
                col = ic.findObjectFactory(t).create(t)
                col.name = n
                col.setsize(size)
                col.settable(self.__mea)
                cols.append(col)
            except:
                msg = traceback.format_exc()
                raise omero.ValidationException(None, msg, "BAD COLUMN TYPE: %s for %s" % (t, n))
        return cols

    @locked
    def meta(self):
        self.__initcheck()
        metadata = {}
        attr = self.__mea.attrs
        keys = list(self.__mea.attrs._v_attrnamesuser)
        for key in keys:
            val = attr[key]
            if type(val) == numpy.float64:
                val = rfloat(val)
            elif type(val) == numpy.int32:
                val = rint(val)
            elif type(val) == numpy.string_:
                val = rstring(val)
            else:
                raise omero.ValidationException(None, None, "BAD TYPE: %s" % type(val))
            metadata[key] = val
        return metadata

    @locked
    def append(self, cols):
        # Optimize!
        arrays = []
        names = []
        sz = None
        for col in cols:
            if sz is None:
                sz = col.getsize()
            else:
                if sz != col.getsize():
                    raise omero.ValidationException(None, None, "Columns are of differing length")
            names.extend(col.names())
            arrays.extend(col.arrays())
            col.append(self.__mea) # Potential corruption !!!
        records = numpy.rec.fromarrays(arrays, names=names)
        self.__mea.append(records)
        self.__mea.flush()
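
    # Example (hypothetical data): every column handed to append() must carry
    # the same number of values; the rows are written as one numpy recarray:
    #
    #     cols[0].values = [1, 2, 3]
    #     cols[1].values = [10.0, 20.0, 30.0]
    #     storage.append(cols)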

    #
    # Stamped methods
    #

    @stamped
    def update(self, stamp, data):
        if data:
            for rn in data.rowNumbers:
                for col in data.columns:
                    getattr(self.__mea.cols, col.name)[rn] = col.values[rn]
            self.__mea.flush()

    @stamped
    def getWhereList(self, stamp, condition, variables, unused, start, stop, step):
        self.__initcheck()
        try:
            return self.__mea.getWhereList(condition, variables, None, start, stop, step).tolist()
        except (exceptions.NameError, exceptions.SyntaxError, exceptions.TypeError, exceptions.ValueError), err:
            aue = omero.ApiUsageException()
            aue.message = "Bad condition: %s, %s" % (condition, variables)
            aue.serverStackTrace = "".join(traceback.format_exc())
            aue.serverExceptionClass = str(err.__class__.__name__)
            raise aue
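
    # Example (hypothetical query): "condition" is a PyTables selection string
    # and "variables" binds any free names used in it; passing a fresh
    # time.time() as the stamp satisfies the @stamped check:
    #
    #     rows = storage.getWhereList(time.time(), "(area > a)", {"a": 50.0},
    #                                 None, 0, None, None)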

    def _as_data(self, cols, rowNumbers):
        """
        Constructs an omero.grid.Data object for returning to the client.
        """
        data = omero.grid.Data()
        data.columns = cols
        data.rowNumbers = rowNumbers
        data.lastModification = long(self._stamp*1000) # Convert to millis since epoch
        return data

    @stamped
    def readCoordinates(self, stamp, rowNumbers, current):
        self.__initcheck()
        self.__sizecheck(None, rowNumbers)
        cols = self.cols(None, current)
        for col in cols:
            col.readCoordinates(self.__mea, rowNumbers)
        return self._as_data(cols, rowNumbers)

    @stamped
    def read(self, stamp, colNumbers, start, stop, current):
        self.__initcheck()
        self.__sizecheck(colNumbers, None)
        cols = self.cols(None, current)
        rv = []
        l = 0
        for i in colNumbers:
            col = cols[i]
            col.read(self.__mea, start, stop)
            rv.append(col)
            l = len(col.values)
        return self._as_data(rv, range(start, start+l))

    @stamped
    def slice(self, stamp, colNumbers, rowNumbers, current):
        self.__initcheck()

        if colNumbers is None:
            colNumbers = range(self.__width())
        if rowNumbers is None:
            rowNumbers = range(self.__length())

        self.__sizecheck(colNumbers, rowNumbers)
        cols = self.cols(None, current)
        rv = []
        for i in colNumbers:
            col = cols[i]
            col.readCoordinates(self.__mea, rowNumbers)
            rv.append(col)
        return self._as_data(rv, rowNumbers)

    #
    # Lifecycle methods
    #

    def check(self):
        return True

    @locked
    def cleanup(self):
        self.logger.info("Cleaning storage: %s", self.__hdf_path)
        if self.__mea:
            self.__mea.flush()
            self.__mea = None
        if self.__ome:
            self.__ome = None
        if self.__hdf_file:
            HDFLIST.remove(self.__hdf_path, self.__hdf_file)
            hdffile = self.__hdf_file
            self.__hdf_file = None
            hdffile.close() # Resources freed

# End class HdfStorage


class TableI(omero.grid.Table, omero.util.SimpleServant):
    """
    Spreadsheet implementation based on pytables.
    """

    def __init__(self, ctx, file_obj, storage, uuid = "unknown"):
        self.uuid = uuid
        self.file_obj = file_obj
        self.stamp = time.time()
        self.storage = storage
        omero.util.SimpleServant.__init__(self, ctx)
        self.storage.incr(self)

    def check(self):
        """
        Called periodically to check the resource is alive. Returns
        False if this resource can be cleaned up. (Resources API)
        """
        self.logger.debug("Checking %s" % self)
        return True

    def cleanup(self):
        """
        Decrements the counter on the held storage to allow it to
        be cleaned up.
        """
        if self.storage:
            try:
                self.storage.decr(self)
            finally:
                self.storage = None

    def __str__(self):
        return "Table-%s" % self.uuid

    @remoted
    @perf
    def close(self, current = None):
        try:
            self.cleanup()
            self.logger.info("Closed %s", self)
        except:
            self.logger.warn("Closed %s with errors", self)

    # TABLES READ API ============================

    @remoted
    @perf
    def getOriginalFile(self, current = None):
        msg = "unknown"
        if self.file_obj:
            if self.file_obj.id:
                msg = self.file_obj.id.val
        self.logger.info("%s.getOriginalFile() => id=%s", self, msg)
        return self.file_obj

    @remoted
    @perf
    def getHeaders(self, current = None):
        rv = self.storage.cols(None, current)
        self.logger.info("%s.getHeaders() => size=%s", self, slen(rv))
        return rv

    @remoted
    @perf
    def getMetadata(self, current = None):
        rv = self.storage.meta()
        self.logger.info("%s.getMetadata() => size=%s", self, slen(rv))
        return rv

    @remoted
    @perf
    def getNumberOfRows(self, current = None):
        rv = self.storage.rows()
        self.logger.info("%s.getNumberOfRows() => %s", self, rv)
        return long(rv)

    @remoted
    @perf
    def getWhereList(self, condition, variables, start, stop, step, current = None):
        if stop == 0:
            stop = None
        if step == 0:
            step = None
        rv = self.storage.getWhereList(self.stamp, condition, variables, None, start, stop, step)
        self.logger.info("%s.getWhereList(%s, %s, %s, %s, %s) => size=%s", self, condition, variables, start, stop, step, slen(rv))
        return rv

    @remoted
    @perf
    def readCoordinates(self, rowNumbers, current = None):
        self.logger.info("%s.readCoordinates(size=%s)", self, slen(rowNumbers))
        try:
            return self.storage.readCoordinates(self.stamp, rowNumbers, current)
        except tables.HDF5ExtError, err:
            aue = omero.ApiUsageException()
            aue.message = "Error reading coordinates. Most likely out of range"
            aue.serverStackTrace = "".join(traceback.format_exc())
            aue.serverExceptionClass = str(err.__class__.__name__)
            raise aue

    @remoted
    @perf
    def read(self, colNumbers, start, stop, current = None):
        self.logger.info("%s.read(%s, %s, %s)", self, colNumbers, start, stop)
        try:
            return self.storage.read(self.stamp, colNumbers, start, stop, current)
        except tables.HDF5ExtError, err:
            aue = omero.ApiUsageException()
            aue.message = "Error reading coordinates. Most likely out of range"
            aue.serverStackTrace = "".join(traceback.format_exc())
            aue.serverExceptionClass = str(err.__class__.__name__)
            raise aue

    @remoted
    @perf
    def slice(self, colNumbers, rowNumbers, current = None):
        self.logger.info("%s.slice(size=%s, size=%s)", self, slen(colNumbers), slen(rowNumbers))
        return self.storage.slice(self.stamp, colNumbers, rowNumbers, current)
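
    # Example (hypothetical client-side use, assuming "table" is an
    # omero.grid.TablePrx for this servant): read two columns over all rows
    # and walk the returned omero.grid.Data object:
    #
    #     data = table.read([0, 1], 0, table.getNumberOfRows())
    #     for col in data.columns:
    #         print col.name, col.values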

    # TABLES WRITE API ===========================

    @remoted
    @perf
    def initialize(self, cols, current = None):
        self.storage.initialize(cols)
        if cols:
            self.logger.info("Initialized %s with %s col(s)", self, slen(cols))

    @remoted
    @perf
    def addColumn(self, col, current = None):
        raise omero.ApiUsageException(None, None, "NYI")

    @remoted
    @perf
    def addData(self, cols, current = None):
        self.storage.append(cols)
        if cols and cols[0] and cols[0].getsize():
            self.logger.info("Added %s row(s) of data to %s", cols[0].getsize(), self)

    @remoted
    @perf
    def update(self, data, current = None):
        if data:
            self.storage.update(self.stamp, data)
            self.logger.info("Updated %s row(s) of data in %s", slen(data.rowNumbers), self)

class TablesI(omero.grid.Tables, omero.util.Servant):
    """
    Implementation of the omero.grid.Tables API. Provides
    spreadsheet like functionality across the OMERO.grid.
    This servant serves as a session-less, user-less
    resource for obtaining omero.grid.Table proxies.

    The first major step in initialization is getting
    a session. This will block until the Blitz server
    is reachable.
    """

    def __init__(self,
                 ctx,
                 table_cast = omero.grid.TablePrx.uncheckedCast,
                 internal_repo_cast = omero.grid.InternalRepositoryPrx.checkedCast):

        omero.util.Servant.__init__(self, ctx, needs_session = True)

        # Storing these methods, mainly to allow overriding via
        # test methods. Static methods are evil.
        self._table_cast = table_cast
        self._internal_repo_cast = internal_repo_cast

        self.__stores = []
        self._get_dir()
        self._get_uuid()
        self._get_repo()

    def _get_dir(self):
        """
        Second step in initialization is to find the .omero/repository
        directory. If this is not created, then a required server has
        not started, and so this instance will not start.
        """
        wait = int(self.communicator.getProperties().getPropertyWithDefault("omero.repo.wait", "1"))
        self.repo_dir = self.communicator.getProperties().getProperty("omero.repo.dir")

        if not self.repo_dir:
            # Implies this is the legacy directory. Obtain from server
            self.repo_dir = self.ctx.getSession().getConfigService().getConfigValue("omero.data.dir")

        self.repo_cfg = path(self.repo_dir) / ".omero" / "repository"
        start = time.time()
        while not self.repo_cfg.exists() and (time.time() - start) < wait:
            self.logger.info("%s doesn't exist; waiting 5 seconds..." % self.repo_cfg)
            time.sleep(5)
        if not self.repo_cfg.exists():
            msg = "No repository found: %s" % self.repo_cfg
            self.logger.error(msg)
            raise omero.ResourceError(None, None, msg)

    def _get_uuid(self):
        """
        Third step in initialization is to find the database uuid
        for this grid instance. Multiple OMERO.grids could be watching
        the same directory.
        """
        cfg = self.ctx.getSession().getConfigService()
        self.db_uuid = cfg.getDatabaseUuid()
        self.instance = self.repo_cfg / self.db_uuid

    def _get_repo(self):
        """
        Fourth step in initialization is to find the repository object
        for the UUID found in .omero/repository/<db_uuid>, and then
        create a proxy for the InternalRepository attached to that.
        """

        # Get and parse the uuid from the RandomAccessFile format from FileMaker
        self.repo_uuid = (self.instance / "repo_uuid").lines()[0].strip()
        if len(self.repo_uuid) != 38:
            raise omero.ResourceError(None, None, "Poorly formed UUID: %s" % self.repo_uuid)
        self.repo_uuid = self.repo_uuid[2:]

        # Using the repo_uuid, find our OriginalFile object
        self.repo_obj = self.ctx.getSession().getQueryService().findByQuery(
            "select f from OriginalFile f where sha1 = :uuid",
            omero.sys.ParametersI().add("uuid", rstring(self.repo_uuid)))
        self.repo_mgr = self.communicator.stringToProxy("InternalRepository-%s" % self.repo_uuid)
        self.repo_mgr = self._internal_repo_cast(self.repo_mgr)
        self.repo_svc = self.repo_mgr.getProxy()

    @remoted
    def getRepository(self, current = None):
        """
        Returns the Repository object for this Tables server.
        """
        return self.repo_svc

    @remoted
    @perf
    def getTable(self, file_obj, current = None):
        """
        Create and/or register a table servant.
        """

        # Will throw an exception if not allowed.
        self.logger.info("getTable: %s", (file_obj and file_obj.id and file_obj.id.val))
        file_path = self.repo_mgr.getFilePath(file_obj)
        p = path(file_path).dirname()
        if not p.exists():
            p.makedirs()

        storage = HDFLIST.getOrCreate(file_path)
        id = Ice.Identity()
        id.name = Ice.generateUUID()
        table = TableI(self.ctx, file_obj, storage, uuid = id.name)
        self.resources.add(table)

        prx = current.adapter.add(table, id)
        return self._table_cast(prx)
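
    # Example (hypothetical client-side path): clients typically reach
    # getTable() indirectly through the session's shared resources rather
    # than calling this servant directly:
    #
    #     resources = client.sf.sharedResources()
    #     table = resources.openTable(file_obj)   # an omero.model.OriginalFile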