Package omero :: Module tables
[hide private]
[frames] | [no frames]

Source Code for Module omero.tables

  1  #!/usr/bin/env python 
  2  # 
  3  # OMERO Tables Interface 
  4  # Copyright 2009 Glencoe Software, Inc.  All Rights Reserved. 
  5  # Use is subject to license terms supplied in LICENSE.txt 
  6  # 
  7   
  8  import os 
  9  import Ice 
 10  import time 
 11  import numpy 
 12  import signal 
 13  import logging 
 14  import threading 
 15  import traceback 
 16  import subprocess 
 17  import exceptions 
 18  import portalocker # Third-party 
 19   
 20  from path import path 
 21   
 22   
 23  import omero # Do we need both?? 
 24  import omero.clients 
 25   
 26  # For ease of use 
 27  from omero.columns import * 
 28  from omero.rtypes import * 
 29  from omero.util.decorators import remoted, locked, perf 
 30  from omero_ext.functional import wraps 
 31   
 32   
 33  sys = __import__("sys") # Python sys 
 34  tables = __import__("tables") # Pytables 
def slen(rv):
    """
    Length helper that tolerates None: returns None when the
    argument is None, otherwise len() of the argument.
    """
    return None if rv is None else len(rv)
44
def stamped(func, update = False):
    """
    Decorator which takes the first argument after "self" and compares
    that to the last modification time. If the stamp is older, then the
    method call will throw an omero.OptimisticLockException. Otherwise,
    execution will complete normally. If update is True, then the
    last modification time will be updated after the method call if it
    is successful.

    Note: stamped implies locked

    """
    def check_and_update_stamp(*args, **kwargs):
        self = args[0]
        stamp = args[1]
        if stamp < self._stamp:
            raise omero.OptimisticLockException(None, None, "Resource modified by another thread")

        try:
            return func(*args, **kwargs)
        finally:
            # On success or failure, optionally record the modification time.
            if update:
                self._stamp = time.time()
    checked_and_update_stamp = wraps(func)(check_and_update_stamp)
    # Bug fix: previously returned locked(check_and_update_stamp), discarding
    # the wraps()-decorated version and losing func's name/docstring metadata.
    return locked(checked_and_update_stamp)
class HdfList(object):
    """
    Since two calls to tables.openFile() return non-equal files
    with equal fileno's, portalocker cannot be used to prevent
    the creation of two HdfStorage instances from the same
    Python process.
    """

    def __init__(self):
        # _lock is used by the @locked decorator on the methods below.
        self._lock = threading.RLock()
        self.__filenos = {}   # fileno -> HdfStorage
        self.__paths = {}     # hdf path -> HdfStorage

    @locked
    def addOrThrow(self, hdfpath, hdffile, hdfstorage, action):
        """
        Registers the storage for the given file, then runs action (the
        actual file-locking callable). Raises omero.LockTimeout if this
        process already has the file open.

        NOTE: registration happens *before* action() so that a failed
        lock can still be rolled back via HdfStorage.cleanup()/remove().
        """
        fileno = hdffile.fileno()
        if fileno in self.__filenos:  # Idiom fix: direct membership, not .keys()
            raise omero.LockTimeout(None, None, "File already opened by process: %s" % hdfpath, 0)
        else:
            self.__filenos[fileno] = hdfstorage
            self.__paths[hdfpath] = hdfstorage
            action()

    @locked
    def getOrCreate(self, hdfpath):
        """
        Returns the already-registered HdfStorage for hdfpath, or
        constructs a new one (which registers itself via addOrThrow).
        """
        try:
            return self.__paths[hdfpath]
        except KeyError:
            return HdfStorage(hdfpath) # Adds itself.

    @locked
    def remove(self, hdfpath, hdffile):
        # Unregister both lookup entries for the given file.
        del self.__filenos[hdffile.fileno()]
        del self.__paths[hdfpath]
# Global object for maintaining files: a process-wide singleton registry
# mapping HDF5 paths and filenos to their HdfStorage instances, ensuring
# at most one HdfStorage per physical file per process.
HDFLIST = HdfList()
109 110 -class HdfStorage(object):
111 """ 112 Provides HDF-storage for measurement results. At most a single 113 instance will be available for any given physical HDF5 file. 114 """ 115 116
117 - def __init__(self, file_path):
118 119 """ 120 file_path should be the path to a file in a valid directory where 121 this HDF instance can be stored (Not None or Empty). Once this 122 method is finished, self.__hdf_file is guaranteed to be a PyTables HDF 123 file, but not necessarily initialized. 124 """ 125 126 if file_path is None or str(file_path) == "": 127 raise omero.ValidationException(None, None, "Invalid file_path") 128 129 self.logger = logging.getLogger("omero.tables.HdfStorage") 130 self.__hdf_path = path(file_path) 131 self.__hdf_file = self.__openfile("a") 132 self.__tables = [] 133 134 self._lock = threading.RLock() 135 self._stamp = time.time() 136 137 # These are what we'd like to have 138 self.__mea = None 139 self.__ome = None 140 141 # Now we try to lock the file, if this fails, we rollback 142 # any previous initialization (opening the file) 143 try: 144 fileno = self.__hdf_file.fileno() 145 HDFLIST.addOrThrow(self.__hdf_path, self.__hdf_file, self,\ 146 lambda: portalocker.lock(self.__hdf_file, portalocker.LOCK_NB|portalocker.LOCK_EX)) 147 except portalocker.LockException, le: 148 self.cleanup() 149 raise omero.LockTimeout(None, None, "Cannot acquire exclusive lock on: %s" % self.__hdf_path, 0) 150 151 try: 152 self.__ome = self.__hdf_file.root.OME 153 self.__mea = self.__ome.Measurements 154 self.__types = self.__ome.ColumnTypes[:] 155 self.__descriptions = self.__ome.ColumnDescriptions[:] 156 self.__initialized = True 157 except tables.NoSuchNodeError: 158 self.__initialized = False
159 160 # 161 # Non-locked methods 162 # 163
164 - def __openfile(self, mode):
165 try: 166 return tables.openFile(self.__hdf_path, mode=mode, title="OMERO HDF Measurement Storage", rootUEP="/") 167 except IOError, io: 168 msg = "HDFStorage initialized with bad path: %s" % self.__hdf_path 169 self.logger.error(msg) 170 raise omero.ValidationException(None, None, msg)
171
172 - def __initcheck(self):
173 if not self.__initialized: 174 raise omero.ApiUsageException(None, None, "Not yet initialized")
175
176 - def __sizecheck(self, colNumbers, rowNumbers):
177 if colNumbers is not None: 178 if len(colNumbers) > 0: 179 maxcol = max(colNumbers) 180 totcol = len(self.__types) 181 if maxcol >= totcol: 182 raise omero.ApiUsageException(None, None, "Column overflow: %s >= %s" % (maxcol, totcol)) 183 else: 184 raise omero.ApiUsageException(None, None, "Columns not specified: %s" % colNumbers) 185 186 187 if rowNumbers is not None: 188 if len(rowNumbers) > 0: 189 maxrow = max(rowNumbers) 190 totrow = self.__mea.nrows 191 if maxrow >= totrow: 192 raise omero.ApiUsageException(None, None, "Row overflow: %s >= %s" % (maxrow, totrow)) 193 else: 194 raise omero.ApiUsageException(None, None, "Rows not specified: %s" % rowNumbers)
195 196 # 197 # Locked methods 198 # 199 200 @locked
201 - def initialize(self, cols, metadata = {}):
202 """ 203 204 """ 205 206 if self.__initialized: 207 raise omero.ValidationException(None, None, "Already initialized.") 208 209 if not cols: 210 raise omero.ApiUsageException(None, None, "No columns provided") 211 212 for c in cols: 213 if not c.name: 214 raise omero.ApiUsageException(None, None, "Column unnamed: %s" % c) 215 216 self.__definition = columns2definition(cols) 217 self.__ome = self.__hdf_file.createGroup("/", "OME") 218 self.__mea = self.__hdf_file.createTable(self.__ome, "Measurements", self.__definition) 219 220 self.__types = [ x.ice_staticId() for x in cols ] 221 self.__descriptions = [ (x.description != None) and x.description or "" for x in cols ] 222 self.__hdf_file.createArray(self.__ome, "ColumnTypes", self.__types) 223 self.__hdf_file.createArray(self.__ome, "ColumnDescriptions", self.__descriptions) 224 225 self.__mea.attrs.version = "v1" 226 self.__mea.attrs.initialized = time.time() 227 if metadata: 228 for k, v in metadata.items(): 229 self.__mea.attrs[k] = v 230 # See attrs._f_list("user") to retrieve these. 231 232 self.__mea.flush() 233 self.__hdf_file.flush() 234 self.__initialized = True
235 236 @locked
237 - def incr(self, table):
238 sz = len(self.__tables) 239 self.logger.info("Size: %s - Attaching %s to %s" % (sz, table, self.__hdf_path)) 240 if table in self.__tables: 241 self.logger.warn("Already added") 242 raise omero.ApiUsageException(None, Non, "Already added") 243 self.__tables.append(table) 244 return sz + 1
245 246 @locked
247 - def decr(self, table):
248 sz = len(self.__tables) 249 self.logger.info("Size: %s - Detaching %s from %s", sz, table, self.__hdf_path) 250 if not (table in self.__tables): 251 self.logger.warn("Unknown table") 252 raise omero.ApiUsageException(None, None, "Unknown table") 253 self.__tables.remove(table) 254 if sz <= 1: 255 self.cleanup() 256 return sz - 1
257 258 @locked
259 - def uptodate(self, stamp):
260 return self._stamp <= stamp
261 262 @locked
263 - def rows(self):
264 self.__initcheck() 265 return self.__mea.nrows
266 267 @locked
268 - def cols(self, size, current):
269 self.__initcheck() 270 ic = current.adapter.getCommunicator() 271 types = self.__types 272 names = self.__mea.colnames 273 cols = [] 274 for i in range(len(types)): 275 t = types[i] 276 n = names[i] 277 try: 278 col = ic.findObjectFactory(t).create(t) 279 col.name = n 280 col.setsize(size) 281 cols.append(col) 282 except: 283 msg = traceback.format_exc() 284 raise omero.ValidationException(None, msg, "BAD COLUMN TYPE: %s for %s" % (t,n)) 285 return cols
286 287 @locked
288 - def meta(self):
289 self.__initcheck() 290 metadata = {} 291 attr = self.__mea.attrs 292 keys = list(self.__mea.attrs._v_attrnamesuser) 293 for key in keys: 294 val = attr[key] 295 if type(val) == numpy.float64: 296 val = rfloat(val) 297 elif type(val) == numpy.int32: 298 val = rint(val) 299 elif type(val) == numpy.string_: 300 val = rstring(val) 301 else: 302 raise omero.ValidationException("BAD TYPE: %s" % type(val)) 303 metadata[key] = val
304 305 @locked
306 - def append(self, cols):
307 # Optimize! 308 arrays = [] 309 names = [] 310 sz = None 311 for col in cols: 312 if sz is None: 313 sz = col.getsize() 314 else: 315 if sz != col.getsize(): 316 raise omero.ValidationException("Columns are of differing length") 317 names.extend(col.names()) 318 arrays.extend(col.arrays()) 319 col.append(self.__mea) # Potential corruption !!! 320 records = numpy.rec.fromarrays(arrays, names=names) 321 self.__mea.append(records) 322 self.__mea.flush()
323 324 # 325 # Stamped methods 326 # 327 328 @stamped
329 - def getWhereList(self, stamp, condition, variables, unused, start, stop, step):
330 self.__initcheck() 331 try: 332 return self.__mea.getWhereList(condition, variables, None, start, stop, step).tolist() 333 except (exceptions.NameError, exceptions.SyntaxError, exceptions.TypeError, exceptions.ValueError), err: 334 aue = omero.ApiUsageException() 335 aue.message = "Bad condition: %s, %s" % (condition, variables) 336 aue.serverStackTrace = "".join(traceback.format_exc()) 337 aue.serverExceptionClass = str(err.__class__.__name__) 338 raise aue
339
340 - def _as_data(self, cols, rowNumbers):
341 """ 342 Constructs a omero.grid.Data object for returning to the client. 343 """ 344 data = omero.grid.Data() 345 data.columns = cols 346 data.rowNumbers = rowNumbers 347 data.lastModification = long(self._stamp*1000) # Convert to millis since epoch 348 return data
349 350 @stamped
351 - def readCoordinates(self, stamp, rowNumbers, current):
352 self.__initcheck() 353 self.__sizecheck(None, rowNumbers) 354 cols = self.cols(None, current) 355 for col in cols: 356 col.readCoordinates(self.__mea, rowNumbers) 357 return self._as_data(cols, rowNumbers)
358 359 @stamped
360 - def read(self, stamp, colNumbers, start, stop, current):
361 self.__initcheck() 362 self.__sizecheck(colNumbers, None) 363 cols = self.cols(None, current) 364 rv = [] 365 for i in colNumbers: 366 col = cols[i] 367 col.read(self.__mea, start, stop) 368 rv.append(col) 369 return self._as_data(rv, [])
370 371 @stamped
372 - def slice(self, stamp, colNumbers, rowNumbers, current):
373 self.__initcheck() 374 self.__sizecheck(colNumbers, rowNumbers) 375 cols = self.cols(None, current) 376 rv = [] 377 for i in colNumbers: 378 col = cols[i] 379 col.readCoordinates(self.__mea, rowNumbers) 380 rv.append(col) 381 return self._as_data(rv, rowNumbers)
382 # 383 # Lifecycle methods 384 # 385
386 - def check(self):
387 return True
388 389 @locked
390 - def cleanup(self):
391 self.logger.info("Cleaning storage: %s", self.__hdf_path) 392 if self.__mea: 393 self.__mea.flush() 394 self.__mea = None 395 if self.__ome: 396 self.__ome = None 397 if self.__hdf_file: 398 HDFLIST.remove(self.__hdf_path, self.__hdf_file) 399 hdffile = self.__hdf_file 400 self.__hdf_file = None 401 hdffile.close() # Resources freed
402
403 # End class HdfStorage 404 405 406 -class TableI(omero.grid.Table, omero.util.SimpleServant):
407 """ 408 Spreadsheet implementation based on pytables. 409 """ 410
411 - def __init__(self, ctx, file_obj, storage, uuid = "unknown"):
412 self.uuid = uuid 413 self.file_obj = file_obj 414 self.stamp = time.time() 415 self.storage = storage 416 omero.util.SimpleServant.__init__(self, ctx) 417 self.storage.incr(self)
418
419 - def check(self):
420 """ 421 Called periodically to check the resource is alive. Returns 422 False if this resource can be cleaned up. (Resources API) 423 """ 424 self.logger.debug("Checking %s" % self) 425 return True
426
427 - def cleanup(self):
428 """ 429 Decrements the counter on the held storage to allow it to 430 be cleaned up. 431 """ 432 if self.storage: 433 try: 434 self.storage.decr(self) 435 finally: 436 self.storage = None
437
438 - def __str__(self):
439 return "Table-%s" % self.uuid
440 441 @remoted 442 @perf
443 - def close(self, current = None):
444 try: 445 self.cleanup() 446 self.logger.info("Closed %s", self) 447 except: 448 self.logger.warn("Closed %s with errors", self)
449 450 # TABLES READ API ============================ 451 452 @remoted 453 @perf
454 - def getOriginalFile(self, current = None):
455 msg = "unknown" 456 if self.file_obj: 457 if self.file_obj.id: 458 msg = self.file_obj.id.val 459 self.logger.info("%s.getOriginalFile() => id=%s", self, msg) 460 return self.file_obj
461 462 @remoted 463 @perf
464 - def getHeaders(self, current = None):
465 rv = self.storage.cols(None, current) 466 self.logger.info("%s.getHeaders() => size=%s", self, slen(rv)) 467 return rv
468 469 @remoted 470 @perf
471 - def getMetadata(self, current = None):
472 rv = self.storage.meta() 473 self.logger.info("%s.getMetadata() => size=%s", self, slen(rv)) 474 return rv
475 476 @remoted 477 @perf
478 - def getNumberOfRows(self, current = None):
479 rv = self.storage.rows() 480 self.logger.info("%s.getNumberOfRows() => %s", self, rv) 481 return long(rv)
482 483 @remoted 484 @perf
485 - def getWhereList(self, condition, variables, start, stop, step, current = None):
486 if stop == 0: 487 stop = None 488 if step == 0: 489 step = None 490 rv = self.storage.getWhereList(self.stamp, condition, variables, None, start, stop, step) 491 self.logger.info("%s.getWhereList(%s, %s, %s, %s, %s) => size=%s", self, condition, variables, start, stop, step, slen(rv)) 492 return rv
493 494 @remoted 495 @perf
496 - def readCoordinates(self, rowNumbers, current = None):
497 self.logger.info("%s.readCoordinates(size=%s)", self, slen(rowNumbers)) 498 try: 499 return self.storage.readCoordinates(self.stamp, rowNumbers, current) 500 except tables.HDF5ExtError, err: 501 aue = omero.ApiUsageException() 502 aue.message = "Error reading coordinates. Most likely out of range" 503 aue.serverStackTrace = "".join(traceback.format_exc()) 504 aue.serverExceptionClass = str(err.__class__.__name__) 505 raise aue
506 507 @remoted 508 @perf
509 - def read(self, colNumbers, start, stop, current = None):
510 self.logger.info("%s.read(%s, %s, %s)", self, colNumbers, start, stop) 511 try: 512 return self.storage.read(self.stamp, colNumbers, start, stop, current) 513 except tables.HDF5ExtError, err: 514 aue = omero.ApiUsageException() 515 aue.message = "Error reading coordinates. Most likely out of range" 516 aue.serverStackTrace = "".join(traceback.format_exc()) 517 aue.serverExceptionClass = str(err.__class__.__name__) 518 raise aue
519 520 @remoted 521 @perf
522 - def slice(self, colNumbers, rowNumbers, current = None):
523 self.logger.info("%s.slice(size=%s, size=%s)", self, slen(colNumbers), slen(rowNumbers)) 524 return self.storage.slice(self.stamp, colNumbers, rowNumbers, current)
525 526 # TABLES WRITE API =========================== 527 528 @remoted 529 @perf
530 - def initialize(self, cols, current = None):
531 self.storage.initialize(cols) 532 if cols: 533 self.logger.info("Initialized %s with %s cols", self, slen(cols))
534 535 @remoted 536 @perf
537 - def addColumn(self, col, current = None):
538 raise omero.ApiUsageException(None, None, "NYI")
539 540 @remoted 541 @perf
542 - def addData(self, cols, current = None):
543 self.storage.append(cols) 544 sz = 0 545 if cols and cols[0] and cols[0].getsize(): 546 self.logger.info("Added %s rows of data to %s", cols[0].getsize(), self)
547
class TablesI(omero.grid.Tables, omero.util.Servant):
    """
    Implementation of the omero.grid.Tables API. Provides
    spreadsheet like functionality across the OMERO.grid.
    This servant serves as a session-less, user-less
    resource for obtaining omero.grid.Table proxies.

    The first major step in initialization is getting
    a session. This will block until the Blitz server
    is reachable.
    """

    def __init__(self,\
                 ctx,\
                 table_cast = omero.grid.TablePrx.uncheckedCast,\
                 internal_repo_cast = omero.grid.InternalRepositoryPrx.checkedCast):

        omero.util.Servant.__init__(self, ctx, needs_session = True)

        # Storing these methods, mainly to allow overriding via
        # test methods. Static methods are evil.
        self._table_cast = table_cast
        self._internal_repo_cast = internal_repo_cast

        self.__stores = []
        self._get_dir()
        self._get_uuid()
        self._get_repo()

    def _get_dir(self):
        """
        Second step in initialization is to find the .omero/repository
        directory. If this is not created, then a required server has
        not started, and so this instance will not start.

        Raises omero.ResourceError if the directory never appears
        within the configured wait window (omero.repo.wait seconds).
        """
        wait = int(self.communicator.getProperties().getPropertyWithDefault("omero.repo.wait", "1"))
        self.repo_dir = self.communicator.getProperties().getProperty("omero.repo.dir")

        if not self.repo_dir:
            # Implies this is the legacy directory. Obtain from server
            self.repo_dir = self.ctx.getSession().getConfigService().getConfigValue("omero.data.dir")

        self.repo_cfg = path(self.repo_dir) / ".omero" / "repository"
        start = time.time()
        # Bug fix: the condition was inverted ("wait < elapsed"), so the
        # retry loop could never run during the wait window, and the loop
        # body decremented an undefined "count" variable (NameError).
        while not self.repo_cfg.exists() and (time.time() - start) < wait:
            self.logger.info("%s doesn't exist; waiting 5 seconds..." % self.repo_cfg)
            time.sleep(5)
        if not self.repo_cfg.exists():
            msg = "No repository found: %s" % self.repo_cfg
            self.logger.error(msg)
            raise omero.ResourceError(None, None, msg)

    def _get_uuid(self):
        """
        Third step in initialization is to find the database uuid
        for this grid instance. Multiple OMERO.grids could be watching
        the same directory.
        """
        cfg = self.ctx.getSession().getConfigService()
        self.db_uuid = cfg.getDatabaseUuid()
        self.instance = self.repo_cfg / self.db_uuid

    def _get_repo(self):
        """
        Fourth step in initialization is to find the repository object
        for the UUID found in .omero/repository/<db_uuid>, and then
        create a proxy for the InternalRepository attached to that.
        """

        # Get and parse the uuid from the RandomAccessFile format from FileMaker
        self.repo_uuid = (self.instance / "repo_uuid").lines()[0].strip()
        if len(self.repo_uuid) != 38:
            raise omero.ResourceError("Poorly formed UUID: %s" % self.repo_uuid)
        self.repo_uuid = self.repo_uuid[2:]

        # Using the repo_uuid, find our OriginalFile object
        self.repo_obj = self.ctx.getSession().getQueryService().findByQuery("select f from OriginalFile f where sha1 = :uuid",
                omero.sys.ParametersI().add("uuid", rstring(self.repo_uuid)))
        self.repo_mgr = self.communicator.stringToProxy("InternalRepository-%s" % self.repo_uuid)
        self.repo_mgr = self._internal_repo_cast(self.repo_mgr)
        self.repo_svc = self.repo_mgr.getProxy()

    @remoted
    def getRepository(self, current = None):
        """
        Returns the Repository object for this Tables server.
        """
        return self.repo_svc

    @remoted
    @perf
    def getTable(self, file_obj, current = None):
        """
        Create and/or register a table servant.
        """

        # Will throw an exception if not allowed.
        self.logger.info("getTable: %s", (file_obj and file_obj.id and file_obj.id.val))
        file_path = self.repo_mgr.getFilePath(file_obj)
        p = path(file_path).dirname()
        if not p.exists():
            p.makedirs()

        storage = HDFLIST.getOrCreate(file_path)
        id = Ice.Identity()
        id.name = Ice.generateUUID()
        table = TableI(self.ctx, file_obj, storage, uuid = id.name)
        self.resources.add(table)

        prx = current.adapter.add(table, id)
        return self._table_cast(prx)
661