Package omero :: Package util :: Module cleanse
[hide private]
[frames] | no frames]

Source Code for Module omero.util.cleanse

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3  """ 
  4  Reconcile and cleanse where necessary an OMERO data directory of orphaned data. 
  5  """ 
  6   
  7  #   
  8  #  Copyright (c) 2009 University of Dundee. All rights reserved. 
  9  # 
 10  #  Redistribution and use in source and binary forms, with or without 
 11  #  modification, are permitted provided that the following conditions 
 12  #  are met: 
 13  #  1. Redistributions of source code must retain the above copyright 
 14  #     notice, this list of conditions and the following disclaimer. 
 15  #  2. Redistributions in binary form must reproduce the above copyright 
 16  #     notice, this list of conditions and the following disclaimer in the 
 17  #     documentation and/or other materials provided with the distribution. 
 18  # 
 19  #  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
 20  #  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 21  #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 22  #  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 
 23  #  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 24  #  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
 25  #  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
 26  #  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
 27  #  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
 28  #  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
 29  #  SUCH DAMAGE. 
 30   
 31  import getpass 
 32  import omero.clients 
 33  import omero 
 34  import sys 
 35  import os 
 36   
 37  from Glacier2 import PermissionDeniedException 
 38  from getopt import getopt, GetoptError 
 39  from stat import * 
 40   
 41  # The directories underneath an OMERO data directory to search for "dangling" 
 42  # files and reconcile with the database. Directory name key and corresponding 
 43  # OMERO data type value. 
 44  SEARCH_DIRECTORIES = { 
 45          'Pixels': 'Pixels', 
 46          'Files': 'OriginalFile', 
 47  } 
 48   
def usage(error):
    """
    Prints the given error message followed by the command line usage and
    terminates the process.

    error -- message printed on the first line, above the usage text.

    Never returns: always exits through sys.exit(2).
    """
    cmd = sys.argv[0]
    # The synopsis mirrors the options actually parsed by main(): the long
    # option is spelled "--dry-run" and "-k" takes the session key as its
    # argument.
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
  -u          Administrator username to log in to OMERO with
  -k          Session key to log in to OMERO with
  --dry-run   Just prints out what would have been done

Examples:
  %s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
71
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Files are identified by their name, which is interpreted as the id of an
    object of type `object_type`. A file whose id is not returned by the
    query service is removed (or only reported when `dry_run` is set).

    Attributes:
      query_service  -- service used to run the reconciliation query
      object_type    -- name of the OMERO type queried, e.g. 'OriginalFile'
      cleansed       -- paths selected for removal so far
      bytes_cleansed -- accumulated size, in bytes, of the `cleansed` paths
      deferred_paths -- paths collected but not yet reconciled
      dry_run        -- when True nothing is deleted, actions are printed
    """

    # Number of objects to defer before we actually make a query
    QUERY_THRESHOLD = 25

    def __init__(self, query_service, object_type):
        self.query_service = query_service
        self.object_type = object_type
        self.cleansed = list()
        self.bytes_cleansed = 0
        self.deferred_paths = list()
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Directories are descended into recursively; every other entry is
        handed to query_or_defer(). Call finalize() afterwards to process
        the entries that are still deferred.
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        Does nothing when no paths are deferred. Paths whose file name is
        not an integer cannot be matched to an object id; they are never
        removed and never sent to the query service. The deferred list is
        emptied once every path has been handled; if the query or a file
        operation raises, the deferred list is left as it was.
        """
        if not self.deferred_paths:
            return

        # Pair each reconcilable path with the object id encoded in its name.
        candidates = []
        for path in self.deferred_paths:
            try:
                # int() yields a long by itself on Python 2 when needed.
                raw_id = int(os.path.basename(path))
            except ValueError:
                # Stray file (not named after an id): leave it alone
                # instead of aborting the whole run.
                if self.dry_run:
                    print(" \\_ %s (keep)" % path)
                continue
            candidates.append((omero.rtypes.rlong(raw_id), path))

        if candidates:
            parameters = omero.sys.Parameters()
            parameters.map = {
                'ids': omero.rtypes.rlist(
                    [object_id for object_id, _ in candidates]),
            }
            objects = self.query_service.findAllByQuery(
                "select o from %s as o where o.id in (:ids)"
                % self.object_type,
                parameters)
            existing_ids = set(o.id.val for o in objects)
            for object_id, path in candidates:
                if object_id.val not in existing_ids:
                    self._cleanse_path(path)
                elif self.dry_run:
                    print(" \\_ %s (keep)" % path)

        self.deferred_paths = list()

    def _cleanse_path(self, path):
        """
        Records `path` as cleansed, adds its size to the running byte total
        and deletes it, or only reports it when performing a dry run.

        A failure to delete the file is printed and otherwise ignored; a
        failure to stat it propagates as OSError.
        """
        size = os.stat(path).st_size
        self.cleansed.append(path)
        # Accumulate: the total covers every cleansed file, not the last one.
        self.bytes_cleansed += size
        if self.dry_run:
            print(" \\_ %s (remove)" % path)
            return
        try:
            os.unlink(path)
        except OSError as e:
            print(e)

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
156
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line ([--dry-run] [-u username | -k session_key]
    <omero.data.dir>), logs in to the OMERO server on localhost and runs a
    Cleanser over every directory listed in SEARCH_DIRECTORIES.

    Exit codes: 2 on a usage error or when the password prompt is
    interrupted, 1 when the login is denied.

    NOTE(review): a new Cleanser is created per directory, so the dry-run
    summary printed at the end describes only the last directory processed.
    """
    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as error:
        usage(error.msg)

    try:
        data_dir, = args
    except ValueError:
        # Raised by the unpacking when there is not exactly one argument.
        usage('Expecting single OMERO data directory!')

    username = getpass.getuser()
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        if option == "-k":
            session_key = argument
        if option == "--dry-run":
            dry_run = True

    # A password is only needed when no session key was supplied.
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        session = None
        if session_key is None:
            session = client.createSession(username, password)
        else:
            session = client.createSession(session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    # Bound up front so the finally clause never hits an unbound name and
    # hides the exception that interrupted the run.
    cleanser = None
    try:
        # Obtained inside the try so that a failure here still closes the
        # session created above.
        query_service = session.getQueryService()
        for directory in SEARCH_DIRECTORIES:
            full_path = os.path.join(data_dir, directory)
            if dry_run:
                print("Reconciling OMERO data directory...\n %s" % full_path)
            object_type = SEARCH_DIRECTORIES[directory]
            cleanser = Cleanser(query_service, object_type)
            cleanser.dry_run = dry_run
            cleanser.cleanse(full_path)
            cleanser.finalize()
    finally:
        if dry_run and cleanser is not None:
            print(cleanser)
        # Only close sessions this script created itself; a session joined
        # by key is left open.
        if session_key is None:
            client.closeSession()


# Perform the cleansing only when this module is executed as a script.
if __name__ == '__main__':
    main()