Package omero :: Package util :: Module cleanse
[hide private]
[frames | no frames]

Source Code for Module omero.util.cleanse

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3  """ 
  4  Reconcile and cleanse where necessary an OMERO data directory of orphaned data. 
  5  """ 
  6   
  7  # 
  8  #  Copyright (c) 2009 University of Dundee. All rights reserved. 
  9  # 
 10  #  Redistribution and use in source and binary forms, with or without 
 11  #  modification, are permitted provided that the following conditions 
 12  #  are met: 
 13  #  1. Redistributions of source code must retain the above copyright 
 14  #     notice, this list of conditions and the following disclaimer. 
 15  #  2. Redistributions in binary form must reproduce the above copyright 
 16  #     notice, this list of conditions and the following disclaimer in the 
 17  #     documentation and/or other materials provided with the distribution. 
 18  # 
 19  #  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
 20  #  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 21  #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 22  #  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 
 23  #  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 24  #  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
 25  #  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
 26  #  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
 27  #  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
 28  #  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
 29  #  SUCH DAMAGE. 
 30   
 31  import getpass 
 32  import omero.clients 
 33  import omero 
 34  import sys 
 35  import os 
 36   
 37  from Glacier2 import PermissionDeniedException 
 38  from getopt import getopt, GetoptError 
 39  from stat import * 
 40   
 41  # The directories underneath an OMERO data directory to search for "dangling" 
 42  # files and reconcile with the database. Directory name key and corresponding 
 43  # OMERO data type value. 
 44  SEARCH_DIRECTORIES = { 
 45          'Pixels': 'Pixels', 
 46          'Files': 'OriginalFile', 
 47          'Thumbnails': 'Thumbnail' 
 48  } 
 49   
def usage(error):
    """
    Prints usage so that we don't have to. :)

    The given error message is printed first, followed by the command
    synopsis, and the process then exits with status 2.

    error -- message describing why the usage text is being shown.
    """
    cmd = sys.argv[0]
    # The synopsis must match the getopt specification used by main():
    # "--dry-run" is a long option and "-k" requires an argument.
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
  -u          Administrator username to log in to OMERO with
  -k          Session key to log in to OMERO with
  --dry-run   Just prints out what would have been done

Examples:
  %s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
72
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Every file found is treated as belonging to the object whose ID equals
    the file's base name. Files whose object ID is not returned by the
    database query are removed (or only reported, when dry_run is set).
    """

    # Number of objects to defer before we actually make a query
    QUERY_THRESHOLD = 25

    def __init__(self, query_service, object_type):
        """
        query_service -- object whose projection() method is used to look
                         up existing object IDs.
        object_type   -- type name interpolated into the query, e.g. one of
                         the values of SEARCH_DIRECTORIES.
        """
        self.query_service = query_service
        self.object_type = object_type
        # Paths that were removed (or would be removed in a dry run).
        self.cleansed = list()
        # Total size in bytes of everything listed in self.cleansed.
        self.bytes_cleansed = 0
        # Paths collected but not yet reconciled against the database.
        self.deferred_paths = list()
        # When True nothing is deleted; decisions are printed instead.
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Directories are descended into recursively; every other entry is
        handed to query_or_defer().
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        # ">=" rather than "==" so a batch that somehow grew past the
        # threshold is still flushed.
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    @staticmethod
    def _parse_object_id(path):
        """
        Returns the object ID encoded in the base name of path, or None if
        the base name is not a plain decimal number.
        """
        name = os.path.basename(path)
        # isdigit() first: int() alone would also accept signs, whitespace
        # and (on newer Pythons) underscores, none of which are valid IDs.
        if not name.isdigit():
            return None
        try:
            # On Python 2 int() promotes to long automatically when needed.
            return int(name)
        except ValueError:
            return None

    def _remove_orphan(self, path):
        """
        Removes a file that has no database counterpart (or just reports it
        in a dry run) and records it in the running totals. A file that
        cannot be examined or removed is reported and not counted.
        """
        try:
            size = os.stat(path).st_size
            if not self.dry_run:
                os.unlink(path)
        except OSError as e:
            print(e)
            return
        self.cleansed.append(path)
        # Accumulate: bytes_cleansed is the total over all cleansed files.
        self.bytes_cleansed += size
        if self.dry_run:
            print(" \\_ %s (remove)" % path)

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        Files whose name is not a numeric object ID cannot be reconciled;
        they are left untouched (and reported as skipped in a dry run).
        """
        if not self.deferred_paths:
            return

        candidates = []
        for path in self.deferred_paths:
            object_id = self._parse_object_id(path)
            if object_id is None:
                if self.dry_run:
                    print(" \\_ %s (skip)" % path)
            else:
                candidates.append((path, object_id))

        # Only hit the database when there is something to look up.
        if candidates:
            object_ids = [omero.rtypes.rlong(object_id)
                          for _, object_id in candidates]
            parameters = omero.sys.Parameters()
            parameters.map = {'ids': omero.rtypes.rlist(object_ids)}
            # NOTE(review): the "omero.group": "-1" context presumably makes
            # the query span all groups -- confirm against the OMERO docs.
            rows = self.query_service.projection(
                "select o.id from %s as o where o.id in (:ids)"
                % self.object_type,
                parameters, {"omero.group": "-1"})
            existing_ids = set(cols[0].val for cols in rows)
            for path, object_id in candidates:
                if object_id not in existing_ids:
                    self._remove_orphan(path)
                elif self.dry_run:
                    print(" \\_ %s (keep)" % path)

        self.deferred_paths = list()

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
157
def _parse_server_version(version):
    """
    Returns the leading dotted-numeric part of a version string as a tuple
    of ints, e.g. "4.2.1-DEV" -> (4, 2, 1). Returns an empty tuple when the
    string does not start with a number.
    """
    import re
    match = re.match(r"\d+(?:\.\d+)*", version.strip())
    if match is None:
        return ()
    return tuple(int(part) for part in match.group(0).split("."))


def cleanse(data_dir, query_service, dry_run = False, config_service = None):
    """
    Reconciles every directory named in SEARCH_DIRECTORIES underneath
    data_dir with the OMERO database, removing files that have no
    corresponding database object.

    data_dir       -- path of the OMERO data directory.
    query_service  -- query service handed to each Cleanser.
    dry_run        -- when True nothing is deleted; actions are printed.
    config_service -- optional service providing getVersion(). When given,
                      the process exits with status 3 if the server is
                      older than 4.2.1. When omitted, the version check is
                      skipped after a 10 second grace period.
    """
    #
    # Compare server versions. See ticket #3123
    #
    if config_service is None:
        print("No config service provided! Waiting 10 seconds to allow cancellation")
        from threading import Event
        Event().wait(10)
    else:
        server_version = config_service.getVersion()
        # An unparseable version yields () which compares as "too old";
        # refusing to run is the safe choice for a destructive tool.
        if _parse_server_version(server_version) < (4, 2, 1):
            print("Server version is too old! (%s) Aborting..." % server_version)
            sys.exit(3)

    try:
        cleanser = ""
        for directory in SEARCH_DIRECTORIES:
            full_path = os.path.join(data_dir, directory)
            if dry_run:
                print("Reconciling OMERO data directory...\n %s" % full_path)
            object_type = SEARCH_DIRECTORIES[directory]
            cleanser = Cleanser(query_service, object_type)
            cleanser.dry_run = dry_run
            cleanser.cleanse(full_path)
            cleanser.finalize()
    finally:
        # Only the most recently created Cleanser is summarised here (an
        # empty line is printed if none was created).
        if dry_run:
            print(cleanser)
188
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line (-u, -k, --dry-run and exactly one data
    directory), opens a session against the OMERO server on localhost and
    runs cleanse(). Exits with status 2 on a usage error or an interrupted
    password prompt, and with status 1 when permission is denied.
    """
    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as e:
        usage(e.msg)

    if len(args) != 1:
        usage('Expecting single OMERO data directory!')
    data_dir = args[0]

    username = getpass.getuser()
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        elif option == "-k":
            session_key = argument
        elif option == "--dry-run":
            dry_run = True

    # A password is only needed when no session key was supplied.
    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        client.setAgent("OMERO.cleanse")
        if session_key is None:
            session = client.createSession(username, password)
        else:
            # Re-attach to the existing session; createSession() with only
            # a key would be missing the password argument.
            session = client.joinSession(session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    try:
        query_service = session.getQueryService()
        config_service = session.getConfigService()
        cleanse(data_dir, query_service, dry_run, config_service)
    finally:
        # A session joined by key is not closed here; only a session this
        # function created itself is.
        if session_key is None:
            client.closeSession()
# Run the cleanser only when executed as a script, not when imported.
if __name__ == "__main__":
    main()