Package omero :: Package util :: Module cleanse
[hide private]
[frames | no frames]

Source Code for Module omero.util.cleanse

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3  """ 
  4  Reconcile and cleanse where necessary an OMERO data directory of orphaned data. 
  5  """ 
  6   
  7  # 
  8  #  Copyright (c) 2009 University of Dundee. All rights reserved. 
  9  # 
 10  #  Redistribution and use in source and binary forms, with or without 
 11  #  modification, are permitted provided that the following conditions 
 12  #  are met: 
 13  #  1. Redistributions of source code must retain the above copyright 
 14  #     notice, this list of conditions and the following disclaimer. 
 15  #  2. Redistributions in binary form must reproduce the above copyright 
 16  #     notice, this list of conditions and the following disclaimer in the 
 17  #     documentation and/or other materials provided with the distribution. 
 18  # 
 19  #  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
 20  #  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 21  #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 22  #  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 
 23  #  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 24  #  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
 25  #  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
 26  #  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
 27  #  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
 28  #  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
 29  #  SUCH DAMAGE. 
 30   
 31  import omero.clients 
 32  import omero 
 33  import sys 
 34  import os 
 35   
 36  from Glacier2 import PermissionDeniedException 
 37  from getopt import getopt, GetoptError 
 38  from omero.util import get_user 
 39  from stat import * 
 40   
 41  # The directories underneath an OMERO data directory to search for "dangling" 
 42  # files and reconcile with the database. Directory name key and corresponding 
 43  # OMERO data type value. 
 44  SEARCH_DIRECTORIES = { 
 45      'Pixels': 'Pixels', 
 46      'Files': 'OriginalFile', 
 47      'Thumbnails': 'Thumbnail' 
 48  } 
 49   
def usage(error):
    """
    Prints the given error message followed by the command line usage and
    terminates the process.

    error -- message describing what was wrong with the invocation; it is
             printed as the first line of the output.

    Never returns: always calls sys.exit(2).
    """
    cmd = sys.argv[0]
    # The long option is spelled "--dry-run" and "-k" requires an argument;
    # the synopsis below has to agree with the getopt() call in main().
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
  -u          Administrator username to log in to OMERO with
  -k          Session key to log in to OMERO with
  --dry-run   Just prints out what would have been done

Examples:
  %s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
72
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Paths are collected in batches of QUERY_THRESHOLD. Each batch is checked
    against the database with a single query, and every file whose identifier
    the query does not return is removed (or only reported when dry_run is
    set). Files whose name does not encode an identifier are never removed.

    Attributes:
      cleansed       -- paths removed so far (or that would be removed in a
                        dry run)
      bytes_cleansed -- total size in bytes of the files in `cleansed`
      deferred_paths -- paths waiting for the next database query
      dry_run        -- when True nothing is deleted and every decision is
                        printed instead
    """

    # Number of objects to defer before we actually make a query
    QUERY_THRESHOLD = 25

    # Strings identifying pyramid files
    PYRAMID_FILE = "_pyramid"
    PYRAMID_LOCK = ".pyr_lock"
    PYRAMID_TEMP = ".tmp"

    def __init__(self, query_service, object_type):
        """
        query_service -- service whose projection() method is used to look
                         up which identifiers exist
        object_type   -- type name interpolated into the query, e.g. one of
                         the values of SEARCH_DIRECTORIES
        """
        self.query_service = query_service
        self.object_type = object_type
        self.cleansed = list()
        self.bytes_cleansed = 0
        self.deferred_paths = list()
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Directories are descended into recursively; every other entry is
        queued for reconciliation.
        """
        for entry in os.listdir(root):
            path = os.path.join(root, entry)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        # ">=" rather than "==": if an earlier flush failed part way the
        # list may already be past the threshold and must still be flushed.
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def _object_id_for(self, file_name):
        """
        Returns the object identifier encoded in a repository file name, or
        -1 when the name does not follow a recognised pattern.

        Recognised patterns:
          "<id>"                           plain numeric name
          "<id>_pyramid"                   name ending in PYRAMID_FILE
          "[.]<id>_pyramid...<suffix>"     name containing PYRAMID_FILE and
                                           ending in PYRAMID_LOCK or
                                           PYRAMID_TEMP; leading dots of the
                                           id part are ignored
        """
        # int() promotes to long automatically on Python 2 when required.
        try:
            return int(file_name)
        except ValueError:
            pass

        if self.PYRAMID_FILE not in file_name:
            return -1

        id_part = file_name.split("_")[0]
        if file_name.endswith(self.PYRAMID_FILE):
            pass
        elif file_name.endswith(self.PYRAMID_LOCK) \
                or file_name.endswith(self.PYRAMID_TEMP):
            id_part = id_part.lstrip('.')
        else:
            return -1

        try:
            return int(id_part)
        except ValueError:
            return -1

    def _reconcile(self, path, object_id, existing_ids):
        """
        Decides the fate of a single path given its object identifier and
        the collection of identifiers known to the database.

        Known identifier -> keep; identifier -1 -> ignore/keep; anything
        else is recorded as cleansed and, unless this is a dry run, deleted.
        A failing delete is printed and otherwise ignored.
        """
        if object_id in existing_ids:
            if self.dry_run:
                print(" \\_ %s (keep)" % path)
            return

        if object_id == -1:
            if self.dry_run:
                print(" \\_ %s (ignored/keep)" % path)
            return

        size = os.stat(path).st_size
        self.cleansed.append(path)
        # Accumulate: the total covers every cleansed file, not just the
        # most recent one.
        self.bytes_cleansed += size
        if self.dry_run:
            print(" \\_ %s (remove)" % path)
            return
        try:
            os.unlink(path)
        except OSError as e:
            print(e)

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        Does nothing when no paths are deferred. Otherwise issues one query
        for the identifiers of all deferred paths, reconciles each path
        against the result and empties the deferred list.
        """
        if not self.deferred_paths:
            return

        object_ids = [self._object_id_for(os.path.basename(path))
                      for path in self.deferred_paths]

        parameters = omero.sys.Parameters()
        parameters.map = {
            'ids': omero.rtypes.rlist(
                [omero.rtypes.rlong(object_id) for object_id in object_ids])}
        # NOTE(review): the {"omero.group": "-1"} call context presumably
        # makes the query span all groups -- confirm against the OMERO docs.
        rows = self.query_service.projection(
            "select o.id from %s as o where o.id in (:ids)"
            % self.object_type,
            parameters, {"omero.group": "-1"})
        existing_ids = set([cols[0].val for cols in rows])

        for path, object_id in zip(self.deferred_paths, object_ids):
            self._reconcile(path, object_id, existing_ids)
        self.deferred_paths = list()

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        """
        Returns a one line summary: number of cleansed files and their
        total size in bytes.
        """
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
184
def cleanse(data_dir, query_service, dry_run=False, config_service=None):
    """
    Reconciles every directory listed in SEARCH_DIRECTORIES underneath the
    given OMERO data directory with the database, removing orphaned files.

    data_dir       -- path of the OMERO data directory
    query_service  -- query service handed to each Cleanser
    dry_run        -- when True nothing is deleted; progress and a summary
                      are printed instead
    config_service -- service providing getVersion(); used to refuse to run
                      against servers older than 4.2.1. When None the
                      version check is skipped after a 10 second grace
                      period that allows the user to cancel.

    Exits the process with status 3 if the server version is too old.
    """

    #
    # Compare server versions. See ticket #3123
    #
    if config_service is None:
        print("No config service provided! "
              "Waiting 10 seconds to allow cancellation")
        from threading import Event
        Event().wait(10)
    else:
        # Only reachable with a real service: calling getVersion() on None
        # would raise AttributeError right after the grace period.
        server_version = config_service.getVersion()
        server_tuple = tuple([int(x) for x in server_version.split(".")])
        if server_tuple < (4, 2, 1):
            print("Server version is too old! (%s) Aborting..."
                  % server_version)
            sys.exit(3)

    try:
        cleanser = ""
        for directory, object_type in SEARCH_DIRECTORIES.items():
            full_path = os.path.join(data_dir, directory)
            if dry_run:
                print("Reconciling OMERO data directory...\n %s" % full_path)
            cleanser = Cleanser(query_service, object_type)
            cleanser.dry_run = dry_run
            cleanser.cleanse(full_path)
            cleanser.finalize()
    finally:
        # NOTE(review): only the most recently created Cleanser is
        # summarised here, i.e. the totals of earlier directories are not
        # printed -- confirm whether that is intended.
        if dry_run:
            print(cleanser)
215
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line (-u username, -k session key, --dry-run and a
    single data directory argument), logs in to the OMERO server on
    localhost and runs cleanse() against the data directory.

    Exit statuses: 2 for a usage error or an interrupted password prompt,
    1 when the server denies the login.
    """
    # Imported here because the module's import block does not provide it.
    import getpass

    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as err:
        usage(err.msg)

    try:
        data_dir, = args
    except ValueError:
        # Raised by the unpacking when there is not exactly one argument.
        usage('Expecting single OMERO data directory!')

    username = get_user("root")
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        if option == "-k":
            session_key = argument
        if option == "--dry-run":
            dry_run = True

    # A password is only needed when no session key was supplied.
    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        client.setAgent("OMERO.cleanse")
        session = None
        if session_key is None:
            session = client.createSession(username, password)
        else:
            session = client.createSession(session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    query_service = session.getQueryService()
    config_service = session.getConfigService()
    try:
        cleanse(data_dir, query_service, dry_run, config_service)
    finally:
        # Only a session created here from username/password is closed; a
        # session joined via its key is left alone.
        if session_key is None:
            client.closeSession()

# Run the cleanser only when this file is executed as a script; importing
# the module has no side effects.
if __name__ == '__main__':
    main()