Package omero :: Package util :: Module cleanse
[hide private]
[frames | no frames]

Source Code for Module omero.util.cleanse

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3  """ 
  4  Reconcile and cleanse where necessary an OMERO data directory of orphaned data. 
  5  """ 
  6   
  7  #   
  8  #  Copyright (c) 2009 University of Dundee. All rights reserved. 
  9  # 
 10  #  Redistribution and use in source and binary forms, with or without 
 11  #  modification, are permitted provided that the following conditions 
 12  #  are met: 
 13  #  1. Redistributions of source code must retain the above copyright 
 14  #     notice, this list of conditions and the following disclaimer. 
 15  #  2. Redistributions in binary form must reproduce the above copyright 
 16  #     notice, this list of conditions and the following disclaimer in the 
 17  #     documentation and/or other materials provided with the distribution. 
 18  # 
 19  #  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
 20  #  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 21  #  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 22  #  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 
 23  #  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 24  #  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
 25  #  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
 26  #  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
 27  #  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
 28  #  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
 29  #  SUCH DAMAGE. 
 30   
 31  import getpass 
 32  import omero.clients 
 33  import omero 
 34  import sys 
 35  import os 
 36   
 37  from Glacier2 import PermissionDeniedException 
 38  from getopt import getopt, GetoptError 
 39  from stat import * 
 40   
 41  # The directories underneath an OMERO data directory to search for "dangling" 
 42  # files and reconcile with the database. Directory name key and corresponding 
 43  # OMERO data type value. 
 44  SEARCH_DIRECTORIES = { 
 45          'Pixels': 'Pixels', 
 46          'Files': 'OriginalFile', 
 47  } 
 48   
def usage(error):
    """
    Prints the given error message followed by the command line usage and
    then terminates the process with exit status 2.

    error -- text printed on the first line, ahead of the usage summary.
    """
    cmd = sys.argv[0]
    # A single parenthesised argument prints identically whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    # The synopsis spells the long option as "--dry-run" and shows the value
    # taken by "-k", matching what main() hands to getopt ("u:k:", "dry-run").
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
  -u          Administrator username to log in to OMERO with
  -k          Session key to log in to OMERO with
  --dry-run   Just prints out what would have been done

Examples:
  %s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
71
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Files are identified by name: the last path component is read as the id
    of an object of type ``object_type``. Paths are collected in batches of
    QUERY_THRESHOLD and each batch is checked with one query; files whose id
    is not returned by the query are removed (or only reported in a dry run).
    """

    # Number of objects to defer before we actually make a query
    QUERY_THRESHOLD = 25

    def __init__(self, query_service, object_type):
        """
        query_service -- object providing findAllByQuery(query, parameters).
        object_type   -- OMERO type name interpolated into the HQL query.
        """
        self.query_service = query_service
        self.object_type = object_type
        # Paths that were (or, in a dry run, would have been) removed.
        self.cleansed = list()
        # Running total of the sizes of everything in self.cleansed.
        self.bytes_cleansed = 0
        # Paths collected so far that still await a database check.
        self.deferred_paths = list()
        # When True nothing is deleted; decisions are printed instead.
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Sub-directories are descended into recursively; every other entry
        is queued for reconciliation.
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files. Does nothing when no paths are deferred.
        """
        if not self.deferred_paths:
            return

        # Pair every path with the object id encoded in its file name. A
        # name that is not an integer cannot be matched against the
        # database, so such a path is left on disk rather than aborting the
        # whole run with a ValueError.
        candidates = []
        for path in self.deferred_paths:
            try:
                # int() promotes to long automatically on Python 2.
                object_id = int(os.path.basename(path))
            except ValueError:
                continue
            candidates.append((path, object_id))

        if candidates:
            parameters = omero.sys.Parameters()
            parameters.map = {'ids': omero.rtypes.rlist(
                [omero.rtypes.rlong(object_id)
                 for _, object_id in candidates])}
            objects = self.query_service.findAllByQuery(
                "select o from %s as o where o.id in (:ids)"
                % self.object_type,
                parameters)
            # A set keeps the per-path membership test constant time.
            existing_ids = set(o.id.val for o in objects)
            for path, object_id in candidates:
                if object_id not in existing_ids:
                    self._remove(path)
                elif self.dry_run:
                    print(" \\_ %s (keep)" % path)

        self.deferred_paths = list()

    def _remove(self, path):
        """
        Records ``path`` as cleansed, adds its size to the running byte
        total and deletes it, unless this is a dry run in which case the
        decision is only printed. A failure to delete is printed, not raised.
        """
        size = os.path.getsize(path)
        self.cleansed.append(path)
        # Accumulate: the total covers every cleansed file, not just the last.
        self.bytes_cleansed += size
        if self.dry_run:
            print(" \\_ %s (remove)" % path)
            return
        try:
            os.unlink(path)
        except OSError as e:
            print(e)

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        """Summarises how many files and bytes have been cleansed so far."""
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
156
def cleanse(data_dir, query_service, dry_run = False):
    """
    Reconciles every directory named in SEARCH_DIRECTORIES underneath
    ``data_dir`` against OMERO, removing files with no matching object.

    data_dir      -- root of the OMERO data directory, e.g. /OMERO.
    query_service -- query service handed to each Cleanser.
    dry_run       -- when True nothing is deleted; each directory, each
                     decision and a per-directory summary are printed.
    """
    for directory, object_type in SEARCH_DIRECTORIES.items():
        full_path = os.path.join(data_dir, directory)
        if dry_run:
            print("Reconciling OMERO data directory...\n %s" % full_path)
        cleanser = Cleanser(query_service, object_type)
        cleanser.dry_run = dry_run
        try:
            cleanser.cleanse(full_path)
            cleanser.finalize()
        finally:
            # Each directory has its own Cleanser, so its totals are
            # reported here; printing once after the loop would only show
            # the last directory. The finally keeps the summary visible
            # when the scan is cut short by an error.
            if dry_run:
                print(cleanser)
172
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line (-u username, -k session key, --dry-run and a
    single data directory argument), connects to the OMERO server on
    'localhost' and runs cleanse() over the data directory. Exits with
    status 2 on a usage error or an interrupted password prompt, and with
    status 1 when the login is refused.
    """
    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as e:
        usage(e.msg)

    # Unpacking into a one-element target only fails with ValueError, so
    # nothing broader needs to be caught here.
    try:
        data_dir, = args
    except ValueError:
        usage('Expecting single OMERO data directory!')

    username = getpass.getuser()
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        elif option == "-k":
            session_key = argument
        elif option == "--dry-run":
            dry_run = True

    # A password is only needed when no session key was supplied.
    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        client.setAgent("OMERO.cleanse")
        if session_key is None:
            session = client.createSession(username, password)
        else:
            # NOTE(review): a single-argument createSession() is kept as
            # found; confirm against the omero.client API that this joins
            # the session identified by the key.
            session = client.createSession(session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    try:
        # Looked up inside the try so that a failure here still reaches the
        # session clean-up below.
        query_service = session.getQueryService()
        cleanse(data_dir, query_service, dry_run)
    finally:
        # Only a session created here from a password is closed; one
        # supplied via -k is left open (presumably it belongs to the
        # caller -- verify).
        if session_key is None:
            client.closeSession()
# Run the cleanser only when this module is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    main()