1
2
3 """
4 Reconcile and cleanse where necessary an OMERO data directory of orphaned data.
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 import omero.clients
32 import omero
33 import sys
34 import os
35
36 from Glacier2 import PermissionDeniedException
37 from getopt import getopt, GetoptError
38 from omero.util import get_user
39 from stat import *
40
41
42
43
# Maps each sub-directory of the OMERO data directory that gets scanned to the
# OMERO object type whose ids are looked up for the files found in it.
SEARCH_DIRECTORIES = {
    "Pixels": "Pixels",
    "Files": "OriginalFile",
    "Thumbnails": "Thumbnail",
}
49
def usage(error):
    """
    Prints usage so that we don't have to. :)

    Writes ``error`` followed by the command line help to standard output
    and then terminates the process with exit status 2.
    """
    cmd = sys.argv[0]
    # The synopsis must match what getopt is given in main(): the long option
    # is spelled "--dry-run" and "-k" takes the session key as its argument.
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
-u Administrator username to log in to OMERO with
-k Session key to log in to OMERO with
--dry-run Just prints out what would have been done

Examples:
%s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
72
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Paths are collected in batches and each batch is checked against the
    database with a single query. Files whose id is not found are recorded
    in ``cleansed`` / ``bytes_cleansed`` and, unless ``dry_run`` is set,
    removed from disk.
    """

    # Number of deferred paths that triggers a reconciliation query
    QUERY_THRESHOLD = 25

    # Markers used to recognise pyramid files and their lock/temporary files
    PYRAMID_FILE = "_pyramid"
    PYRAMID_LOCK = ".pyr_lock"
    PYRAMID_TEMP = ".tmp"

    def __init__(self, query_service, object_type):
        """
        Stores the query service used for lookups and the name of the OMERO
        object type that is interpolated into the lookup query.
        """
        self.query_service = query_service
        self.object_type = object_type
        self.cleansed = list()
        self.bytes_cleansed = 0
        self.deferred_paths = list()
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Directories are descended into recursively; every other entry is
        handed to query_or_defer().
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        Does nothing when no paths are deferred. The deferred list is
        emptied once the whole batch has been processed.
        """
        if not self.deferred_paths:
            return

        id_values = [self._parse_object_id(os.path.basename(path))
                     for path in self.deferred_paths]

        parameters = omero.sys.Parameters()
        parameters.map = {
            'ids': omero.rtypes.rlist(
                [omero.rtypes.rlong(value) for value in id_values])
        }
        rows = self.query_service.projection(
            "select o.id from %s as o where o.id in (:ids)"
            % self.object_type,
            parameters, {"omero.group": "-1"})
        # A set keeps the per-path membership test cheap
        existing_ids = set(cols[0].val for cols in rows)

        self._remove_orphans(zip(self.deferred_paths, id_values),
                             existing_ids)
        self.deferred_paths = list()

    def _parse_object_id(self, file_name):
        """
        Returns the object id encoded in ``file_name`` or -1 when the name
        does not follow a recognised pattern.

        Recognised are a plain integer name, ``<id>_pyramid`` and names
        containing ``_pyramid`` that end in the lock or temporary suffix,
        for which leading dots are stripped from the id part.
        """
        try:
            return int(file_name)
        except ValueError:
            pass  # not a plain id, try the pyramid naming patterns

        if self.PYRAMID_FILE not in file_name:
            return -1
        id_part = file_name.split("_")[0]
        if file_name.endswith(self.PYRAMID_FILE):
            pass
        elif file_name.endswith((self.PYRAMID_LOCK, self.PYRAMID_TEMP)):
            id_part = id_part.lstrip('.')
        else:
            return -1
        try:
            return int(id_part)
        except ValueError:
            return -1

    def _remove_orphans(self, candidates, existing_ids):
        """
        Processes ``(path, object_id)`` pairs: files whose id is missing
        from ``existing_ids`` are recorded and, unless this is a dry run,
        deleted. Files with the id -1 are never touched.
        """
        for path, object_id in candidates:
            if object_id in existing_ids:
                if self.dry_run:
                    print(" \\_ %s (keep)" % path)
                continue
            if object_id == -1:
                if self.dry_run:
                    print(" \\_ %s (ignored/keep)" % path)
                continue

            self.cleansed.append(path)
            # Accumulate so that the summary covers every cleansed file,
            # not only the most recent one.
            self.bytes_cleansed += os.stat(path).st_size
            if self.dry_run:
                print(" \\_ %s (remove)" % path)
                continue
            try:
                os.unlink(path)
            except OSError as e:
                # Best effort: report the failure and carry on
                print(e)

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        """Returns a one line summary of the files and bytes cleansed."""
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
184
185 -def cleanse(data_dir, query_service, dry_run = False, config_service = None):
186
187
188
189
190 if config_service is None:
191 print "No config service provided! Waiting 10 seconds to allow cancellation"
192 from threading import Event
193 Event().wait(10)
194
195 server_version = config_service.getVersion()
196 server_tuple = tuple([int(x) for x in server_version.split(".")])
197 if server_tuple < (4, 2, 1):
198 print "Server version is too old! (%s) Aborting..." % server_version
199 sys.exit(3)
200
201 try:
202 cleanser = ""
203 for directory in SEARCH_DIRECTORIES:
204 full_path = os.path.join(data_dir, directory)
205 if dry_run:
206 print "Reconciling OMERO data directory...\n %s" % full_path
207 object_type = SEARCH_DIRECTORIES[directory]
208 cleanser = Cleanser(query_service, object_type)
209 cleanser.dry_run = dry_run
210 cleanser.cleanse(full_path)
211 cleanser.finalize()
212 finally:
213 if dry_run:
214 print cleanser
215
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line, logs in to the OMERO server on localhost
    (prompting for a password unless a session key was supplied with -k)
    and runs cleanse() on the given data directory. Exits with status 2 on
    usage errors or an interrupted password prompt and with status 1 when
    the login is refused.
    """
    # Imported locally: it is not among the imports visible at the file top
    import getpass

    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as e:
        usage(e.msg)

    if len(args) != 1:
        usage('Expecting single OMERO data directory!')
    data_dir = args[0]

    username = get_user("root")
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        elif option == "-k":
            session_key = argument
        elif option == "--dry-run":
            dry_run = True

    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        client.setAgent("OMERO.cleanse")
        if session_key is None:
            session = client.createSession(username, password)
        else:
            # An existing session is joined by presenting its key as both
            # the user name and the password.
            session = client.createSession(session_key, session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    query_service = session.getQueryService()
    config_service = session.getConfigService()
    try:
        cleanse(data_dir, query_service, dry_run, config_service)
    finally:
        # Only close sessions created here; a session joined via -k is
        # left open.
        if session_key is None:
            client.closeSession()
269
# Run the cleanser only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
272