1
2
3 """
4 Reconcile and cleanse where necessary an OMERO data directory of orphaned data.
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 import getpass
32 import omero.clients
33 import omero
34 import sys
35 import os
36
37 from Glacier2 import PermissionDeniedException
38 from getopt import getopt, GetoptError
39 from stat import *
40
41
42
43
# Sub-directories of the OMERO data directory that get scanned, mapped to
# the object type name used in the query that checks whether each file's
# ID still exists.
SEARCH_DIRECTORIES = dict(
    Pixels='Pixels',
    Files='OriginalFile',
    Thumbnails='Thumbnail',
)
49
def usage(error):
    """
    Print *error* followed by the command-line help text, then exit.

    :param error: Message explaining why the invocation was rejected; it
        becomes the first line of the output.
    :raises SystemExit: Always, with exit status 2.
    """
    cmd = sys.argv[0]
    # A single parenthesised argument keeps this valid as both a Python 2
    # print statement and a Python 3 print() call.
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
  -u          Administrator username to log in to OMERO with
  -k          Session key to log in to OMERO with
  --dry-run   Just prints out what would have been done

Examples:
  %s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
72
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Paths are collected in batches and each batch is checked with a single
    query. A file whose name (read as an integer object ID) is not returned
    by the query is removed, or only reported when ``dry_run`` is set.
    """

    # Number of deferred paths that triggers a database query.
    QUERY_THRESHOLD = 25

    def __init__(self, query_service, object_type):
        """
        :param query_service: Service whose ``projection(query, parameters,
            context)`` method is used to look up existing object IDs.
        :param object_type: Name of the object type to query, for example
            ``'Pixels'`` or ``'OriginalFile'``.
        """
        self.query_service = query_service
        self.object_type = object_type
        # Paths removed so far (or that would be removed in a dry run).
        self.cleansed = list()
        # Combined size in bytes of every path recorded in ``cleansed``.
        self.bytes_cleansed = 0
        # Paths waiting for the next batched database check.
        self.deferred_paths = list()
        # When true, report decisions instead of deleting files.
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Directories are descended into recursively; every other entry is
        handed to :meth:`query_or_defer`.
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        :raises ValueError: If the final component of a deferred path is
            not an integer.
        """
        if not self.deferred_paths:
            return
        # int() promotes to long automatically on Python 2, so large IDs
        # are still handled.
        object_ids = [omero.rtypes.rlong(int(os.path.basename(path)))
                      for path in self.deferred_paths]
        parameters = omero.sys.Parameters()
        parameters.map = {'ids': omero.rtypes.rlist(object_ids)}
        # NOTE(review): "omero.group": "-1" presumably widens the query to
        # all groups -- confirm against the OMERO documentation.
        rows = self.query_service.projection(
            "select o.id from %s as o where o.id in (:ids)" % self.object_type,
            parameters, {"omero.group": "-1"})
        existing_ids = set(cols[0].val for cols in rows)
        for path, object_id in zip(self.deferred_paths, object_ids):
            if object_id.val in existing_ids:
                if self.dry_run:
                    print(" \\_ %s (keep)" % path)
                continue
            size = os.stat(path).st_size
            self.cleansed.append(path)
            # Accumulate: the total must cover every cleansed file, not
            # just the most recent one.
            self.bytes_cleansed += size
            if self.dry_run:
                print(" \\_ %s (remove)" % path)
            else:
                try:
                    os.unlink(path)
                except OSError as e:
                    # Best effort: report the failure and carry on with
                    # the remaining paths.
                    print(e)
        self.deferred_paths = list()

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        """Return a one-line summary of the files and bytes cleansed."""
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
157
def _parse_server_version(server_version):
    """Return the first dotted number in *server_version* as an int tuple."""
    import re
    match = re.search(r"\d+(?:\.\d+)*", server_version)
    if match is None:
        raise ValueError("Unparseable server version: %r" % (server_version,))
    return tuple(int(part) for part in match.group(0).split("."))


def cleanse(data_dir, query_service, dry_run=False, config_service=None):
    """
    Reconcile every directory listed in SEARCH_DIRECTORIES under *data_dir*.

    :param data_dir: Root of the OMERO data directory.
    :param query_service: Query service handed to each Cleanser.
    :param dry_run: When true, print what would be kept or removed and a
        summary per directory instead of deleting anything.
    :param config_service: Optional service providing ``getVersion()``.
        When given, servers older than 4.2.1 are refused. When omitted,
        the version check is skipped after a 10 second pause.
    :raises SystemExit: With status 3 when the server is too old.
    :raises ValueError: When the server version contains no number.
    """
    if config_service is None:
        print("No config service provided! Waiting 10 seconds to allow cancellation")
        from threading import Event
        Event().wait(10)
    else:
        # Only consult the service when there is one; calling it on None
        # would fail right after the grace period.
        server_version = config_service.getVersion()
        if _parse_server_version(server_version) < (4, 2, 1):
            print("Server version is too old! (%s) Aborting..." % server_version)
            sys.exit(3)

    for directory, object_type in SEARCH_DIRECTORIES.items():
        full_path = os.path.join(data_dir, directory)
        if dry_run:
            print("Reconciling OMERO data directory...\n %s" % full_path)
        cleanser = Cleanser(query_service, object_type)
        cleanser.dry_run = dry_run
        try:
            cleanser.cleanse(full_path)
            cleanser.finalize()
        finally:
            # Each directory has its own Cleanser, so its summary is
            # printed here rather than once at the very end.
            if dry_run:
                print(cleanser)
188
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses ``-u``, ``-k`` and ``--dry-run`` plus exactly one data directory
    argument, logs in to the OMERO server on localhost and runs cleanse().
    Exits with status 2 on bad arguments or an interrupted password prompt,
    and with status 1 when the login is refused.
    """
    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as e:
        usage(e.msg)

    try:
        data_dir, = args
    except ValueError:
        # Raised when args holds zero or several items.
        usage('Expecting single OMERO data directory!')

    username = getpass.getuser()
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        elif option == "-k":
            session_key = argument
        elif option == "--dry-run":
            dry_run = True

    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        client.setAgent("OMERO.cleanse")
        if session_key is None:
            session = client.createSession(username, password)
        else:
            # NOTE(review): an existing session is joined by passing its
            # key as both name and password -- confirm against the
            # omero.client documentation.
            session = client.createSession(session_key, session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    try:
        # Service lookups sit inside the try so that a failure here still
        # closes a session this function created.
        query_service = session.getQueryService()
        config_service = session.getConfigService()
        cleanse(data_dir, query_service, dry_run, config_service)
    finally:
        # Only a session created here is closed; presumably one supplied
        # via -k is left for its owner.
        if session_key is None:
            client.closeSession()
242
# Run the cleansing tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
245