1
2
"""
Reconcile an OMERO data directory against the OMERO database and cleanse
orphaned data where necessary.
"""
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 import getpass
32 import omero.clients
33 import omero
34 import sys
35 import os
36
37 from Glacier2 import PermissionDeniedException
38 from getopt import getopt, GetoptError
39 from stat import *
40
41
42
43
# Sub-directories of the OMERO data directory that get scanned, each mapped
# to the OMERO object type its files are reconciled against.
SEARCH_DIRECTORIES = {
    'Pixels': 'Pixels',
    'Files': 'OriginalFile',
}
48
def usage(error):
    """
    Prints the given error followed by the command line help and exits.

    error -- message printed as the first line of the output.

    Never returns normally: the process is always terminated through
    sys.exit(2).
    """
    cmd = sys.argv[0]
    # The synopsis must match what getopt is asked to parse in main(): the
    # long option is spelled "--dry-run" and "-k" takes the session key as
    # its argument.
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
-u Administrator username to log in to OMERO with
-k Session key to log in to OMERO with
--dry-run Just prints out what would have been done

Examples:
%s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
71
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.
    """

    # Number of paths to collect before a reconciliation query is issued.
    QUERY_THRESHOLD = 25

    def __init__(self, query_service, object_type):
        """
        query_service -- service whose findAllByQuery() is used to look up
                         which objects still exist.
        object_type   -- name of the object type put into the lookup query
                         ('Pixels' or 'OriginalFile' for instance).
        """
        self.query_service = query_service
        self.object_type = object_type
        # Paths that had no matching object, in the order they were found.
        self.cleansed = list()
        # Total size in bytes of all the paths in self.cleansed.
        self.bytes_cleansed = 0
        # Paths waiting for the next reconciliation query.
        self.deferred_paths = list()
        # When True nothing is removed; decisions are printed instead.
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Directories are descended into recursively; every other entry is
        handed to query_or_defer().
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        # ">=" rather than "==" so that a batch is still flushed should the
        # list ever have grown past the threshold.
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        The last component of every deferred path is parsed as the id of
        the object it belongs to, so a ValueError is raised for a file name
        that is not an integer. Paths whose id is not returned by the query
        are recorded in self.cleansed and, unless this is a dry run,
        unlinked. The deferred list is emptied afterwards.
        """
        if not self.deferred_paths:
            return
        # int() transparently promotes to long on Python 2 when needed.
        object_ids = [omero.rtypes.rlong(int(os.path.basename(path)))
                      for path in self.deferred_paths]
        parameters = omero.sys.Parameters()
        parameters.map = {'ids': omero.rtypes.rlist(object_ids)}
        objects = self.query_service.findAllByQuery(
            "select o from %s as o where o.id in (:ids)" % self.object_type,
            parameters)
        # A set keeps the membership test below constant time.
        existing_ids = set(o.id.val for o in objects)
        for path, object_id in zip(self.deferred_paths, object_ids):
            if object_id.val in existing_ids:
                if self.dry_run:
                    print(" \\_ %s (keep)" % path)
                continue
            size = os.stat(path).st_size
            self.cleansed.append(path)
            # Accumulate: the summary reports the total over all files, not
            # just the size of the most recent one.
            self.bytes_cleansed += size
            if self.dry_run:
                print(" \\_ %s (remove)" % path)
            else:
                try:
                    os.unlink(path)
                except OSError as err:
                    # Best effort: report the failure and carry on with
                    # the remaining paths.
                    print(err)
        self.deferred_paths = list()

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        """
        Returns a one line summary of how many files and bytes were cleansed.
        """
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
156
def cleanse(data_dir, query_service, dry_run=False):
    """
    Cleanses every directory listed in SEARCH_DIRECTORIES below data_dir.

    data_dir      -- root of the OMERO data directory (/OMERO for instance).
    query_service -- service handed to each Cleanser for its lookups.
    dry_run       -- when True nothing is removed; the directory being
                     reconciled and a summary are printed instead.

    A separate Cleanser is used per directory and, on a dry run, its
    summary is printed as soon as that directory is done, so that no
    directory's result is lost. The summary is printed even when the
    directory fails part way through; the error still propagates.
    """
    for directory, object_type in SEARCH_DIRECTORIES.items():
        full_path = os.path.join(data_dir, directory)
        if dry_run:
            print("Reconciling OMERO data directory...\n %s" % full_path)
        cleanser = Cleanser(query_service, object_type)
        cleanser.dry_run = dry_run
        try:
            cleanser.cleanse(full_path)
            cleanser.finalize()
        finally:
            if dry_run:
                print(cleanser)
172
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line (-u username, -k session key, --dry-run and a
    single data directory argument), logs in to the OMERO server on
    localhost and runs cleanse() on the data directory. Invalid arguments
    end in usage(); a refused login exits with status 1.
    """
    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as err:
        usage(err.msg)

    try:
        data_dir, = args
    except ValueError:
        # Raised by the unpacking when there is not exactly one argument.
        usage('Expecting single OMERO data directory!')

    username = getpass.getuser()
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        if option == "-k":
            session_key = argument
        if option == "--dry-run":
            dry_run = True

    # A password is only needed when no session key was supplied.
    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        client.setAgent("OMERO.cleanse")
        if session_key is None:
            session = client.createSession(username, password)
        else:
            # NOTE(review): the key is passed as the only argument here;
            # confirm this is how the client API expects a session to be
            # joined.
            session = client.createSession(session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)
    query_service = session.getQueryService()
    try:
        cleanse(data_dir, query_service, dry_run)
    finally:
        # Only close sessions created here; one joined through a session
        # key is left open.
        if session_key is None:
            client.closeSession()
223
# Only run the cleansing when executed as a script, not when imported.
if __name__ == "__main__":
    main()
226