1
2
3 """
4 Reconcile and cleanse where necessary an OMERO data directory of orphaned data.
5 """
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 import getpass
32 import omero.clients
33 import omero
34 import sys
35 import os
36
37 from Glacier2 import PermissionDeniedException
38 from getopt import getopt, GetoptError
39 from stat import *
40
41
42
43
# Maps each sub-directory of the OMERO data directory that is searched to the
# OMERO object type queried for the files found beneath it. The key is joined
# onto the data directory path; the value is interpolated into the
# reconciliation query as the object type.
SEARCH_DIRECTORIES = {
    'Pixels': 'Pixels',
    'Files': 'OriginalFile',
}
48
def usage(error):
    """
    Prints the given error message followed by the command line help to
    standard output and terminates the process with exit status 2.

    error -- message printed on the first line, ahead of the help text.
    """
    cmd = sys.argv[0]
    # The option synopsis must match what getopt is asked to parse in main():
    # the long option is "--dry-run" and "-k" takes a session key argument.
    print("""%s
Usage: %s [--dry-run] [-u username | -k session_key] <omero.data.dir>
Cleanses files in the OMERO data directory that have no reference in the
OMERO database. NOTE: As this script is designed to be run via cron or in
a scheduled manner it produces NO output unless a dry run is performed.

Options:
-u Administrator username to log in to OMERO with
-k Session key to log in to OMERO with
--dry-run Just prints out what would have been done

Examples:
%s --dry-run -u root /OMERO

Report bugs to OME Users <ome-users@lists.openmicroscopy.org.uk>""" %
          (error, cmd, cmd))
    sys.exit(2)
71
class Cleanser(object):
    """
    Keeps file cleansing state and performs OMERO database reconciliation of
    files within an OMERO binary repository.

    Files are collected in batches while a directory tree is walked; each
    batch is checked against OMERO with a single query and every file whose
    name (an object id) is unknown to OMERO is removed, or merely reported
    when dry_run is set.
    """

    # Number of deferred paths that triggers a reconciliation query. Batching
    # reduces the number of database hits.
    QUERY_THRESHOLD = 25

    def __init__(self, query_service, object_type):
        """
        query_service -- object whose findAllByQuery(query, parameters) is
                         used to look up the objects that still exist.
        object_type   -- name of the OMERO object type the files belong to;
                         it is interpolated into the reconciliation query.
        """
        self.query_service = query_service
        self.object_type = object_type
        # Paths selected for removal so far (also populated on a dry run).
        self.cleansed = list()
        # Total size in bytes of every path in self.cleansed.
        self.bytes_cleansed = 0
        # Paths waiting for the next reconciliation query.
        self.deferred_paths = list()
        # When True nothing is deleted; decisions are printed instead.
        self.dry_run = False

    def cleanse(self, root):
        """
        Begins a cleansing operation from a given OMERO binary repository
        root directory. /OMERO/Files or /OMERO/Pixels for instance.

        Sub-directories are descended into recursively; every other entry
        is queued for reconciliation.
        """
        for name in os.listdir(root):
            path = os.path.join(root, name)
            if os.path.isdir(path):
                self.cleanse(path)
            else:
                self.query_or_defer(path)

    def query_or_defer(self, path):
        """
        Adds a given path to the list of deferred paths. If the number of
        deferred paths has reached the QUERY_THRESHOLD (to reduce database
        hits) a reconciliation check will happen against OMERO.
        """
        self.deferred_paths.append(path)
        # ">=" rather than "==": if an earlier do_cleanse() raised, the list
        # was not cleared and may already be past the threshold.
        if len(self.deferred_paths) >= self.QUERY_THRESHOLD:
            self.do_cleanse()

    def do_cleanse(self):
        """
        Actually performs the reconciliation check against OMERO and
        removes relevant files.

        The last component of every deferred path is parsed as an integer
        object id; a ValueError is raised if one is not an integer. Paths
        whose id is not returned by the query are removed (or reported on a
        dry run). The deferred list is emptied once the batch is processed.
        """
        if not self.deferred_paths:
            return
        # int() yields a long automatically on Python 2 when the value does
        # not fit a plain int, so this covers the full id range.
        object_ids = [omero.rtypes.rlong(int(os.path.split(path)[1]))
                      for path in self.deferred_paths]
        parameters = omero.sys.Parameters()
        parameters.map = {'ids': omero.rtypes.rlist(object_ids)}
        objects = self.query_service.findAllByQuery(
            "select o from %s as o where o.id in (:ids)" % self.object_type,
            parameters)
        # A set gives constant time membership tests for the loop below.
        existing_ids = set(o.id.val for o in objects)
        for path, object_id in zip(self.deferred_paths, object_ids):
            if object_id.val not in existing_ids:
                self._remove_orphan(path)
            elif self.dry_run:
                print(" \\_ %s (keep)" % path)
        self.deferred_paths = list()

    def _remove_orphan(self, path):
        """
        Records a path that has no counterpart in OMERO and deletes it,
        or only reports it when performing a dry run.
        """
        size = os.stat(path).st_size
        self.cleansed.append(path)
        # Accumulate: the summary reports the total over all cleansed files.
        self.bytes_cleansed += size
        if self.dry_run:
            print(" \\_ %s (remove)" % path)
            return
        try:
            os.unlink(path)
        except OSError as e:
            # Best effort: report the failure and carry on with the batch.
            print(e)

    def finalize(self):
        """
        Takes the final set of deferred paths and performs a reconciliation
        check against OMERO for them. This method's purpose is basically to
        catch the final set of paths in the deferred path list and/or perform
        any cleanup.
        """
        self.do_cleanse()

    def __str__(self):
        """
        Returns a one line summary of the number of files and bytes cleansed.
        """
        return "Cleansing context: %d files (%d bytes)" % \
            (len(self.cleansed), self.bytes_cleansed)
156
def main():
    """
    Default main() that performs OMERO data directory cleansing.

    Parses the command line (-u username, -k session key, --dry-run and a
    single data directory argument), logs in to the OMERO server on
    localhost and reconciles every directory listed in SEARCH_DIRECTORIES.
    Exits with status 2 on a usage error or an interrupted password prompt
    and with status 1 when the login is refused.
    """
    try:
        options, args = getopt(sys.argv[1:], "u:k:", ["dry-run"])
    except GetoptError as e:
        usage(e.msg)

    if len(args) != 1:
        usage('Expecting single OMERO data directory!')
    data_dir = args[0]

    username = getpass.getuser()
    session_key = None
    dry_run = False
    for option, argument in options:
        if option == "-u":
            username = argument
        if option == "-k":
            session_key = argument
        if option == "--dry-run":
            dry_run = True

    # A password is only needed when no session key was supplied.
    password = None
    if session_key is None:
        print("Username: %s" % username)
        try:
            password = getpass.getpass()
        except KeyboardInterrupt:
            sys.exit(2)

    try:
        client = omero.client('localhost')
        if session_key is None:
            session = client.createSession(username, password)
        else:
            session = client.createSession(session_key)
    except PermissionDeniedException:
        print("%s: Permission denied" % sys.argv[0])
        print("Sorry.")
        sys.exit(1)

    # Bound up front so the summary in "finally" cannot raise a NameError
    # (and hide the real error) when nothing has been cleansed yet.
    cleanser = None
    try:
        query_service = session.getQueryService()
        for directory, object_type in SEARCH_DIRECTORIES.items():
            full_path = os.path.join(data_dir, directory)
            if dry_run:
                print("Reconciling OMERO data directory...\n %s" % full_path)
            cleanser = Cleanser(query_service, object_type)
            cleanser.dry_run = dry_run
            cleanser.cleanse(full_path)
            cleanser.finalize()
    finally:
        # NOTE(review): only the summary of the last directory processed is
        # printed, as before -- confirm whether one per directory is wanted.
        if dry_run and cleanser is not None:
            print(cleanser)
        # Only a session created here from a username and password is
        # closed; one identified by a session key is left untouched.
        if session_key is None:
            client.closeSession()
216
# Run the cleanse only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
219