1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
|
--- fence-agents-4.10.0/agents/gce/fence_gce.py 2022-04-29 10:13:50.317888041 +0200
+++ fence-agents-4.10.0/agents/gce/fence_gce.py2 2022-04-29 10:17:54.813248566 +0200
@@ -2,10 +2,10 @@
#
# Requires the googleapiclient and oauth2client
-# RHEL 7.x: google-api-python-client==1.6.7 python-gflags==2.0 pyasn1==0.4.8 rsa==3.4.2
-# RHEL 8.x: nothing additional needed
-# SLES 12.x: python-google-api-python-client python-oauth2client python-oauth2client-gce
-# SLES 15.x: python3-google-api-python-client python3-oauth2client python3-oauth2client-gce
+# RHEL 7.x: google-api-python-client==1.6.7 python-gflags==2.0 pyasn1==0.4.8 rsa==3.4.2 pysocks==1.7.1 httplib2==0.19.0
+# RHEL 8.x: pysocks==1.7.1 httplib2==0.19.0
+# SLES 12.x: python-google-api-python-client python-oauth2client python-oauth2client-gce pysocks==1.7.1 httplib2==0.19.0
+# SLES 15.x: python3-google-api-python-client python3-oauth2client pysocks==1.7.1 httplib2==0.19.0
#
import atexit
@@ -27,7 +27,7 @@
import urllib2 as urlrequest
sys.path.append("@FENCEAGENTSLIBDIR@")
-from fencing import fail_usage, run_delay, all_opt, atexit_handler, check_input, process_input, show_docs, fence_action
+from fencing import fail_usage, run_delay, all_opt, atexit_handler, check_input, process_input, show_docs, fence_action, run_command
try:
sys.path.insert(0, '/usr/lib/fence-agents/support/google')
import httplib2
@@ -42,6 +42,19 @@
METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
+INSTANCE_LINK = 'https://www.googleapis.com/compute/v1/projects/{}/zones/{}/instances/{}'
+
+def run_on_fail(options):
+ if "--runonfail" in options:
+ run_command(options, options["--runonfail"])
+
+def fail_fence_agent(options, message):
+ run_on_fail(options)
+ fail_usage(message)
+
+def raise_fence_agent(options, message):
+ run_on_fail(options)
+ raise Exception(message)
#
# Will use baremetalsolution setting or the environment variable
@@ -66,7 +79,7 @@
{
"matchlength": 4,
"match": "https://compute.googleapis.com/compute/v1/projects/(.*)/zones/(.*)/instances/(.*)/reset(.*)",
- "replace": "https://baremetalsolution.googleapis.com/v1alpha1/projects/\\1/locations/\\2/instances/\\3:resetInstance\\4"
+ "replace": "https://baremetalsolution.googleapis.com/v1/projects/\\1/locations/\\2/instances/\\3:resetInstance\\4"
})
for uri_replacement in uri_replacements:
# each uri_replacement should have matchlength, match, and replace
@@ -121,14 +134,17 @@
def get_nodes_list(conn, options):
result = {}
+ if "--zone" not in options:
+ fail_fence_agent(options, "Failed: get_nodes_list: Please specify the --zone in the command")
try:
- instanceList = retry_api_execute(options, conn.instances().list(
- project=options["--project"],
- zone=options["--zone"]))
- for instance in instanceList["items"]:
- result[instance["id"]] = (instance["name"], translate_status(instance["status"]))
+ for zone in options["--zone"].split(","):
+ instanceList = retry_api_execute(options, conn.instances().list(
+ project=options["--project"],
+ zone=zone))
+ for instance in instanceList["items"]:
+ result[instance["id"]] = (instance["name"], translate_status(instance["status"]))
except Exception as err:
- fail_usage("Failed: get_nodes_list: {}".format(str(err)))
+ fail_fence_agent(options, "Failed: get_nodes_list: {}".format(str(err)))
return result
@@ -142,23 +158,54 @@
return "off"
else:
return "on"
+ # If zone is not listed for an entry we attempt to get it automatically
+ instance = options["--plug"]
+ zone = get_zone(conn, options, instance) if "--plugzonemap" not in options else options["--plugzonemap"][instance]
+ instance_status = get_instance_power_status(conn, options, instance, zone)
+ # If any of the instances do not match the intended status we return the
+ # the opposite status so that the fence agent can change it.
+ if instance_status != options.get("--action"):
+ return instance_status
+
+ return options.get("--action")
+
+
+def get_instance_power_status(conn, options, instance, zone):
try:
- instance = retry_api_execute(options, conn.instances().get(
- project=options["--project"],
- zone=options["--zone"],
- instance=options["--plug"]))
+ instance = retry_api_execute(
+ options,
+ conn.instances().get(project=options["--project"], zone=zone, instance=instance))
return translate_status(instance["status"])
except Exception as err:
- fail_usage("Failed: get_power_status: {}".format(str(err)))
+ fail_fence_agent(options, "Failed: get_instance_power_status: {}".format(str(err)))
-def wait_for_operation(conn, options, operation):
+def check_for_existing_operation(conn, options, instance, zone, operation_type):
+ logging.debug("check_for_existing_operation")
+ if "--baremetalsolution" in options:
+ # There is no API for checking in progress operations
+ return False
+
+ project = options["--project"]
+ target_link = INSTANCE_LINK.format(project, zone, instance)
+ query_filter = '(targetLink = "{}") AND (operationType = "{}") AND (status = "RUNNING")'.format(target_link, operation_type)
+ result = retry_api_execute(
+ options,
+ conn.zoneOperations().list(project=project, zone=zone, filter=query_filter, maxResults=1))
+
+ if "items" in result and result["items"]:
+ logging.info("Existing %s operation found", operation_type)
+ return result["items"][0]
+
+
+def wait_for_operation(conn, options, zone, operation):
if 'name' not in operation:
logging.warning('Cannot wait for operation to complete, the'
' requested operation will continue asynchronously')
- return
+ return False
+
+ wait_time = 0
project = options["--project"]
- zone = options["--zone"]
while True:
result = retry_api_execute(options, conn.zoneOperations().get(
project=project,
@@ -166,56 +213,93 @@
operation=operation['name']))
if result['status'] == 'DONE':
if 'error' in result:
- raise Exception(result['error'])
- return
+ raise_fence_agent(options, result['error'])
+ return True
+
+ if "--errortimeout" in options and wait_time > int(options["--errortimeout"]):
+ raise_fence_agent(options, "Operation did not complete before the timeout.")
+
+ if "--warntimeout" in options and wait_time > int(options["--warntimeout"]):
+ logging.warning("Operation did not complete before the timeout.")
+ if "--runonwarn" in options:
+ run_command(options, options["--runonwarn"])
+ return False
+
+ wait_time = wait_time + 1
time.sleep(1)
def set_power_status(conn, options):
- logging.debug("set_power_status");
+ logging.debug("set_power_status")
+ instance = options["--plug"]
+ # If zone is not listed for an entry we attempt to get it automatically
+ zone = get_zone(conn, options, instance) if "--plugzonemap" not in options else options["--plugzonemap"][instance]
+ set_instance_power_status(conn, options, instance, zone, options["--action"])
+
+
+def set_instance_power_status(conn, options, instance, zone, action):
+ logging.info("Setting power status of %s in zone %s", instance, zone)
+ project = options["--project"]
+
try:
- if options["--action"] == "off":
- logging.info("Issuing poweroff of %s in zone %s" % (options["--plug"], options["--zone"]))
- operation = retry_api_execute(options, conn.instances().stop(
- project=options["--project"],
- zone=options["--zone"],
- instance=options["--plug"]))
+ if action == "off":
+ logging.info("Issuing poweroff of %s in zone %s", instance, zone)
+ operation = check_for_existing_operation(conn, options, instance, zone, "stop")
+ if operation and "--earlyexit" in options:
+ return
+ if not operation:
+ operation = retry_api_execute(
+ options,
+ conn.instances().stop(project=project, zone=zone, instance=instance))
logging.info("Poweroff command completed, waiting for the operation to complete")
- wait_for_operation(conn, options, operation)
- logging.info("Poweroff of %s in zone %s complete" % (options["--plug"], options["--zone"]))
- elif options["--action"] == "on":
- logging.info("Issuing poweron of %s in zone %s" % (options["--plug"], options["--zone"]))
- operation = retry_api_execute(options, conn.instances().start(
- project=options["--project"],
- zone=options["--zone"],
- instance=options["--plug"]))
- wait_for_operation(conn, options, operation)
- logging.info("Poweron of %s in zone %s complete" % (options["--plug"], options["--zone"]))
+ if wait_for_operation(conn, options, zone, operation):
+ logging.info("Poweroff of %s in zone %s complete", instance, zone)
+ elif action == "on":
+ logging.info("Issuing poweron of %s in zone %s", instance, zone)
+ operation = check_for_existing_operation(conn, options, instance, zone, "start")
+ if operation and "--earlyexit" in options:
+ return
+ if not operation:
+ operation = retry_api_execute(
+ options,
+ conn.instances().start(project=project, zone=zone, instance=instance))
+ if wait_for_operation(conn, options, zone, operation):
+ logging.info("Poweron of %s in zone %s complete", instance, zone)
except Exception as err:
- fail_usage("Failed: set_power_status: {}".format(str(err)))
-
+ fail_fence_agent(options, "Failed: set_instance_power_status: {}".format(str(err)))
def power_cycle(conn, options):
- logging.debug("power_cycle");
+ logging.debug("power_cycle")
+ instance = options["--plug"]
+ # If zone is not listed for an entry we attempt to get it automatically
+ zone = get_zone(conn, options, instance) if "--plugzonemap" not in options else options["--plugzonemap"][instance]
+ return power_cycle_instance(conn, options, instance, zone)
+
+
+def power_cycle_instance(conn, options, instance, zone):
+ logging.info("Issuing reset of %s in zone %s", instance, zone)
+ project = options["--project"]
+
try:
- logging.info('Issuing reset of %s in zone %s' % (options["--plug"], options["--zone"]))
- operation = retry_api_execute(options, conn.instances().reset(
- project=options["--project"],
- zone=options["--zone"],
- instance=options["--plug"]))
- logging.info("Reset command completed, waiting for the operation to complete")
- wait_for_operation(conn, options, operation)
- logging.info('Reset of %s in zone %s complete' % (options["--plug"], options["--zone"]))
+ operation = check_for_existing_operation(conn, options, instance, zone, "reset")
+ if operation and "--earlyexit" in options:
+ return True
+ if not operation:
+ operation = retry_api_execute(
+ options,
+ conn.instances().reset(project=project, zone=zone, instance=instance))
+ logging.info("Reset command sent, waiting for the operation to complete")
+ if wait_for_operation(conn, options, zone, operation):
+ logging.info("Reset of %s in zone %s complete", instance, zone)
return True
except Exception as err:
- logging.error("Failed: power_cycle: {}".format(str(err)))
- return False
+ logging.exception("Failed: power_cycle")
+ raise err
-def get_zone(conn, options):
+def get_zone(conn, options, instance):
logging.debug("get_zone");
project = options['--project']
- instance = options['--plug']
fl = 'name="%s"' % instance
request = replace_api_uri(options, conn.instances().aggregatedList(project=project, filter=fl))
while request is not None:
@@ -227,7 +311,7 @@
return inst['zone'].split("/")[-1]
request = replace_api_uri(options, conn.instances().aggregatedList_next(
previous_request=request, previous_response=response))
- raise Exception("Unable to find instance %s" % (instance))
+ raise_fence_agent(options, "Unable to find instance %s" % (instance))
def get_metadata(metadata_key, params=None, timeout=None):
@@ -326,13 +410,21 @@
"required" : "0",
"order" : 9
}
+ all_opt["plugzonemap"] = {
+ "getopt" : ":",
+ "longopt" : "plugzonemap",
+ "help" : "--plugzonemap=[plugzonemap] Comma separated zone map when fencing multiple plugs",
+ "shortdesc" : "Comma separated zone map when fencing multiple plugs.",
+ "required" : "0",
+ "order" : 10
+ }
all_opt["proxyhost"] = {
"getopt" : ":",
"longopt" : "proxyhost",
"help" : "--proxyhost=[proxy_host] The proxy host to use, if one is needed to access the internet (Example: 10.122.0.33)",
"shortdesc" : "If a proxy is used for internet access, the proxy host should be specified.",
"required" : "0",
- "order" : 10
+ "order" : 11
}
all_opt["proxyport"] = {
"getopt" : ":",
@@ -341,7 +433,49 @@
"help" : "--proxyport=[proxy_port] The proxy port to use, if one is needed to access the internet (Example: 3127)",
"shortdesc" : "If a proxy is used for internet access, the proxy port should be specified.",
"required" : "0",
- "order" : 11
+ "order" : 12
+ }
+ all_opt["earlyexit"] = {
+ "getopt" : "",
+ "longopt" : "earlyexit",
+ "help" : "--earlyexit Return early if reset is already in progress",
+ "shortdesc" : "If an existing reset operation is detected, the fence agent will return before the operation completes with a 0 return code.",
+ "required" : "0",
+ "order" : 13
+ }
+ all_opt["warntimeout"] = {
+ "getopt" : ":",
+ "type" : "second",
+ "longopt" : "warntimeout",
+ "help" : "--warntimeout=[warn_timeout] Timeout seconds before logging a warning and returning a 0 status code",
+ "shortdesc" : "If the operation is not completed within the timeout, the cluster operations are allowed to continue.",
+ "required" : "0",
+ "order" : 14
+ }
+ all_opt["errortimeout"] = {
+ "getopt" : ":",
+ "type" : "second",
+ "longopt" : "errortimeout",
+ "help" : "--errortimeout=[error_timeout] Timeout seconds before failing and returning a non-zero status code",
+ "shortdesc" : "If the operation is not completed within the timeout, cluster is notified of the operation failure.",
+ "required" : "0",
+ "order" : 15
+ }
+ all_opt["runonwarn"] = {
+ "getopt" : ":",
+ "longopt" : "runonwarn",
+ "help" : "--runonwarn=[run_on_warn] If a timeout occurs and warning is generated, run the supplied command",
+ "shortdesc" : "If a timeout would occur while running the agent, then the supplied command is run.",
+ "required" : "0",
+ "order" : 16
+ }
+ all_opt["runonfail"] = {
+ "getopt" : ":",
+ "longopt" : "runonfail",
+ "help" : "--runonfail=[run_on_fail] If a failure occurs, run the supplied command",
+ "shortdesc" : "If a failure would occur while running the agent, then the supplied command is run.",
+ "required" : "0",
+ "order" : 17
}
@@ -350,7 +484,8 @@
device_opt = ["port", "no_password", "zone", "project", "stackdriver-logging",
"method", "baremetalsolution", "apitimeout", "retries", "retrysleep",
- "serviceaccount", "proxyhost", "proxyport"]
+ "serviceaccount", "plugzonemap", "proxyhost", "proxyport", "earlyexit",
+ "warntimeout", "errortimeout", "runonwarn", "runonfail"]
atexit.register(atexit_handler)
@@ -431,22 +566,39 @@
conn = googleapiclient.discovery.build(
'compute', 'v1', credentials=credentials, cache_discovery=False)
except Exception as err:
- fail_usage("Failed: Create GCE compute v1 connection: {}".format(str(err)))
+ fail_fence_agent(options, "Failed: Create GCE compute v1 connection: {}".format(str(err)))
# Get project and zone
if not options.get("--project"):
try:
options["--project"] = get_metadata('project/project-id')
except Exception as err:
- fail_usage("Failed retrieving GCE project. Please provide --project option: {}".format(str(err)))
+ fail_fence_agent(options, "Failed retrieving GCE project. Please provide --project option: {}".format(str(err)))
if "--baremetalsolution" in options:
options["--zone"] = "none"
- if not options.get("--zone"):
- try:
- options["--zone"] = get_zone(conn, options)
- except Exception as err:
- fail_usage("Failed retrieving GCE zone. Please provide --zone option: {}".format(str(err)))
+
+ # Populates zone automatically if missing from the command
+ zones = [] if not "--zone" in options else options["--zone"].split(",")
+ options["--plugzonemap"] = {}
+ if "--plug" in options:
+ for i, instance in enumerate(options["--plug"].split(",")):
+ if len(zones) == 1:
+ # If only one zone is specified, use it across all plugs
+ options["--plugzonemap"][instance] = zones[0]
+ continue
+
+ if len(zones) - 1 >= i:
+ # If we have enough zones specified with the --zone flag use the zone at
+ # the same index as the plug
+ options["--plugzonemap"][instance] = zones[i]
+ continue
+
+ try:
+ # In this case we do not have a zone specified so we attempt to detect it
+ options["--plugzonemap"][instance] = get_zone(conn, options, instance)
+ except Exception as err:
+ fail_fence_agent(options, "Failed retrieving GCE zone. Please provide --zone option: {}".format(str(err)))
# Operate the fencing device
result = fence_action(conn, options, set_power_status, get_power_status, get_nodes_list, power_cycle)
|