1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
From 73fdae1b9da5aa1ba1d371dcc47fe31a4d22bb31 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 30 Mar 2023 12:20:05 +0200
Subject: [PATCH] fence_aws: fixes to allow running outside of AWS network
- add --skip-race-check parameter to allow running outside of AWS
network e.g. for openshift
- fixed and improved logging logic
- use --debug-file parameter for file logging
---
agents/aws/fence_aws.py | 50 ++++++++++++++++++++-----------
tests/data/metadata/fence_aws.xml | 5 ++++
2 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
index c947bf29c..5d1677144 100644
--- a/agents/aws/fence_aws.py
+++ b/agents/aws/fence_aws.py
@@ -16,13 +16,13 @@
except ImportError:
pass
-logger = logging.getLogger("fence_aws")
+logger = logging.getLogger()
logger.propagate = False
logger.setLevel(logging.INFO)
logger.addHandler(SyslogLibHandler())
logging.getLogger('botocore.vendored').propagate = False
-def get_instance_id():
+def get_instance_id(options):
try:
token = requests.put('http://169.254.169.254/latest/api/token', headers={"X-aws-ec2-metadata-token-ttl-seconds" : "21600"}).content.decode("UTF-8")
r = requests.get('http://169.254.169.254/latest/meta-data/instance-id', headers={"X-aws-ec2-metadata-token" : token}).content.decode("UTF-8")
@@ -30,12 +30,15 @@ def get_instance_id():
except HTTPError as http_err:
logger.error('HTTP error occurred while trying to access EC2 metadata server: %s', http_err)
except Exception as err:
- logger.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
+ if "--skip-race-check" not in options:
+ logger.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
+ else:
+ logger.debug('A fatal error occurred while trying to access EC2 metadata server: %s', err)
return None
-
+
def get_nodes_list(conn, options):
- logger.info("Starting monitor operation")
+ logger.debug("Starting monitor operation")
result = {}
try:
if "--filter" in options:
@@ -63,7 +66,7 @@ def get_power_status(conn, options):
try:
instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [options["--plug"]]}])
state = list(instance)[0].state["Name"]
- logger.info("Status operation for EC2 instance %s returned state: %s",options["--plug"],state.upper())
+ logger.debug("Status operation for EC2 instance %s returned state: %s",options["--plug"],state.upper())
if state == "running":
return "on"
elif state == "stopped":
@@ -78,7 +81,7 @@ def get_power_status(conn, options):
except IndexError:
fail(EC_STATUS)
except Exception as e:
- logging.error("Failed to get power status: %s", e)
+ logger.error("Failed to get power status: %s", e)
fail(EC_STATUS)
def get_self_power_status(conn, instance_id):
@@ -86,10 +89,10 @@ def get_self_power_status(conn, instance_id):
instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
state = list(instance)[0].state["Name"]
if state == "running":
- logging.debug("Captured my (%s) state and it %s - returning OK - Proceeding with fencing",instance_id,state.upper())
+ logger.debug("Captured my (%s) state and it %s - returning OK - Proceeding with fencing",instance_id,state.upper())
return "ok"
else:
- logging.debug("Captured my (%s) state it is %s - returning Alert - Unable to fence other nodes",instance_id,state.upper())
+ logger.debug("Captured my (%s) state it is %s - returning Alert - Unable to fence other nodes",instance_id,state.upper())
return "alert"
except ClientError:
@@ -100,18 +103,18 @@ def get_self_power_status(conn, instance_id):
return "fail"
def set_power_status(conn, options):
- my_instance = get_instance_id()
+ my_instance = get_instance_id(options)
try:
if (options["--action"]=="off"):
- if (get_self_power_status(conn,my_instance) == "ok"):
+ if "--skip-race-check" in options or get_self_power_status(conn,my_instance) == "ok":
conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
- logger.info("Called StopInstance API call for %s", options["--plug"])
+ logger.debug("Called StopInstance API call for %s", options["--plug"])
else:
- logger.info("Skipping fencing as instance is not in running status")
+ logger.debug("Skipping fencing as instance is not in running status")
elif (options["--action"]=="on"):
conn.instances.filter(InstanceIds=[options["--plug"]]).start()
except Exception as e:
- logger.error("Failed to power %s %s: %s", \
+ logger.debug("Failed to power %s %s: %s", \
options["--action"], options["--plug"], e)
def define_new_opts():
@@ -156,12 +159,20 @@ def define_new_opts():
"default": "False",
"order": 6
}
+ all_opt["skip_race_check"] = {
+ "getopt" : "",
+ "longopt" : "skip-race-check",
+ "help" : "--skip-race-check Skip race condition check",
+ "shortdesc": "Skip race condition check",
+ "required": "0",
+ "order": 7
+ }
# Main agent method
def main():
conn = None
- device_opt = ["port", "no_password", "region", "access_key", "secret_key", "filter", "boto3_debug"]
+ device_opt = ["port", "no_password", "region", "access_key", "secret_key", "filter", "boto3_debug", "skip_race_check"]
atexit.register(atexit_handler)
@@ -183,12 +194,15 @@ def main():
run_delay(options)
- if options.get("--verbose") is not None:
- lh = logging.FileHandler('/var/log/fence_aws_debug.log')
+ if "--debug-file" in options:
+ for handler in logger.handlers:
+ if isinstance(handler, logging.FileHandler):
+ logger.removeHandler(handler)
+ lh = logging.FileHandler(options["--debug-file"])
logger.addHandler(lh)
lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
lh.setFormatter(lhf)
- logger.setLevel(logging.DEBUG)
+ lh.setLevel(logging.DEBUG)
if options["--boto3_debug"].lower() not in ["1", "yes", "on", "true"]:
boto3.set_stream_logger('boto3',logging.INFO)
diff --git a/tests/data/metadata/fence_aws.xml b/tests/data/metadata/fence_aws.xml
index 76995ecf2..32de4418a 100644
--- a/tests/data/metadata/fence_aws.xml
+++ b/tests/data/metadata/fence_aws.xml
@@ -46,6 +46,11 @@ For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.ht
<content type="string" default="False" />
<shortdesc lang="en">Boto Lib debug</shortdesc>
</parameter>
+ <parameter name="skip_race_check" unique="0" required="0">
+ <getopt mixed="--skip-race-check" />
+ <content type="boolean" />
+ <shortdesc lang="en">Skip race condition check</shortdesc>
+ </parameter>
<parameter name="quiet" unique="0" required="0">
<getopt mixed="-q, --quiet" />
<content type="boolean" />
|