From 9fde1ac3512ea01fd3f73c50b78e9c3d7af756f0 Mon Sep 17 00:00:00 2001 From: Antoine Cotten Date: Thu, 28 Oct 2021 16:41:49 +0200 Subject: [PATCH] ci(core): Retry sending data over TCP in case of error The first attempt occasionally fails, probably due to a race with the listener's startup. Additionally, perform minor adjustments to the output written to stderr to ensure it is properly flushed in the CI environment. --- .github/workflows/scripts/lib/testing.sh | 14 +++++++++++ .github/workflows/scripts/run-tests-core.sh | 24 +++++++++++++++---- .../workflows/scripts/run-tests-logspout.sh | 12 +++++++--- .../workflows/scripts/run-tests-metricbeat.sh | 18 ++++++++++---- 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/.github/workflows/scripts/lib/testing.sh b/.github/workflows/scripts/lib/testing.sh index 66a6d5b..1099d1e 100755 --- a/.github/workflows/scripts/lib/testing.sh +++ b/.github/workflows/scripts/lib/testing.sh @@ -23,6 +23,8 @@ function container_id { local cid + local -i was_retried=0 + # retry for max 60s (30*2s) for _ in $(seq 1 30); do cid="$(docker container ls -aq -f label="$label")" @@ -30,9 +32,14 @@ function container_id { break fi + was_retried=1 echo -n '.' >&2 sleep 2 done + if ((was_retried)); then + # flush stderr, important in non-interactive environments (CI) + echo >&2 + fi if [ -z "${cid:-}" ]; then err "Timed out waiting for creation of container with label ${label}" @@ -91,6 +98,8 @@ function poll_ready { local -i result=1 local output + local -i was_retried=0 + # retry for max 300s (60*5s) for _ in $(seq 1 60); do if [[ $(docker container inspect "$cid" --format '{{ .State.Status}}') == 'exited' ]]; then @@ -104,9 +113,14 @@ function poll_ready { break fi + was_retried=1 echo -n 'x' >&2 sleep 5 done + if ((was_retried)); then + # flush stderr, important in non-interactive environments (CI) + echo >&2 + fi echo -e "\n${output::-3}" diff --git a/.github/workflows/scripts/run-tests-core.sh b/.github/workflows/scripts/run-tests-core.sh index 0ca8498..5757d16 100755 --- a/.github/workflows/scripts/run-tests-core.sh +++ b/.github/workflows/scripts/run-tests-core.sh @@ -37,15 +37,31 @@ log 'Searching index pattern via Kibana API' response="$(curl "http://${ip_kb}:5601/api/saved_objects/_find?type=index-pattern" -s -u elastic:testpasswd)" echo "$response" count="$(jq -rn --argjson data "${response}" '$data.total')" -if [[ $count -ne 1 ]]; then +if (( count != 1 )); then echo "Expected 1 index pattern, got ${count}" exit 1 fi log 'Sending message to Logstash TCP input' -echo 'dockerelk' | nc -q0 "$ip_ls" 5000 -sleep 1 +declare -i was_retried=0 + +# retry for max 10s (5*2s) +for _ in $(seq 1 5); do + if echo 'dockerelk' | nc -q0 "$ip_ls" 5000; then + break + fi + + was_retried=1 + echo -n 'x' >&2 + sleep 2 +done +if ((was_retried)); then + # flush stderr, important in non-interactive environments (CI) + echo >&2 +fi + +sleep 3 curl -X POST "http://${ip_es}:9200/_refresh" -u elastic:testpasswd \ -s -w '\n' @@ -53,7 +69,7 @@ log 'Searching message in Elasticsearch' response="$(curl "http://${ip_es}:9200/logstash-*/_count?q=message:dockerelk&pretty" -s -u elastic:testpasswd)" echo "$response" count="$(jq -rn --argjson data "${response}" '$data.count')" -if [[ $count -ne 1 ]]; then +if (( count != 1 )); then echo "Expected 1 document, got ${count}" exit 1 fi diff --git a/.github/workflows/scripts/run-tests-logspout.sh b/.github/workflows/scripts/run-tests-logspout.sh index cebd4d8..caf62bc 100755 --- a/.github/workflows/scripts/run-tests-logspout.sh +++ b/.github/workflows/scripts/run-tests-logspout.sh @@ -35,6 +35,8 @@ log 'Searching a log entry forwarded by Logspout' declare response declare -i count +declare -i was_retried=0 + # retry for max 60s (30*2s) for _ in $(seq 1 30); do response="$(curl "http://${ip_es}:9200/logstash-*/_search?q=docker.image:%22docker-elk_logspout%22%20AND%20message:%22logspout%20gliderlabs%22~3&pretty" -s -u elastic:testpasswd)" @@ -47,15 +49,19 @@ for _ in $(seq 1 30); do break fi + was_retried=1 echo -n 'x' >&2 sleep 2 done -echo -e '\n' >&2 +if ((was_retried)); then + # flush stderr, important in non-interactive environments (CI) + echo >&2 +fi echo "$response" # Logspout may restart if Logstash isn't ready yet, so we tolerate multiple # results -if (( count < 1 )); then - echo "Expected at least 1 document, got ${count}" +if (( count == 0 )); then + echo 'Expected at least 1 document' exit 1 fi diff --git a/.github/workflows/scripts/run-tests-metricbeat.sh b/.github/workflows/scripts/run-tests-metricbeat.sh index 46d0d7d..b0e9856 100755 --- a/.github/workflows/scripts/run-tests-metricbeat.sh +++ b/.github/workflows/scripts/run-tests-metricbeat.sh @@ -19,8 +19,8 @@ poll_ready "$cid_es" "http://${ip_es}:9200/" -u 'elastic:testpasswd' log 'Waiting for readiness of Metricbeat' poll_ready "$cid_mb" "http://${ip_mb}:5066/?pretty" -# We expect to find one monitoring entry for the 'elasticsearch' Compose -# service using the following query: +# We expect to find monitoring entries for the 'elasticsearch' Compose service +# using the following query: # # agent.type:"metricbeat" # AND event.module:"docker" @@ -32,6 +32,8 @@ log 'Searching a document generated by Metricbeat' declare response declare -i count +declare -i was_retried=0 + # retry for max 60s (30*2s) for _ in $(seq 1 30); do response="$(curl "http://${ip_es}:9200/metricbeat-*/_search?q=agent.type:%22metricbeat%22%20AND%20event.module:%22docker%22%20AND%20event.dataset:%22docker.container%22%20AND%20container.name:%22docker-elk-elasticsearch-1%22&pretty" -s -u elastic:testpasswd)" @@ -44,13 +46,19 @@ for _ in $(seq 1 30); do break fi + was_retried=1 echo -n 'x' >&2 sleep 2 done -echo -e '\n' >&2 +if ((was_retried)); then + # flush stderr, important in non-interactive environments (CI) + echo >&2 +fi echo "$response" -if (( count != 1 )); then - echo "Expected 1 document, got ${count}" +# Metricbeat buffers metrics until Elasticsearch becomes ready, so we tolerate +# multiple results +if (( count == 0 )); then + echo 'Expected at least 1 document' exit 1 fi