Scripts that log content with telegraf so we can show it in Grafana. These run on the host, as they need access to logs/content that sits underneath the containers.
This commit is contained in:
16
telegraf-and-kuma-backups
Executable file
16
telegraf-and-kuma-backups
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
#
# Record the state of the last backup run in prometheus text format (for
# telegraf to scrape) and push the same up/down state to Uptime Kuma.

prom_file=/srv/docker/container/telegraf/monitoring-results/backups.prom

# backup-success prints a description of any problem; its exit code is the state
ISSUE=$(/home/ddp/bin/backup-success)
RES=$?

echo "# HELP node_backups status of last backup" > "$prom_file"
echo "# TYPE node_backups gauge" >> "$prom_file"

# -ne: the exit code is numeric, compare it numerically
if [ "$RES" -ne 0 ];
then
    echo "node_backups{what=\"state\",issue=\"$ISSUE\"} $RES" >> "$prom_file"
    # NOTE(review): msg=OK on a "down" push looks odd — confirm whether msg
    # should carry $ISSUE instead
    curl 'http://mara.ddp.net:3001/api/push/hoJD234qsx?status=down&msg=OK'
else
    # need a space here to make the metric still have an issue field & so grafanas regex matches it
    echo "node_backups{what=\"state\",issue=\" \"} $RES" >> "$prom_file"
    curl 'http://mara.ddp.net:3001/api/push/hoJD234qsx?status=up&msg=OK'
fi
|
||||
268
telegraf-last-docker-update.py
Executable file
268
telegraf-last-docker-update.py
Executable file
@@ -0,0 +1,268 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import yaml
|
||||
import subprocess
|
||||
import re
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import requests
|
||||
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
# these ports are nat'd on the modem to mara, so if a container is in host mode
# on one of these ports, or maps these ports, then that container is internet-facing
PORT_FWDS=[ '7777', '7778', '27015', '993', '25', '465', '587' ]

# get list of running docker containers (this is also used to find the true image id).
# NOTE: the output is captured once at startup; the helpers below grep this snapshot
docker_ps=subprocess.run( ['sudo','docker','ps'], stdout=subprocess.PIPE)
|
||||
|
||||
# This finds the actual img id for the container, avoids quirks I had around postgres:16 not matching for some reason
def ImageIdFor( c ):
    """Return the image id (second column of `docker ps`) for container name c.

    Falls back to returning c itself when no matching row is found.
    """
    for raw in docker_ps.stdout.splitlines():
        # decode the bytes properly instead of matching against the str() repr
        line = raw.decode('utf-8', 'replace')
        # the container name is the last column; re.escape so names containing
        # regex metacharacters (e.g. dots) match literally
        if re.search( rf" {re.escape(c)}$", line ):
            m2 = re.search( r"\S+\s+(\S+)\s+", line )
            if m2:
                return m2[1]
    return c
|
||||
|
||||
|
||||
# return True if a container is running
def IsRunning( c ):
    """True when container name c appears as the last column of `docker ps`."""
    for raw in docker_ps.stdout.splitlines():
        # decode the bytes properly instead of matching against the str() repr
        line = raw.decode('utf-8', 'replace')
        # re.escape so names containing regex metacharacters match literally
        if re.search( rf" {re.escape(c)}$", line ):
            return True
    return False
|
||||
|
||||
|
||||
# Just exclude MAIA containers for now, think about this much harder another day
def Excluded( c ):
    """Return True when container name c should be skipped entirely."""
    return 'MAIA_' in c
|
||||
|
||||
|
||||
# see if this container either has a matching port to the list of open ports on the modem (PORT_FWDS)
# or if traefik is front-ending this, and the rule includes a Host that has depaoli.id.au on it
# if the above are true, its internet-facing, and a higher risk, so worth knowing
def external(container):
    """Classify whether a compose service is reachable from the internet.

    Returns True / False, or the string "Maybe" for host-networked
    containers whose exposure cannot be determined from the compose file.
    """
    # traefik itself is the internet-facing reverse proxy
    if container == 'traefik':
        return True

    service = compose['services'][container]

    # any forwarded modem port appearing in a port mapping => internet-facing
    if 'ports' in service:
        for port in PORT_FWDS:
            if any(port in mapping for mapping in service['ports']):
                return True

    if 'labels' not in service:
        return False

    # internet-facing via traefik needs BOTH traefik enabled AND a router
    # rule whose Host(...) mentions the public domain
    traefik_on = False
    ext_host = False
    for s in service['labels']:
        if 'traefik.enable=true' in s:
            traefik_on = True
        # dots escaped so the label key must match literally
        m = re.search(r"traefik\.\S+\.routers\.\S+\.rule=\s*Host\S*\(\`(.*)\`\)", s)
        if m and 'depaoli.id.au' in m[1]:
            ext_host = True
    if traefik_on and ext_host:
        return True

    # host networking: could be listening on a forwarded port, can't tell here
    if 'network_mode' in service:
        return "Maybe"

    return False
|
||||
|
||||
def watchtower(container):
    """True when this service carries a watchtower label that is set to true."""
    labels = compose['services'][container].get('labels', [])
    return any('watchtower' in lbl and 'true' in lbl for lbl in labels)
|
||||
|
||||
|
||||
def norm_time(dt_str):
    """Parse e.g. '2020-12-10T09:21:12+11:00' and drop the timezone.

    The wall-clock fields are kept exactly as written; the UTC offset is
    simply discarded, so the result is a naive datetime.
    """
    parsed = datetime.strptime(dt_str, '%Y-%m-%dT%H:%M:%S%z')
    return parsed.replace(tzinfo=None)
|
||||
|
||||
|
||||
def norm_time_git(dt_str):
    """Parse a git '%ci' style date ('2020-12-10 09:21:12 +1100') and drop the offset."""
    parsed = datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S %z')
    return parsed.replace(tzinfo=None)
|
||||
|
||||
# Load the docker-compose.yaml file (globals below are read by the helper
# functions and the main loop)
with open('/srv/docker/config/docker-compose.yml') as f:
    compose = yaml.safe_load(f)

# Get the container names from the docker-compose.yaml file
containers = [service_name for service_name in compose['services'].keys()]


# Load the update-docker.log file
with open('/srv/docker/log/update-docker.log') as f:
    logs = f.readlines()

# Find the last log line for each container — the last line mentioning the
# service's image name wins; services with no 'image' key get None
last_logs = {}
for container in containers:
    last_log = None
    for log in logs:
        if 'image' in compose['services'][container]:
            im=compose['services'][container]['image']
        else:
            im=''
        if im != "" and im in log:
            last_log = log
    last_logs[container] = last_log


# Get docker containers being monitored in kuma (a plain-text dump of the
# kuma monitor names; InKuma() does a substring check against it)
f = open('/srv/docker/container/mon/monitoring-results/kuma.txt', 'r')
monitors = f.read()
|
||||
# checks to see if the named container appears anywhere in the kuma monitor dump
def InKuma( container ):
    return container in monitors
|
||||
|
||||
def GetLastCommmitDaysAgo( container ):
    """Return whole days since the newest upstream commit for this service.

    Looks for a 'last.commit.url=<url>' label (expected to point at a GitHub
    commits API endpoint), fetches it and computes the age of the newest
    commit.  Returns -1 when the service has no such label.
    """
    # .get() so services without any labels don't raise KeyError
    # (consistent with watchtower()/external())
    for label in compose['services'][container].get('labels', []):
        # dots escaped so the label key must match literally
        m = re.search(r'^last\.commit\.url=(.+)', label)
        if m:
            # Fetch the latest commit details from GitHub API
            response = requests.get(m[1])
            response.raise_for_status()  # Raise an error for bad status codes
            commit_info = response.json()[0]

            # Commit date is ISO 8601, possibly 'Z'-suffixed — normalise so
            # fromisoformat() accepts it on older pythons
            commit_date_str = commit_info["commit"]["author"]["date"]
            commit_date = datetime.fromisoformat(commit_date_str.replace('Z', '+00:00'))

            # Age in whole days relative to now (UTC)
            days_ago = (datetime.now(timezone.utc) - commit_date).days
            return days_ago
    return -1
|
||||
|
||||
# timestamp all container ages are measured against (naive local time, to
# match what norm_time()/norm_time_git() return)
current_datetime = datetime.now()

# open file for writing prometheus formatted data into
f = open('/srv/docker/container/telegraf/monitoring-results/docker_updates.prom', 'w')

# put required help/type text in
print('# HELP node_docker_updates details of last known update of a container, whether it has a locked version (or latest tag) and whether watchtower is updating it', file=f)
print('# TYPE node_docker_updates gauge', file=f )


# Print the last log line and container name for each container
for container, last_log in last_logs.items():
    # skip containers that are not running or are deliberately excluded
    if not IsRunning( container ) or Excluded( container ):
        continue

    # build one prometheus sample line, label by label
    out_str= 'node_docker_updates{container="' + container + '"'
    if 'image' in compose['services'][container]:
        im=compose['services'][container]['image']
    else:
        im=''

    # latest_tag="no" only when the image is pinned to a real version tag;
    # un-tagged images and latest/nightly/beta count as floating, and
    # php:apache is special-cased as floating too
    out_str +=', latest_tag='
    if ':' in im and ':latest' not in im and ':nightly' not in im and ':beta' not in im and im != 'php:apache':
        out_str += '"no"'
    else:
        out_str += '"yes"'

    # image= the compose image name, or 'Built' for locally-built services
    is_built=0
    out_str += ', image="'
    if 'image' in compose['services'][container]:
        out_str += compose['services'][container]['image']
    else:
        out_str += 'Built'
        is_built=1
    out_str += '"'

    # watchtower= whether watchtower auto-updates this container
    out_str += ', watchtower='
    if watchtower( container ):
        out_str += '"yes"'
    elif is_built:
        out_str += '"Built"'
    else:
        out_str += '"no"'

    out_str += f', monitored="{InKuma(container)}"'

    # work out when the running image was last created/updated
    if 'image' in compose['services'][container]:
        img_id=ImageIdFor( container )
        res=subprocess.run(['sudo','docker','image','history','--human=false', img_id], stdout=subprocess.PIPE)
        if res.returncode == 0:
            # first ISO timestamp (with offset) in the history output
            # NOTE(review): m is assumed non-None here — history output with no
            # timestamp at all would raise; confirm that cannot happen
            m=re.search(r'(\d{1,4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{1,2}:\d{1,2}\+\d{1,2}:\d{1,2})', str(res.stdout, 'utf-8') )
            if '0001-01-01' in m[1]:
                # bogus zero date: fall back to the offset-less timestamps and
                # assume a +10:00 offset
                m=re.findall( r'(\d{1,4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{1,2}:\d{1,2})', str(res.stdout, 'utf-8') )
                cdate=norm_time( m[1] + '+10:00')
            else:
                cdate=norm_time( m[1] )
            last_update=cdate
        else:
            last_update='No Date'
    else:
        # locally-built image: resolve its id via `docker compose images`
        os.chdir("/srv/docker/config")
        res=subprocess.run(f'sudo docker compose images {container} | tail -n1', shell=True, stdout=subprocess.PIPE)
        if res.returncode == 0:
            m=re.search( r'(\S+)\s+(\S+)', str(res.stdout, 'utf-8') )
            img_id = m[2]

        # NOTE(review): if the compose lookup above failed, img_id may be
        # unset (NameError) or stale from a previous loop iteration — verify
        res=subprocess.run(['sudo','docker','history','--human=false',img_id], stdout=subprocess.PIPE)
        if res.returncode == 0:
            m=re.search(r'(\d{1,4}-\d{1,2}-\d{1,2}T\d{1,2}:\d{1,2}:\d{1,2}\+\d{1,2}:\d{1,2})', str(res.stdout, 'utf-8') )
            cdate=norm_time( m[1] )
            last_update=cdate
        else:
            last_update='No Date'

#    if is_built and 'book' in container:
#        res=subprocess.run(['git', '-C', '/home/ddp/src/pybook/', 'log', '--date=iso', '-n', '1', '--pretty=%ci'], stdout=subprocess.PIPE)
#        last_update=norm_time_git( str(res.stdout, 'utf-8').strip() )
#
#    if is_built and 'pa' in container:
#        res=subprocess.run(['git', '-C', '/home/ddp/src/photoassistant/', 'log', '--date=iso', '-n', '1', '--pretty=%ci'], stdout=subprocess.PIPE)
#        last_update=norm_time_git( str(res.stdout, 'utf-8').strip() )

    # age_in_days= whole days since the image was updated (99999 = unknown)
    if last_update == 'No Date':
        out_str += f', age_in_days="99999"'
    else:
        # last_update is normally already a datetime; the string branch covers
        # any non-datetime value that slipped through
        if type(last_update) == type(current_datetime):
            last_update_as_datetime = last_update
        else:
            last_update_as_datetime = datetime.strptime( last_update, '%Y-%m-%d %H:%M:%S' )
        time_difference = current_datetime - last_update_as_datetime
        out_str += f', age_in_days="{time_difference.days}"'

    # repo_commit_in_days= age of the upstream repo's newest commit, when the
    # service declares a last.commit.url label (99999 = no label)
    days_ago = GetLastCommmitDaysAgo( container )
    if days_ago >= 0:
        out_str += f', repo_commit_in_days="{days_ago}"'
    else:
        out_str += f', repo_commit_in_days="99999"'

    out_str += f', last_update="{last_update}"'
    out_str += f', internet_facing="{external(container)}"'
    out_str += '} 1'

    print( out_str, file=f )

f.close()
|
||||
16
telegraf-watchtower-enabled
Executable file
16
telegraf-watchtower-enabled
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
#
# Count how many services in docker-compose.yml carry the watchtower label
# and write that count in influx line format with a nanosecond timestamp.
#
# Prometheus only looks at the last state of the file, so a stale value would
# still look 'current'.
#
# SO, lets date 'stamp' when we ran this, rather than give a boolean, because if
# its running this will update the date, if not, it wont anyway :)

log_file=/srv/docker/container/telegraf/monitoring-results/watchtower-enabled.influx

# skip commented-out lines, then count the label; -F matches the label
# literally so the dots are not regex wildcards
cnt=$(grep -Ev '^#' /srv/docker/config/docker-compose.yml | grep -cF com.centurylinklabs.watchtower.enable)

tstamp=$(date +%s%N)
# using influx line protocol format
echo "mara_watchtower_enabled count=$cnt $tstamp" > "$log_file"
|
||||
|
||||
Reference in New Issue
Block a user