## 前言
探针采用的是 cppla 的 ServerStatus 项目,配置起来还是挺简单的。
## 安装 ServerStatus 服务端
首先,执行以下命令以下载原始的配置文件:
wget --no-check-certificate -qO ~/serverstatus-config.json https://raw.githubusercontent.com/cppla/ServerStatus/master/server/config.json && mkdir ~/serverstatus-monthtraffic
mkdir server_status && mkdir client_status && cd server_status && touch docker-compose.yml
将 docker-compose.yml 的内容修改为如下:
# docker-compose definition for the ServerStatus server container.
version: '3'
services:
  serverstatus:
    image: cppla/serverstatus:latest
    container_name: serverstatus
    restart: always
    volumes:
      # Node list / watchdog rules downloaded to the home directory earlier.
      - ~/serverstatus-config.json:/ServerStatus/server/config.json
      # Host directory mounted over the web root's json/ directory.
      - ~/serverstatus-monthtraffic:/usr/share/nginx/html/json
    ports:
      # Web UI: host port 10081 -> container port 80.
      - "10081:80"
      # Port the clients connect to (PORT in client-linux.py).
      - "35601:35601"
并修改 ~/serverstatus-config.json 内容:
{
"servers": [
{
"username": "s01",
"name": "绿云 日本",
"type": "GreenCloud",
"host": "host1",
"location": "🇯🇵",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s02",
"name": "BV 新加坡",
"type": "ByteVirt",
"host": "host1",
"location": "🇸🇬",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s03",
"name": "甲骨文零号 AMD 圣保罗",
"type": "Oracle",
"host": "host1",
"location": "🇧🇷",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s04",
"name": "甲骨文一号 AMD 圣保罗",
"type": "Oracle",
"host": "host1",
"location": "🇧🇷",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s05",
"name": "甲骨文 ARM 圣保罗",
"type": "Oracle",
"host": "host1",
"location": "🇧🇷",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s06",
"name": "阿里轻量云 香港",
"type": "Alibaba",
"host": "host1",
"location": "🇭🇰",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s07",
"name": "Netcup 德国",
"type": "Netcup",
"host": "host1",
"location": "🇩🇪",
"password": "passwordlin",
"monthstart": 1
},
{
"username": "s08",
"name": "CCS 洛杉矶",
"type": "ColorCrossing",
"host": "host1",
"location": "🇺🇸",
"password": "passwordlin",
"monthstart": 1
}
],
"watchdog": [
{
"name": "cpu high warning,exclude username s01",
"rule": "cpu>90&load_1>5&username!='s01'",
"interval": 600,
"callback": "https://yourSMSurl"
},
{
"name": "memory high warning, exclude less than 1GB vps",
"rule": "(memory_used/memory_total)*100>90&memory_total>1048576",
"interval": 300,
"callback": "https://yourSMSurl"
},
{
"name": "offline warning,exclude name node1",
"rule": "online4=0&online6=0&name!='node1'",
"interval": 600,
"callback": "https://yourSMSurl"
},
{
"name": "ddcc attack,limit type Oracle",
"rule": "tcp_count>600&type='Oracle'",
"interval": 300,
"callback": "https://yourSMSurl"
},
{
"name": "month traffic warning",
"rule": "(network_out-last_network_out)/1024/1024/1024>999",
"interval": 3600,
"callback": "https://yourSMSurl"
},
{
"name": "you can parse an expression combining any known field",
"rule": "load_5>3",
"interval": 900,
"callback": "https://yourSMSurl"
}
]
}
然后执行 docker-compose up -d。
## 安装 ServerStatus 客户端
执行如下命令
mkdir -p ~/client_status && cd ~/client_status && touch client-linux.py
修改 client-linux.py 的内容为如下:
#!/usr/bin/env python3
# coding: utf-8
# Update by : https://github.com/cppla/ServerStatus, Update date: 20220530
# Version: 1.0.3, supported Python versions: 2.7 to 3.10
# Supported operating systems: Linux, OSX, FreeBSD, OpenBSD and NetBSD, both 32-bit and 64-bit architectures
# Note: by default only SERVER and USER need to be changed. The packet-loss
# probe targets can be customised, for example: CU = "www.facebook.com".

# Address of the machine running the ServerStatus server.
SERVER = "193.26.156.130"
# Credentials sent as "USER:PASSWORD" after connecting; they have to match a
# "username"/"password" pair in the server's config.json.
USER = "s13"
PASSWORD = "server_status_1136lin"
# TCP port of the ServerStatus server that this client connects to.
PORT = 35601
# Hosts probed by the three ping threads; results are published under the
# marks "10010" (CU), "189" (CT) and "10086" (CM).
CU = "cu.tz.cloudcpp.com"
CT = "ct.tz.cloudcpp.com"
CM = "cm.tz.cloudcpp.com"
# TCP port the ping threads connect to on the probe hosts.
PROBEPORT = 80
# Address family used when resolving probe hostnames.
PROBE_PROTOCOL_PREFER = "ipv4" # "ipv4" or "ipv6"
# Number of most recent probe results kept per host for the loss-rate figure.
PING_PACKET_HISTORY_LEN = 100
# Seconds slept between samples by the CPU sampler and the background threads.
INTERVAL = 1
import socket
import time
import timeit
import re
import os
import sys
import json
import errno
import subprocess
import threading
try:
from queue import Queue # python3
except ImportError:
from Queue import Queue # python2
def get_uptime(path="/proc/uptime"):
    """Return the system uptime in whole seconds.

    :param path: uptime file to read; the default is the regular procfs
        location. Override it when procfs is mounted elsewhere (for example
        a host /proc bind-mounted into a container).
    :return: integer part of the first value on the file's first line.
    """
    with open(path, "r") as f:
        # The line looks like "12345.67 54321.00"; keep what precedes the
        # first decimal point.
        seconds = f.readline().split(".", 1)[0]
    return int(seconds)
def get_memory(meminfo_path="/proc/meminfo"):
    """Return memory and swap figures parsed from a meminfo file.

    :param meminfo_path: file to parse; defaults to the regular procfs path.
    :return: tuple ``(MemTotal, MemUsed, SwapTotal, SwapFree)`` of ints in kB,
        where ``MemUsed = MemTotal - MemFree - Buffers - Cached - SReclaimable``.
    :raises KeyError: if MemTotal, MemFree, Buffers, Cached, SwapTotal or
        SwapFree is missing from the file.
    """
    line_re = re.compile(r"^(?P<key>\S*):\s*(?P<value>\d*)\s*kB")
    fields = {}
    # Context manager so the file handle is closed deterministically.
    with open(meminfo_path) as f:
        for line in f:
            match = line_re.match(line)
            if match:
                fields[match.group("key")] = int(match.group("value"))
    mem_total = fields["MemTotal"]
    mem_used = (
        mem_total
        - fields["MemFree"]
        - fields["Buffers"]
        - fields["Cached"]
        # Not every kernel reports SReclaimable; treat a missing entry as 0
        # instead of failing the whole sample.
        - fields.get("SReclaimable", 0)
    )
    return mem_total, mem_used, fields["SwapTotal"], fields["SwapFree"]
def get_hdd():
    """Return ``(size, used)`` taken from the grand-total line of ``df``.

    ``df`` is invoked with ``-Tlm --total`` and one ``-t`` filter per
    filesystem type listed below; the last output line is the total row, whose
    third and fourth columns are returned as ints.
    """
    fs_types = (
        "ext4",
        "ext3",
        "ext2",
        "reiserfs",
        "jfs",
        "ntfs",
        "fat32",
        "btrfs",
        "fuseblk",
        "zfs",
        "simfs",
        "xfs",
    )
    command = ["df", "-Tlm", "--total"]
    for fs_type in fs_types:
        command += ["-t", fs_type]
    report = subprocess.check_output(command).decode("Utf-8")
    # With -T the columns are: filesystem, type, size, used, ...
    summary = report.splitlines()[-1].split()
    size, used = summary[2], summary[3]
    return int(size), int(used)
def get_time(stat_path="/proc/stat"):
    """Return the first four CPU time counters from a stat file.

    :param stat_path: file to read; defaults to the regular procfs path.
    :return: list of four ints taken from the fields that follow the ``cpu``
        label on the first line (user, nice, system, idle according to
        proc(5)).
    """
    with open(stat_path, "r") as f:
        # Whitespace-agnostic split: does not depend on the exact number of
        # spaces between the "cpu" label and the first counter.
        fields = f.readline().split()
    return [int(value) for value in fields[1:5]]
def delta_time():
    """Return how much each CPU counter grew over one INTERVAL.

    Takes two ``get_time()`` samples separated by ``time.sleep(INTERVAL)`` and
    returns the element-wise difference (later minus earlier) as a list.
    """
    before = get_time()
    time.sleep(INTERVAL)
    after = get_time()
    return [later - earlier for earlier, later in zip(before, after)]
def get_cpu():
    """Return CPU utilisation in percent, rounded to one decimal place.

    The last element of ``delta_time()`` is treated as idle time; utilisation
    is 100 minus idle's share of the summed deltas. A zero sum is replaced by
    1 so the division cannot fail.
    """
    deltas = delta_time()
    total = sum(deltas) or 1
    idle_share = deltas[-1] * 100.00 / total
    return round(100 - idle_share, 1)
def liuliang(net_dev_path="/proc/net/dev"):
    """Return cumulative ``(bytes_in, bytes_out)`` summed over real interfaces.

    ("liuliang" is pinyin for "traffic".)

    :param net_dev_path: file to parse; defaults to the regular procfs path.
    :return: tuple of two ints. An interface is skipped when it is ``lo``,
        when its name contains one of the virtual-interface markers below, or
        when either of its two byte counters is zero.
    """
    # Raw string: "\s" and "\d" are regex escapes, not string escapes.
    # Compiled once per call instead of once per line.
    line_re = re.compile(
        r"([^\s]+):[\s]{0,}(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)"
    )
    virtual_markers = ("tun", "docker", "veth", "br-", "vmbr", "vnet", "kube")
    net_in = 0
    net_out = 0
    with open(net_dev_path) as f:
        for line in f:
            match = line_re.search(line)
            if not match:
                # Header lines and anything that is not an interface row.
                continue
            name = match.group(1)
            # Group 2 is the first counter (received bytes); group 10 is the
            # ninth counter (transmitted bytes).
            rx_bytes = match.group(2)
            tx_bytes = match.group(10)
            if name == "lo" or any(marker in name for marker in virtual_markers):
                continue
            if rx_bytes == "0" or tx_bytes == "0":
                continue
            net_in += int(rx_bytes)
            net_out += int(tx_bytes)
    return net_in, net_out
def tupd():
    """
    Count TCP sockets, UDP sockets, processes and threads; the server-side
    watchdog rules use these figures (e.g. the tcp_count rule) to raise
    attack warnings.
    :return: tuple (tcp, udp, process, thread)
    """

    def count_lines(command, overhead):
        # Output of `... | wc -l` minus the lines that are not real entries
        # (presumably the header row and, for ps, one more helper row).
        raw = subprocess.check_output(command, shell=True)
        return int(raw[:-1]) - overhead

    tcp = count_lines("ss -t|wc -l", 1)
    udp = count_lines("ss -u|wc -l", 1)
    processes = count_lines("ps -ef|wc -l", 2)
    threads = count_lines("ps -eLf|wc -l", 2)
    return tcp, udp, processes, threads
def get_network(ip_version):
    """Report whether outbound connectivity works over the given IP version.

    :param ip_version: 4 or 6.
    :return: True if a TCP connection to the matching test host on port 80
        succeeds within 2 seconds; False if it fails or ``ip_version`` is
        neither 4 nor 6.
    """
    hosts = {4: "ipv4.google.com", 6: "ipv6.google.com"}
    try:
        host = hosts.get(ip_version)
    except TypeError:
        # Unhashable argument: not a valid IP version either.
        host = None
    if host is None:
        # Made explicit: this case used to surface as an unbound local that
        # a bare except swallowed.
        return False
    try:
        socket.create_connection((host, 80), 2).close()
    except (socket.error, OSError):
        # Covers resolution failures, refusals and timeouts without also
        # swallowing KeyboardInterrupt/SystemExit.
        return False
    return True
# Shared state written by the background threads and read by the main loop.
# The keys "10010", "189" and "10086" are the marks of the CU, CT and CM
# probe threads respectively.

# Fraction (0.0-1.0) of failed probes within the recent history, per mark.
lostRate = {"10010": 0.0, "189": 0.0, "10086": 0.0}
# Duration of the latest successful probe in milliseconds, per mark.
pingTime = {"10010": 0, "189": 0, "10086": 0}
# Network throughput state maintained by _net_speed():
#   netrx / nettx - receive / transmit rate (counter delta divided by seconds)
#   clock         - time.time() of the previous sample
#   diff          - seconds between the previous two samples
#   avgrx / avgtx - summed interface counters at the previous sample
netSpeed = {
    "netrx": 0.0,
    "nettx": 0.0,
    "clock": 0.0,
    "diff": 0.0,
    "avgrx": 0,
    "avgtx": 0,
}
# Read/write byte deltas summed over processes, maintained by _disk_io().
diskIO = {"read": 0, "write": 0}
def _ping_thread(host, mark, port):
    """Probe ``host:port`` over TCP forever, publishing latency and loss.

    Runs as a thread target and never returns. Each round opens a TCP
    connection with a 1 second timeout. A completed or actively refused
    connection counts as a reply and its duration is stored in
    ``pingTime[mark]`` (ms); anything else counts as a lost probe. The last
    PING_PACKET_HISTORY_LEN outcomes are kept, and once more than 30 are
    recorded the lost fraction is stored in ``lostRate[mark]``.

    :param host: hostname or IP address to probe.
    :param mark: key under which results are published.
    :param port: TCP port to connect to.
    """
    lost = 0
    history = Queue(maxsize=PING_PACKET_HISTORY_LEN)
    while True:
        # Resolve on every round so DNS changes are picked up.
        target = host
        if ":" not in host:
            # No colon: an IPv4 literal or a hostname rather than a plain
            # IPv6 address.
            try:
                if PROBE_PROTOCOL_PREFER == "ipv4":
                    family = socket.AF_INET
                else:
                    family = socket.AF_INET6
                target = socket.getaddrinfo(host, None, family)[0][4][0]
            except Exception:
                # Resolution failed: fall back to connecting by name.
                pass

        # Evict the oldest outcome when the window is full; if it was a loss,
        # it no longer counts.
        if history.full() and history.get() == 0:
            lost -= 1

        started = timeit.default_timer()
        try:
            socket.create_connection((target, port), timeout=1).close()
        except socket.error as error:
            # A refused connection still proves the host answered.
            answered = error.errno == errno.ECONNREFUSED
        else:
            answered = True

        if answered:
            pingTime[mark] = int((timeit.default_timer() - started) * 1000)
            history.put(1)
        else:
            lost += 1
            history.put(0)

        if history.qsize() > 30:
            lostRate[mark] = float(lost) / history.qsize()
        time.sleep(INTERVAL)
def _net_speed():
    """Sample /proc/net/dev forever and publish throughput in ``netSpeed``.

    Runs as a thread target and never returns. Every INTERVAL seconds it sums
    the first (receive) and ninth (transmit) counters of every non-virtual
    interface, then stores the per-second rate since the previous sample in
    ``netSpeed["netrx"]`` / ``netSpeed["nettx"]``.
    """
    virtual_markers = ("tun", "docker", "veth", "br-", "vmbr", "vnet", "kube")
    while True:
        with open("/proc/net/dev", "r") as f:
            net_dev = f.readlines()
        avgrx = 0
        avgtx = 0
        # The first two lines are column headers.
        for dev in net_dev[2:]:
            name, sep, counters = dev.partition(":")
            if not sep:
                # Not an "iface: counters" row.
                continue
            name = name.strip()
            # Exact match for loopback, consistent with liuliang(); a
            # substring test would also drop names such as "wlo1".
            if name == "lo" or any(marker in name for marker in virtual_markers):
                continue
            counters = counters.split()
            avgrx += int(counters[0])
            avgtx += int(counters[8])
        now_clock = time.time()
        netSpeed["diff"] = now_clock - netSpeed["clock"]
        netSpeed["clock"] = now_clock
        # Guard against a zero/negative elapsed time (INTERVAL of 0 or a
        # clock step) which would otherwise kill this thread.
        if netSpeed["diff"] > 0:
            netSpeed["netrx"] = int((avgrx - netSpeed["avgrx"]) / netSpeed["diff"])
            netSpeed["nettx"] = int((avgtx - netSpeed["avgtx"]) / netSpeed["diff"])
        netSpeed["avgrx"] = avgrx
        netSpeed["avgtx"] = avgtx
        time.sleep(INTERVAL)
def _read_pid_io(pid):
    """Return ``{"read", "write", "name"}`` for one process from procfs."""
    pid_io = {"read": 0, "write": 0}
    with open("/proc/{}/io".format(pid)) as f:
        for line in f:
            if "read_bytes" in line:
                pid_io["read"] = int(line.split("read_bytes:")[-1].strip())
            elif "write_bytes" in line and "cancelled_write_bytes" not in line:
                pid_io["write"] = int(line.split("write_bytes:")[-1].strip())
    # Context manager so the comm file handle is not leaked on every sample.
    with open("/proc/{}/comm".format(pid), "r") as f:
        pid_io["name"] = f.read().strip()
    return pid_io


def _snapshot_pid_io(pids):
    """Return ``{pid: io_info}`` for every pid in ``pids`` that is readable."""
    snapshot = {}
    for pid in pids:
        try:
            snapshot[pid] = _read_pid_io(pid)
        except (IOError, OSError, ValueError):
            # Process exited, access denied, or unparsable content: skip it.
            continue
    return snapshot


def _disk_io():
    """
    good luck for opensource! by: cpp.la
    Disk I/O: because of IOPS, the differences between SSD and HDD, RAID
    cards, ZFS and other array technologies, the impact of I/O on performance
    has to be judged together with the situation of your own server.
    For example, with a mechanical disk doing lots of random small-file reads
    and writes, even very low throughput can cause long disk waits.
    With sequential I/O, an ordinary mechanical disk writing at 100Mb/s can
    also cause long disk waits.
    Disk read/write figures have some error: 4k, 8k,
    https://stackoverflow.com/questions/34413926/psutil-vs-dd-monitoring-disk-i-o

    Runs as a thread target and never returns. Each round snapshots the
    per-process byte counters, sleeps INTERVAL seconds, snapshots the same
    pids again, and stores the summed deltas in ``diskIO``.
    :return:
    """
    while True:
        pids = [entry for entry in os.listdir("/proc") if entry.isdigit()]
        first = _snapshot_pid_io(pids)
        time.sleep(INTERVAL)
        second = _snapshot_pid_io(pids)

        read_total = 0
        write_total = 0
        for pid, before in first.items():
            after = second.get(pid)
            if after is None:
                # Only processes readable in both snapshots are counted.
                continue
            # A changed name means the entry no longer describes the same
            # program; processes named "bash" are excluded as well.
            if before["name"] == after["name"] and before["name"] != "bash":
                read_total += after["read"] - before["read"]
                write_total += after["write"] - before["write"]
        diskIO["read"] = read_total
        diskIO["write"] = write_total
def get_realtime_data():
    """
    Start the background sampler threads as daemons: one ping thread per
    probe target (CU, CT, CM), the network speed sampler and the disk I/O
    sampler.
    :return:
    """
    probes = ((CU, "10010"), (CT, "189"), (CM, "10086"))
    workers = [
        threading.Thread(
            target=_ping_thread,
            kwargs={"host": probe_host, "mark": mark, "port": PROBEPORT},
        )
        for probe_host, mark in probes
    ]
    workers.append(threading.Thread(target=_net_speed))
    workers.append(threading.Thread(target=_disk_io))
    for worker in workers:
        # Daemon threads do not keep the process alive on exit.
        worker.daemon = True
        worker.start()
def byte_str(object):
    """
    Convert between text and bytes: a str is encoded to UTF-8 bytes, bytes
    are decoded to str. Any other value has its type printed and yields None.
    :param object: str or bytes value to convert
    :return: the converted value, or None for unsupported types
    """
    if isinstance(object, str):
        return object.encode(encoding="utf-8")
    if isinstance(object, bytes):
        return bytes.decode(object)
    print(type(object))
    return None
if __name__ == "__main__":
    # Optional KEY=VALUE command-line overrides, e.g. "SERVER=1.2.3.4 USER=s01".
    # The key must match exactly (leading dashes are tolerated), so a value
    # that merely contains another key's name - e.g. a password containing
    # "PORT" - can no longer be mis-parsed, and the script path is not scanned.
    for argc in sys.argv[1:]:
        key, sep, value = argc.lstrip("-").partition("=")
        if not sep:
            continue
        if key == "SERVER":
            SERVER = value
        elif key == "PORT":
            PORT = int(value)
        elif key == "USER":
            USER = value
        elif key == "PASSWORD":
            PASSWORD = value
        elif key == "INTERVAL":
            INTERVAL = int(value)

    socket.setdefaulttimeout(30)
    get_realtime_data()

    # Connect, authenticate, then stream one "update {json}" line per sample.
    # Any failure drops the connection and retries after 3 seconds.
    while True:
        s = None
        try:
            print("Connecting...")
            s = socket.create_connection((SERVER, PORT))
            data = byte_str(s.recv(1024))
            if data.find("Authentication required") > -1:
                s.sendall(byte_str(USER + ":" + PASSWORD + "\n"))
                data = byte_str(s.recv(1024))
                if data.find("Authentication successful") < 0:
                    print(data)
                    raise socket.error
            else:
                print(data)
                raise socket.error

            print(data)
            if data.find("You are connecting via") < 0:
                data = byte_str(s.recv(1024))
                print(data)

            timer = 0
            check_ip = 0
            # The server reports which IP version this connection uses; the
            # client then checks reachability over the other one.
            if data.find("IPv4") > -1:
                check_ip = 6
            elif data.find("IPv6") > -1:
                check_ip = 4
            else:
                print(data)
                raise socket.error

            while True:
                CPU = get_cpu()
                NET_IN, NET_OUT = liuliang()
                Uptime = get_uptime()
                Load_1, Load_5, Load_15 = os.getloadavg()
                MemoryTotal, MemoryUsed, SwapTotal, SwapFree = get_memory()
                HDDTotal, HDDUsed = get_hdd()

                array = {}
                # "<= 0" rather than "not timer": with an INTERVAL that does
                # not divide 10 the countdown skips past zero and the online
                # check would otherwise never be sent again.
                if timer <= 0:
                    array["online" + str(check_ip)] = get_network(check_ip)
                    timer = 10
                else:
                    timer -= INTERVAL

                array["uptime"] = Uptime
                array["load_1"] = Load_1
                array["load_5"] = Load_5
                array["load_15"] = Load_15
                array["memory_total"] = MemoryTotal
                array["memory_used"] = MemoryUsed
                array["swap_total"] = SwapTotal
                array["swap_used"] = SwapTotal - SwapFree
                array["hdd_total"] = HDDTotal
                array["hdd_used"] = HDDUsed
                array["cpu"] = CPU
                array["network_rx"] = netSpeed.get("netrx")
                array["network_tx"] = netSpeed.get("nettx")
                array["network_in"] = NET_IN
                array["network_out"] = NET_OUT
                # TODO: kept for compatibility with old versions; ip_status
                # is to be removed in the next version.
                array["ip_status"] = True
                array["ping_10010"] = lostRate.get("10010") * 100
                array["ping_189"] = lostRate.get("189") * 100
                array["ping_10086"] = lostRate.get("10086") * 100
                array["time_10010"] = pingTime.get("10010")
                array["time_189"] = pingTime.get("189")
                array["time_10086"] = pingTime.get("10086")
                array["tcp"], array["udp"], array["process"], array["thread"] = tupd()
                array["io_read"] = diskIO.get("read")
                array["io_write"] = diskIO.get("write")

                # sendall: a partial send would corrupt the line protocol.
                s.sendall(byte_str("update " + json.dumps(array) + "\n"))
        except KeyboardInterrupt:
            raise
        except socket.error:
            print("Disconnected...")
        except Exception as e:
            print("Caught Exception:", e)
        finally:
            # Close explicitly instead of relying on garbage collection.
            if s is not None:
                s.close()
        # Only reached after a handled failure; back off before reconnecting.
        time.sleep(3)
其中,对于在不同的服务器上安装 client,我们只需要修改开头的 SERVER、USER、PASSWORD 即可,将 SERVER 修改为安装了服务端的服务器的 IP,将 USER 修改为 serverstatus-config.json 中对应服务器的 username,PASSWORD 修改为 serverstatus-config.json 对应服务器的 password。
然后执行 sudo nvim /etc/systemd/system/client_status.service,将内容修改为以下:
[Unit]
Description=Client Status

[Service]
# ExecStart is not run through a shell: redirections (">/dev/null 2>&1") and
# "&" would be handed to the script as literal arguments. systemd already
# supervises the process, so output is discarded with the directives below.
ExecStart=/usr/bin/python3 /home/myuser/client_status/client-linux.py
StandardOutput=null
StandardError=null
Restart=always

[Install]
WantedBy=multi-user.target
注意 ExecStart 字段中的路径要与 client-linux.py 的实际路径相匹配,必须使用绝对路径,而不能使用 ~!
## 修改 Caddyfile
添加以下内容:
# Serve the ServerStatus web UI under its own hostname by proxying to
# port 10081, the host port that docker-compose.yml maps to the container's
# port 80.
status.example.eu.org {
	reverse_proxy example.eu.org:10081
}
这样就能通过 status.example.eu.org 来直接访问网页而不需要通过网址 + 端口来访问了。