UUBulb 3dfb62287b
report geoip separately (#87)
* report geoip separately

* default 30s
2024-11-22 22:36:50 +08:00

286 lines
6.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package monitor
import (
	"context"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/shirou/gopsutil/v4/host"
	"github.com/shirou/gopsutil/v4/mem"
	"github.com/shirou/gopsutil/v4/process"

	"github.com/nezhahq/agent/model"
	"github.com/nezhahq/agent/pkg/monitor/conn"
	"github.com/nezhahq/agent/pkg/monitor/cpu"
	"github.com/nezhahq/agent/pkg/monitor/disk"
	"github.com/nezhahq/agent/pkg/monitor/gpu"
	"github.com/nezhahq/agent/pkg/monitor/load"
	"github.com/nezhahq/agent/pkg/monitor/nic"
	"github.com/nezhahq/agent/pkg/monitor/temperature"
	"github.com/nezhahq/agent/pkg/util"
)
var (
// Version is the agent version string; GetHost copies it into the reported Host.
Version string
// agentConfig is the configuration installed by InitConfig and read by the
// collectors in this file.
agentConfig *model.AgentConfig
)
var (
// Network counters maintained by TrackNetworkSpeed: per-second in/out rates,
// the latest in/out transfer totals, and the Unix timestamp (seconds) of the
// last update. GetState copies the rates and totals into the reported state.
netInSpeed, netOutSpeed, netInTransfer, netOutTransfer, lastUpdateNetStats uint64
// cachedBootTime is set by GetHost from host.Info and used by GetState to
// derive the uptime.
cachedBootTime time.Time
// temperatureStat holds the readings most recently stored by
// updateTemperatureStat; GetState reports whatever is stored here.
temperatureStat []model.SensorTemperature
)
// maxDeviceDataFetchAttempts is the maximum number of attempts to fetch device
// data. tryHost and tryStat stop calling a collector once its failure counter
// reaches this value; a successful fetch resets the counter to zero.
const maxDeviceDataFetchAttempts = 3
// Collector identifiers, used as keys of the fetch-attempt counters.
// They are untyped constants numbered consecutively from 1.
const (
	_ = iota // skip 0 so that the first identifier is 1

	// CPU identifies the CPU collector.
	CPU
	// GPU identifies the GPU collector.
	GPU
	// Load identifies the system load collector.
	Load
	// Temperatures identifies the temperature sensor collector.
	Temperatures
)
// hostDataFetchAttempts counts the attempts made to fetch host data; the key
// identifies the Host attribute being fetched. tryHost increments the entry on
// failure and resets it to zero on success.
var hostDataFetchAttempts = map[uint8]uint8{
CPU: 0,
GPU: 0,
}
// statDataFetchAttempts counts the attempts made to fetch state data; the key
// identifies the HostState attribute being fetched. tryStat increments the
// entry on failure and resets it to zero on success.
var statDataFetchAttempts = map[uint8]uint8{
CPU: 0,
GPU: 0,
Load: 0,
Temperatures: 0,
}
var (
// updateTempStatus is true while an updateTemperatureStat run is in progress;
// a call that finds it already set returns without doing anything.
updateTempStatus = new(atomic.Bool)
)
// InitConfig stores cfg as the package-level agent configuration. The
// collectors in this file dereference that configuration, so it has to be set
// before they are called.
func InitConfig(cfg *model.AgentConfig) {
agentConfig = cfg
}
// GetHost collects the host's hardware information: platform, architecture,
// virtualization, CPU and (when enabled in the config) GPU descriptions,
// disk/memory/swap totals, boot time and the agent version.
//
// Every probe is best-effort: a failing probe is logged through printf and the
// corresponding fields keep their zero values. On a successful host.Info call
// the package-level cachedBootTime is refreshed as a side effect.
func GetHost() *model.Host {
	var ret model.Host

	// cpuType stays empty when host.Info fails; it is handed to the CPU
	// collector through the context below.
	var cpuType string

	hi, err := host.Info()
	if err != nil {
		printf("host.Info error: %v", err)
	} else {
		if hi.VirtualizationRole == "guest" {
			cpuType = "Virtual"
			ret.Virtualization = hi.VirtualizationSystem
		} else {
			cpuType = "Physical"
		}
		ret.Platform = hi.Platform
		ret.PlatformVersion = hi.PlatformVersion
		ret.Arch = hi.KernelArch
		ret.BootTime = hi.BootTime

		// Refresh the cached boot time only here: hi may be nil when
		// host.Info fails, so it must not be dereferenced on the error path.
		cachedBootTime = time.Unix(int64(hi.BootTime), 0)
	}

	ctxCpu := context.WithValue(context.Background(), cpu.CPUHostKey, cpuType)
	ret.CPU = tryHost(ctxCpu, CPU, cpu.GetHost)

	if agentConfig.GPU {
		ret.GPU = tryHost(context.Background(), GPU, gpu.GetHost)
	}

	ret.DiskTotal = getDiskTotal()

	mv, err := mem.VirtualMemory()
	if err != nil {
		printf("mem.VirtualMemory error: %v", err)
	} else {
		ret.MemTotal = mv.Total
		if runtime.GOOS != "windows" {
			ret.SwapTotal = mv.SwapTotal
		}
	}

	// On Windows the swap total is taken from mem.SwapMemory instead of
	// mem.VirtualMemory.
	if runtime.GOOS == "windows" {
		ms, err := mem.SwapMemory()
		if err != nil {
			printf("mem.SwapMemory error: %v", err)
		} else {
			ret.SwapTotal = ms.Total
		}
	}

	ret.Version = Version

	return &ret
}
// GetState collects a snapshot of the host's runtime state: CPU usage, memory,
// swap and disk usage, load averages, process count, temperatures, GPU usage,
// network totals and rates, uptime and connection counts.
//
// skipConnectionCount skips the TCP/UDP connection count and skipProcsCount
// skips the process count; the skipped fields keep their zero values. Every
// probe is best-effort: failures are logged through printf and leave the
// corresponding fields at zero.
func GetState(skipConnectionCount bool, skipProcsCount bool) *model.HostState {
	var ret model.HostState

	cp := tryStat(context.Background(), CPU, cpu.GetState)
	if len(cp) > 0 {
		ret.CPU = cp[0]
	}

	vm, err := mem.VirtualMemory()
	if err != nil {
		printf("mem.VirtualMemory error: %v", err)
	} else {
		ret.MemUsed = vm.Total - vm.Available
		if runtime.GOOS != "windows" {
			ret.SwapUsed = vm.SwapTotal - vm.SwapFree
		}
	}

	if runtime.GOOS == "windows" {
		// gopsutil cannot read swap correctly through VirtualMemory on Windows.
		ms, err := mem.SwapMemory()
		if err != nil {
			printf("mem.SwapMemory error: %v", err)
		} else {
			ret.SwapUsed = ms.Used
		}
	}

	ret.DiskUsed = getDiskUsed()

	// NOTE(review): tryStat returns the zero value of its result type on
	// failure. If load.GetState returns a pointer type, that zero value is nil
	// and the field reads below would panic — confirm against the load package.
	loadStat := tryStat(context.Background(), Load, load.GetState)
	ret.Load1 = loadStat.Load1
	ret.Load5 = loadStat.Load5
	ret.Load15 = loadStat.Load15

	if !skipProcsCount {
		if pids, err := process.Pids(); err != nil {
			printf("process.Pids error: %v", err)
		} else {
			ret.ProcessCount = uint64(len(pids))
		}
	}

	if agentConfig.Temperature {
		// The refresh runs in the background; this call reports whatever
		// temperatureStat currently holds, not the result of the refresh that
		// was just started.
		go updateTemperatureStat()
		ret.Temperatures = temperatureStat
	}

	ret.GPU = tryStat(context.Background(), GPU, gpu.GetState)

	ret.NetInTransfer, ret.NetOutTransfer = netInTransfer, netOutTransfer
	ret.NetInSpeed, ret.NetOutSpeed = netInSpeed, netOutSpeed

	// cachedBootTime is only populated by a successful GetHost. While it is
	// still the zero time, time.Since would saturate and yield a meaningless
	// uptime, so the field is left at zero instead.
	if !cachedBootTime.IsZero() {
		ret.Uptime = uint64(time.Since(cachedBootTime).Seconds())
	}

	if !skipConnectionCount {
		ret.TcpConnCount, ret.UdpConnCount = getConns()
	}

	return &ret
}
// TrackNetworkSpeed samples the NIC transfer counters and updates the
// package-level transfer totals and per-second rates.
//
// The NIC allowlist from the agent config is passed to the collector through
// the context. If the collector fails or returns fewer than two values, the
// previous totals and rates are kept unchanged.
func TrackNetworkSpeed() {
	ctx := context.WithValue(context.Background(), nic.NICKey, agentConfig.NICAllowlist)
	nc, err := nic.GetState(ctx)
	if err != nil {
		return
	}
	// The result is read as [in, out]; guard against a shorter result rather
	// than panicking on the index expressions below.
	if len(nc) < 2 {
		return
	}
	in, out := nc[0], nc[1]

	now := uint64(time.Now().Unix())
	// Rates are only recomputed when at least one second has passed. The
	// explicit comparison also avoids an unsigned underflow if the wall clock
	// stepped backwards since the previous sample.
	if now > lastUpdateNetStats {
		elapsed := now - lastUpdateNetStats
		netInSpeed = transferRate(in, netInTransfer, elapsed)
		netOutSpeed = transferRate(out, netOutTransfer, elapsed)
	}

	netInTransfer, netOutTransfer = in, out
	lastUpdateNetStats = now
}

// transferRate returns the average per-second increase from prev to cur over
// elapsed seconds. It returns 0 when elapsed is 0 or when the counter went
// backwards (cur < prev), where an unsigned subtraction would otherwise wrap
// around to a huge value.
func transferRate(cur, prev, elapsed uint64) uint64 {
	if elapsed == 0 || cur < prev {
		return 0
	}
	return (cur - prev) / elapsed
}
// getDiskTotal returns the total disk size reported by disk.GetHost. The
// partition allowlist from the agent config is passed to the collector through
// the context. A collector error is logged through printf, and whatever total
// the collector returned alongside it is passed through unchanged.
func getDiskTotal() uint64 {
	ctx := context.WithValue(context.Background(), disk.DiskKey, agentConfig.HardDrivePartitionAllowlist)
	total, err := disk.GetHost(ctx)
	if err != nil {
		printf("disk.GetHost error: %v", err)
	}
	return total
}
// getDiskUsed returns the used disk space reported by disk.GetState. The
// partition allowlist from the agent config is passed to the collector through
// the context. A collector error is logged through printf, and whatever value
// the collector returned alongside it is passed through unchanged.
func getDiskUsed() uint64 {
	ctx := context.WithValue(context.Background(), disk.DiskKey, agentConfig.HardDrivePartitionAllowlist)
	used, err := disk.GetState(ctx)
	if err != nil {
		printf("disk.GetState error: %v", err)
	}
	return used
}
// getConns returns the TCP and UDP connection counts taken from the first two
// values of conn.GetState. Both counts are zero when the collector fails or
// returns fewer than two values.
func getConns() (tcpConnCount, udpConnCount uint64) {
	counts, err := conn.GetState(context.Background())
	if err != nil || len(counts) < 2 {
		return 0, 0
	}
	return counts[0], counts[1]
}
// updateTemperatureStat refreshes temperatureStat with the result of the
// temperature collector. Only one refresh runs at a time: a call made while
// another refresh is in progress returns without doing anything.
func updateTemperatureStat() {
	if updateTempStatus.CompareAndSwap(false, true) {
		// Clear the in-progress flag once this refresh has finished.
		defer updateTempStatus.Store(false)

		temperatureStat = tryStat(context.Background(), Temperatures, temperature.GetState)
	}
}
// hostStateFunc is the signature of a collector accepted by tryHost and
// tryStat: it takes a context and returns a value of type T or an error.
type hostStateFunc[T any] func(context.Context) (T, error)
// tryHost runs the host collector f for the collector identified by typ and
// returns its result.
//
// The zero value of T is returned when f fails, or when typ has already reached
// maxDeviceDataFetchAttempts failures, in which case f is not called at all. A
// failure increments the counter for typ and is logged; a success resets it.
func tryHost[T any](ctx context.Context, typ uint8, f hostStateFunc[T]) T {
	var zero T

	if hostDataFetchAttempts[typ] >= maxDeviceDataFetchAttempts {
		return zero
	}

	result, err := f(ctx)
	if err == nil {
		hostDataFetchAttempts[typ] = 0
		return result
	}

	hostDataFetchAttempts[typ]++
	printf("monitor error: %v, attempt: %d", err, hostDataFetchAttempts[typ])
	return zero
}
// statAttemptsMu guards statDataFetchAttempts. GetState calls tryStat directly
// and also starts updateTemperatureStat in a separate goroutine, which calls
// tryStat as well, so the map is accessed from more than one goroutine.
var statAttemptsMu sync.Mutex

// tryStat runs the state collector f for the collector identified by typ and
// returns its result.
//
// The zero value of T is returned when f fails, or when typ has already reached
// maxDeviceDataFetchAttempts failures, in which case f is not called at all. A
// failure increments the counter for typ and is logged; a success resets it.
// Access to the counters is serialized, so tryStat may be called concurrently.
func tryStat[T any](ctx context.Context, typ uint8, f hostStateFunc[T]) T {
	var zero T

	statAttemptsMu.Lock()
	exhausted := statDataFetchAttempts[typ] >= maxDeviceDataFetchAttempts
	statAttemptsMu.Unlock()
	if exhausted {
		return zero
	}

	// Run the collector without holding the lock so that a slow probe does not
	// block the other collectors.
	v, err := f(ctx)

	statAttemptsMu.Lock()
	if err != nil {
		statDataFetchAttempts[typ]++
	} else {
		statDataFetchAttempts[typ] = 0
	}
	attempts := statDataFetchAttempts[typ]
	statAttemptsMu.Unlock()

	if err != nil {
		printf("monitor error: %v, attempt: %d", err, attempts)
		return zero
	}
	return v
}
// printf forwards a formatted message to util.Printf together with the Debug
// flag from the agent configuration.
func printf(format string, v ...any) {
	util.Printf(agentConfig.Debug, format, v...)
}