feat: make logging and temperature monitoring optional (#35)

* feat: make logging and temperature monitoring optional

* chore: improve code style

* fix: import

---------

Co-authored-by: naiba <hi@nai.ba>
This commit is contained in:
UUBulb 2024-07-10 23:58:19 +08:00 committed by GitHub
parent 1bf702cfaa
commit 3bf3f97767
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 89 additions and 60 deletions

View File

@ -72,13 +72,29 @@ func editAgentConfig(cmd *cobra.Command, args []string) {
Default: false,
},
},
{
Name: "temperature",
Prompt: &survey.Confirm{
Message: "是否启用温度监控?",
Default: false,
},
},
{
Name: "slient",
Prompt: &survey.Confirm{
Message: "是否禁用日志输出?",
Default: false,
},
},
}
answers := struct {
Nic []string
Disk []string
DNS string
GPU bool
Nic []string
Disk []string
DNS string
GPU bool
Temperature bool
Slient bool
}{}
err = survey.Ask(qs, &answers, survey.WithValidator(survey.Required))
@ -117,6 +133,8 @@ func editAgentConfig(cmd *cobra.Command, args []string) {
}
agentConfig.GPU = answers.GPU
agentConfig.Temperature = answers.Temperature
agentConfig.Slient = answers.Slient
if err = agentConfig.Save(); err != nil {
panic(err)

View File

@ -152,10 +152,14 @@ func init() {
agentCmd.PersistentFlags().BoolVar(&agentCliParam.DisableForceUpdate, "disable-force-update", false, "禁用强制升级")
agentCmd.PersistentFlags().BoolVar(&agentCliParam.UseIPv6CountryCode, "use-ipv6-countrycode", false, "使用IPv6的位置上报")
agentCmd.PersistentFlags().BoolVar(&agentConfig.GPU, "gpu", false, "启用GPU监控")
agentCmd.PersistentFlags().BoolVar(&agentConfig.Temperature, "temperature", false, "启用温度监控")
agentCmd.PersistentFlags().Uint32VarP(&agentCliParam.IPReportPeriod, "ip-report-period", "u", 30*60, "本地IP更新间隔, 上报频率依旧取决于report-delay的值")
agentCmd.Flags().BoolVarP(&agentConfig.Slient, "slient", "q", false, "关闭日志输出")
agentCmd.Flags().BoolVarP(&agentCliParam.Version, "version", "v", false, "查看当前版本号")
agentConfig.Read(filepath.Dir(ex) + "/config.yml")
monitor.InitConfig(&agentConfig)
}
func main() {
@ -219,7 +223,7 @@ func run() {
// 上报服务器信息
go reportState()
// 更新IP信息
go monitor.UpdateIP(agentCliParam.UseIPv6CountryCode, agentCliParam.IPReportPeriod)
go monitor.UpdateIP(agentConfig.Slient, agentCliParam.UseIPv6CountryCode, agentCliParam.IPReportPeriod)
// 定时检查更新
if _, err := semver.Parse(version); err == nil && !agentCliParam.DisableAutoUpdate {
@ -267,7 +271,7 @@ func run() {
client = pb.NewNezhaServiceClient(conn)
// 第一步注册
timeOutCtx, cancel = context.WithTimeout(context.Background(), networkTimeOut)
_, err = client.ReportSystemInfo(timeOutCtx, monitor.GetHost(&agentConfig).PB())
_, err = client.ReportSystemInfo(timeOutCtx, monitor.GetHost().PB())
if err != nil {
println("上报系统信息失败:", err)
cancel()
@ -277,7 +281,7 @@ func run() {
cancel()
inited = true
// 执行 Task
tasks, err := client.RequestTask(context.Background(), monitor.GetHost(&agentConfig).PB())
tasks, err := client.RequestTask(context.Background(), monitor.GetHost().PB())
if err != nil {
println("请求任务失败:", err)
retry()
@ -400,9 +404,9 @@ func reportState() {
for {
// 为了更准确的记录时段流量inited 后再上传状态信息
if client != nil && inited {
monitor.TrackNetworkSpeed(&agentConfig)
monitor.TrackNetworkSpeed()
timeOutCtx, cancel := context.WithTimeout(context.Background(), networkTimeOut)
_, err = client.ReportSystemState(timeOutCtx, monitor.GetState(&agentConfig, agentCliParam.SkipConnectionCount, agentCliParam.SkipProcsCount).PB())
_, err = client.ReportSystemState(timeOutCtx, monitor.GetState(agentCliParam.SkipConnectionCount, agentCliParam.SkipProcsCount).PB())
cancel()
if err != nil {
println("reportState error", err)
@ -411,7 +415,7 @@ func reportState() {
// 每10分钟重新获取一次硬件信息
if lastReportHostInfo.Before(time.Now().Add(-10 * time.Minute)) {
lastReportHostInfo = time.Now()
client.ReportSystemInfo(context.Background(), monitor.GetHost(&agentConfig).PB())
client.ReportSystemInfo(context.Background(), monitor.GetHost().PB())
}
}
time.Sleep(time.Second * time.Duration(agentCliParam.ReportDelay))
@ -709,7 +713,7 @@ func handleTerminalTask(task *pb.Task) {
func println(v ...interface{}) {
if agentCliParam.Debug {
util.Println(v...)
util.Println(agentConfig.Slient, v...)
}
}

View File

@ -12,6 +12,8 @@ type AgentConfig struct {
NICAllowlist map[string]bool
DNS []string
GPU bool
Temperature bool
Slient bool
v *viper.Viper
}

View File

@ -7,7 +7,6 @@ import (
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
@ -37,6 +36,7 @@ var (
excludeNetInterfaces = []string{
"lo", "tun", "docker", "veth", "br-", "vmbr", "vnet", "kube",
}
agentConfig *model.AgentConfig
)
var (
@ -66,17 +66,20 @@ var statDataFetchAttempts = map[string]int{
var (
updateGPUStatus int32
updateTempStatus int32
tempWriteLock sync.RWMutex
)
// InitConfig stores cfg in the package-level agentConfig variable.
// Functions in this package read their settings through that pointer
// (for example agentConfig.Temperature, agentConfig.GPU and
// agentConfig.Slient), so the caller keeps ownership of cfg and later
// changes to it are visible here.
func InitConfig(cfg *model.AgentConfig) {
agentConfig = cfg
}
// GetHost 获取主机硬件信息
func GetHost(agentConfig *model.AgentConfig) *model.Host {
func GetHost() *model.Host {
var ret model.Host
var cpuType string
hi, err := host.Info()
if err != nil {
util.Println("host.Info error: ", err)
println("host.Info error: ", err)
} else {
if hi.VirtualizationRole == "guest" {
cpuType = "Virtual"
@ -99,7 +102,7 @@ func GetHost(agentConfig *model.AgentConfig) *model.Host {
ci, err := cpu.Info()
if err != nil {
hostDataFetchAttempts["CPU"]++
util.Println("cpu.Info error: ", err, ", attempt: ", hostDataFetchAttempts["CPU"])
println("cpu.Info error: ", err, ", attempt: ", hostDataFetchAttempts["CPU"])
} else {
hostDataFetchAttempts["CPU"] = 0
for i := 0; i < len(ci); i++ {
@ -120,18 +123,18 @@ func GetHost(agentConfig *model.AgentConfig) *model.Host {
ret.GPU, err = gpu.GetGPUModel()
if err != nil {
hostDataFetchAttempts["GPU"]++
util.Println("gpu.GetGPUModel error: ", err, ", attempt: ", hostDataFetchAttempts["GPU"])
println("gpu.GetGPUModel error: ", err, ", attempt: ", hostDataFetchAttempts["GPU"])
} else {
hostDataFetchAttempts["GPU"] = 0
}
}
}
ret.DiskTotal, _ = getDiskTotalAndUsed(agentConfig)
ret.DiskTotal, _ = getDiskTotalAndUsed()
mv, err := mem.VirtualMemory()
if err != nil {
util.Println("mem.VirtualMemory error: ", err)
println("mem.VirtualMemory error: ", err)
} else {
ret.MemTotal = mv.Total
if runtime.GOOS != "windows" {
@ -142,7 +145,7 @@ func GetHost(agentConfig *model.AgentConfig) *model.Host {
if runtime.GOOS == "windows" {
ms, err := mem.SwapMemory()
if err != nil {
util.Println("mem.SwapMemory error: ", err)
println("mem.SwapMemory error: ", err)
} else {
ret.SwapTotal = ms.Total
}
@ -157,14 +160,14 @@ func GetHost(agentConfig *model.AgentConfig) *model.Host {
return &ret
}
func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProcsCount bool) *model.HostState {
func GetState(skipConnectionCount bool, skipProcsCount bool) *model.HostState {
var ret model.HostState
if statDataFetchAttempts["CPU"] < maxDeviceDataFetchAttempts {
cp, err := cpu.Percent(0, false)
if err != nil || len(cp) == 0 {
statDataFetchAttempts["CPU"]++
util.Println("cpu.Percent error: ", err, ", attempt: ", statDataFetchAttempts["CPU"])
println("cpu.Percent error: ", err, ", attempt: ", statDataFetchAttempts["CPU"])
} else {
statDataFetchAttempts["CPU"] = 0
ret.CPU = cp[0]
@ -173,7 +176,7 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
vm, err := mem.VirtualMemory()
if err != nil {
util.Println("mem.VirtualMemory error: ", err)
println("mem.VirtualMemory error: ", err)
} else {
ret.MemUsed = vm.Total - vm.Available
if runtime.GOOS != "windows" {
@ -184,19 +187,19 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
// gopsutil 在 Windows 下不能正确取 swap
ms, err := mem.SwapMemory()
if err != nil {
util.Println("mem.SwapMemory error: ", err)
println("mem.SwapMemory error: ", err)
} else {
ret.SwapUsed = ms.Used
}
}
_, ret.DiskUsed = getDiskTotalAndUsed(agentConfig)
_, ret.DiskUsed = getDiskTotalAndUsed()
if statDataFetchAttempts["Load"] < maxDeviceDataFetchAttempts {
loadStat, err := load.Avg()
if err != nil {
statDataFetchAttempts["Load"]++
util.Println("load.Avg error: ", err, ", attempt: ", statDataFetchAttempts["Load"])
println("load.Avg error: ", err, ", attempt: ", statDataFetchAttempts["Load"])
} else {
statDataFetchAttempts["Load"] = 0
ret.Load1 = loadStat.Load1
@ -209,7 +212,7 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
if !skipProcsCount {
procs, err = process.Pids()
if err != nil {
util.Println("process.Pids error: ", err)
println("process.Pids error: ", err)
} else {
ret.ProcessCount = uint64(len(procs))
}
@ -249,14 +252,15 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
}
}
go updateTemperatureStat()
if agentConfig.Temperature {
go updateTemperatureStat()
ret.Temperatures = temperatureStat
}
tempWriteLock.RLock()
defer tempWriteLock.RUnlock()
ret.Temperatures = temperatureStat
go updateGPUStat(agentConfig, &gpuStat)
ret.GPU = math.Float64frombits(gpuStat)
if agentConfig.GPU {
go updateGPUStat(&gpuStat)
ret.GPU = math.Float64frombits(gpuStat)
}
ret.NetInTransfer, ret.NetOutTransfer = netInTransfer, netOutTransfer
ret.NetInSpeed, ret.NetOutSpeed = netInSpeed, netOutSpeed
@ -267,7 +271,7 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
}
// TrackNetworkSpeed NIC监控统计流量与速度
func TrackNetworkSpeed(agentConfig *model.AgentConfig) {
func TrackNetworkSpeed() {
var innerNetInTransfer, innerNetOutTransfer uint64
nc, err := net.IOCounters(true)
if err == nil {
@ -296,7 +300,7 @@ func TrackNetworkSpeed(agentConfig *model.AgentConfig) {
}
}
func getDiskTotalAndUsed(agentConfig *model.AgentConfig) (total uint64, used uint64) {
func getDiskTotalAndUsed() (total uint64, used uint64) {
devices := make(map[string]string)
if len(agentConfig.HardDrivePartitionAllowlist) > 0 {
@ -348,23 +352,21 @@ func getDiskTotalAndUsed(agentConfig *model.AgentConfig) (total uint64, used uin
return
}
func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *uint64) {
func updateGPUStat(gpuStat *uint64) {
if !atomic.CompareAndSwapInt32(&updateGPUStatus, 0, 1) {
return
}
defer atomic.StoreInt32(&updateGPUStatus, 0)
if agentConfig.GPU {
if statDataFetchAttempts["GPU"] < maxDeviceDataFetchAttempts {
gs, err := gpustat.GetGPUStat()
if err != nil {
statDataFetchAttempts["GPU"]++
util.Println("gpustat.GetGPUStat error: ", err, ", attempt: ", statDataFetchAttempts["GPU"])
atomicStoreFloat64(gpuStat, gs)
} else {
statDataFetchAttempts["GPU"] = 0
atomicStoreFloat64(gpuStat, gs)
}
if statDataFetchAttempts["GPU"] < maxDeviceDataFetchAttempts {
gs, err := gpustat.GetGPUStat()
if err != nil {
statDataFetchAttempts["GPU"]++
println("gpustat.GetGPUStat error: ", err, ", attempt: ", statDataFetchAttempts["GPU"])
atomicStoreFloat64(gpuStat, gs)
} else {
statDataFetchAttempts["GPU"] = 0
atomicStoreFloat64(gpuStat, gs)
}
}
}
@ -379,7 +381,7 @@ func updateTemperatureStat() {
temperatures, err := sensors.SensorsTemperatures()
if err != nil {
statDataFetchAttempts["Temperatures"]++
util.Println("host.SensorsTemperatures error: ", err, ", attempt: ", statDataFetchAttempts["Temperatures"])
println("host.SensorsTemperatures error: ", err, ", attempt: ", statDataFetchAttempts["Temperatures"])
} else {
statDataFetchAttempts["Temperatures"] = 0
tempStat := []model.SensorTemperature{}
@ -392,8 +394,6 @@ func updateTemperatureStat() {
}
}
tempWriteLock.Lock()
defer tempWriteLock.Unlock()
temperatureStat = tempStat
}
}
@ -411,3 +411,7 @@ func isListContainsStr(list []string, str string) bool {
// atomicStoreFloat64 atomically writes v into *x, encoded as its
// IEEE 754 bit pattern (math.Float64bits). Readers decode the stored
// word with math.Float64frombits.
func atomicStoreFloat64(x *uint64, v float64) {
	bits := math.Float64bits(v)
	atomic.StoreUint64(x, bits)
}
func println(v ...interface{}) {
util.Println(agentConfig.Slient, v...)
}

View File

@ -3,7 +3,6 @@ package monitor
import (
"fmt"
"io"
"log"
"net/http"
"strings"
"time"
@ -55,9 +54,9 @@ var (
)
// UpdateIP 按设置时间间隔更新IP地址与国家码的缓存
func UpdateIP(useIPv6CountryCode bool, period uint32) {
func UpdateIP(logging bool, useIPv6CountryCode bool, period uint32) {
for {
log.Println("NEZHA_AGENT>> 正在更新本地缓存IP信息")
util.Println(logging, "NEZHA_AGENT>> 正在更新本地缓存IP信息")
var primaryIP, secondaryIP geoIP
if useIPv6CountryCode {
primaryIP = fetchGeoIP(geoIPApiList, true)

View File

@ -19,11 +19,13 @@ func IsWindows() bool {
return os.PathSeparator == '\\' && os.PathListSeparator == ';'
}
func Println(v ...interface{}) {
if runtime.GOOS != "darwin" {
Logger.Infof("NEZHA@%s>> %v", time.Now().Format("2006-01-02 15:04:05"), fmt.Sprint(v...))
} else {
fmt.Printf("NEZHA@%s>> ", time.Now().Format("2006-01-02 15:04:05"))
fmt.Println(v...)
// Println emits v as one timestamped "NEZHA@<time>>> " log line.
// It does nothing when disabled is true. On darwin the line is written
// with fmt to standard output; on every other OS it goes through
// Logger.Infof.
func Println(disabled bool, v ...interface{}) {
	if disabled {
		return
	}

	stamp := time.Now().Format("2006-01-02 15:04:05")
	if runtime.GOOS == "darwin" {
		fmt.Printf("NEZHA@%s>> ", stamp)
		fmt.Println(v...)
		return
	}
	Logger.Infof("NEZHA@%s>> %v", stamp, fmt.Sprint(v...))
}