diff --git a/.gitignore b/.gitignore
index db687ba..fc468e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 /agent
 /cmd/agent/agent
 *.pprof
+dist
\ No newline at end of file
diff --git a/pkg/monitor/monitor.go b/pkg/monitor/monitor.go
index f8312b3..7b9a491 100644
--- a/pkg/monitor/monitor.go
+++ b/pkg/monitor/monitor.go
@@ -41,9 +41,22 @@ var (
 	netInSpeed, netOutSpeed, netInTransfer, netOutTransfer, lastUpdateNetStats uint64
 	cachedBootTime                                                             time.Time
 	gpuStat                                                                    float64
+	temperatureStat                                                            []model.SensorTemperature
 )
 
-var updateStatus int32
+// maxDeviceDataFetchAttempts is the number of consecutive failed fetches after which a device data source is no longer queried.
+const maxDeviceDataFetchAttempts = 3
+
+// deviceDataFetchAttempts counts consecutive failed fetches per data source; the key is the HostState field name.
+var deviceDataFetchAttempts = map[string]int{
+	"Temperatures": 0,
+}
+
+// updateGPUStatus and updateTempStatus are 0 when idle and 1 while the matching refresh goroutine is running.
+var (
+	updateGPUStatus  int32
+	updateTempStatus int32
+)
 
 // GetHost 获取主机硬件信息
 func GetHost(agentConfig *model.AgentConfig) *model.Host {
@@ -205,17 +218,9 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
 		}
 	}
 
-	temperatures, err := host.SensorsTemperatures()
-	if err != nil {
-		println("host.SensorsTemperatures error:", err)
-	} else {
-		for _, t := range temperatures {
-			ret.Temperatures = append(ret.Temperatures, model.SensorTemperature{
-				Name:        t.SensorKey,
-				Temperature: t.Temperature,
-			})
-		}
-	}
+	// Temperatures are refreshed in the background; report the last cached reading.
+	go updateTemperatureStat(&temperatureStat)
+	ret.Temperatures = temperatureStat
 
 	go updateGPUStat(agentConfig, &gpuStat)
 	ret.GPU = gpuStat
@@ -311,10 +316,10 @@ func getDiskTotalAndUsed(agentConfig *model.AgentConfig) (total uint64, used uin
 }
 
 func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) {
-	if !atomic.CompareAndSwapInt32(&updateStatus, 0, 1) {
+	if !atomic.CompareAndSwapInt32(&updateGPUStatus, 0, 1) {
 		return
 	}
-	defer atomic.StoreInt32(&updateStatus, 0)
+	defer atomic.StoreInt32(&updateGPUStatus, 0)
 	if agentConfig.GPU {
 		gs, err := gpustat.GetGPUStat()
 		if err != nil {
@@ -328,6 +333,47 @@ func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) {
 	}
 }
 
+// updateTemperatureStat refreshes the temperature cache that tempStat points
+// to. GetState starts it as a goroutine; the updateTempStatus flag makes a
+// call return immediately while another one is still running. After
+// maxDeviceDataFetchAttempts consecutive failures the sensors are no longer
+// queried.
+//
+// NOTE(review): GetState reads temperatureStat without synchronisation while
+// this goroutine writes it (same pattern as gpuStat) - confirm that is acceptable.
+func updateTemperatureStat(tempStat *[]model.SensorTemperature) {
+	if !atomic.CompareAndSwapInt32(&updateTempStatus, 0, 1) {
+		return
+	}
+	defer atomic.StoreInt32(&updateTempStatus, 0)
+
+	if deviceDataFetchAttempts["Temperatures"] >= maxDeviceDataFetchAttempts {
+		return
+	}
+
+	temperatures, err := host.SensorsTemperatures()
+	if err != nil {
+		deviceDataFetchAttempts["Temperatures"]++
+		// err.Error(): the builtin println does not format an error value readably.
+		println("host.SensorsTemperatures error:", err.Error(), "attempt:", deviceDataFetchAttempts["Temperatures"])
+		return
+	}
+	deviceDataFetchAttempts["Temperatures"] = 0
+
+	// Build the snapshot from scratch: appending to *tempStat would pile the
+	// full sensor list onto the previous readings on every refresh.
+	var fresh []model.SensorTemperature
+	for _, t := range temperatures {
+		if t.Temperature > 0 {
+			fresh = append(fresh, model.SensorTemperature{
+				Name:        t.SensorKey,
+				Temperature: t.Temperature,
+			})
+		}
+	}
+	*tempStat = fresh
+}
+
 func isListContainsStr(list []string, str string) bool {
 	for i := 0; i < len(list); i++ {
 		if strings.Contains(str, list[i]) {