refactor: improve error handling for fetching sensor temperatures (#27)

This commit is contained in:
Leon 2024-06-23 14:42:27 +08:00 committed by GitHub
parent 338f0dabbe
commit 136b3801c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 42 additions and 14 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
/agent /agent
/cmd/agent/agent /cmd/agent/agent
*.pprof *.pprof
dist

View File

@ -41,9 +41,21 @@ var (
netInSpeed, netOutSpeed, netInTransfer, netOutTransfer, lastUpdateNetStats uint64 netInSpeed, netOutSpeed, netInTransfer, netOutTransfer, lastUpdateNetStats uint64
cachedBootTime time.Time cachedBootTime time.Time
gpuStat float64 gpuStat float64
temperatureStat []model.SensorTemperature
) )
var updateStatus int32 // 获取设备数据的最大尝试次数
const maxDeviceDataFetchAttempts = 3
// 获取状态数据的尝试次数Key 为 HostState 的属性名
var deviceDataFetchAttempts = map[string]int{
"Temperatures": 0,
}
var (
updateGPUStatus int32
updateTempStatus int32
)
// GetHost 获取主机硬件信息 // GetHost 获取主机硬件信息
func GetHost(agentConfig *model.AgentConfig) *model.Host { func GetHost(agentConfig *model.AgentConfig) *model.Host {
@ -205,17 +217,8 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
} }
} }
temperatures, err := host.SensorsTemperatures() go updateTemplatureStat(&temperatureStat)
if err != nil { ret.Temperatures = temperatureStat
println("host.SensorsTemperatures error:", err)
} else {
for _, t := range temperatures {
ret.Temperatures = append(ret.Temperatures, model.SensorTemperature{
Name: t.SensorKey,
Temperature: t.Temperature,
})
}
}
go updateGPUStat(agentConfig, &gpuStat) go updateGPUStat(agentConfig, &gpuStat)
ret.GPU = gpuStat ret.GPU = gpuStat
@ -311,10 +314,10 @@ func getDiskTotalAndUsed(agentConfig *model.AgentConfig) (total uint64, used uin
} }
func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) { func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) {
if !atomic.CompareAndSwapInt32(&updateStatus, 0, 1) { if !atomic.CompareAndSwapInt32(&updateGPUStatus, 0, 1) {
return return
} }
defer atomic.StoreInt32(&updateStatus, 0) defer atomic.StoreInt32(&updateGPUStatus, 0)
if agentConfig.GPU { if agentConfig.GPU {
gs, err := gpustat.GetGPUStat() gs, err := gpustat.GetGPUStat()
if err != nil { if err != nil {
@ -328,6 +331,30 @@ func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) {
} }
} }
// updateTemplatureStat refreshes *tempStat with the current sensor readings.
//
// The updateTempStatus flag guarantees that at most one refresh runs at a
// time; a call that arrives while another is still in flight returns
// immediately without touching *tempStat.
//
// Sensors are queried through host.SensorsTemperatures. Each failure bumps the
// "Temperatures" counter in deviceDataFetchAttempts; once the counter reaches
// maxDeviceDataFetchAttempts the sensors are no longer queried and *tempStat
// keeps its last stored value. A successful read resets the counter and
// replaces *tempStat with the readings whose temperature is above zero.
//
// NOTE(review): *tempStat is written here without synchronization while the
// caller reads the same variable from another goroutine — confirm whether a
// mutex or atomic.Value is needed.
func updateTemplatureStat(tempStat *[]model.SensorTemperature) {
	if !atomic.CompareAndSwapInt32(&updateTempStatus, 0, 1) {
		return
	}
	defer atomic.StoreInt32(&updateTempStatus, 0)

	// Strict comparison: with "<=" one extra attempt beyond the configured
	// maximum would be made before giving up.
	if deviceDataFetchAttempts["Temperatures"] >= maxDeviceDataFetchAttempts {
		return
	}

	temperatures, err := host.SensorsTemperatures()
	if err != nil {
		deviceDataFetchAttempts["Temperatures"]++
		// The builtin println prints an interface value as raw pointer words,
		// so pass the message text explicitly.
		println("host.SensorsTemperatures error:", err.Error(), "attempt:", deviceDataFetchAttempts["Temperatures"])
		return
	}
	deviceDataFetchAttempts["Temperatures"] = 0

	// Build the snapshot in a fresh slice and publish it with one assignment.
	// Appending to *tempStat directly would keep every previous reading, so
	// the slice would grow (and repeat sensors) on each refresh. The slice is
	// left nil when no sensor qualifies, matching the zero value callers saw
	// before the first successful refresh.
	var readings []model.SensorTemperature
	for _, t := range temperatures {
		if t.Temperature > 0 {
			readings = append(readings, model.SensorTemperature{
				Name:        t.SensorKey,
				Temperature: t.Temperature,
			})
		}
	}
	*tempStat = readings
}
func isListContainsStr(list []string, str string) bool { func isListContainsStr(list []string, str string) bool {
for i := 0; i < len(list); i++ { for i := 0; i < len(list); i++ {
if strings.Contains(str, list[i]) { if strings.Contains(str, list[i]) {