refactor: improve error handling for fetching sensor temperatures (#27)
This commit is contained in:
parent
338f0dabbe
commit
136b3801c7
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@
|
||||
/agent
|
||||
/cmd/agent/agent
|
||||
*.pprof
|
||||
dist
|
@ -41,9 +41,21 @@ var (
|
||||
netInSpeed, netOutSpeed, netInTransfer, netOutTransfer, lastUpdateNetStats uint64
|
||||
cachedBootTime time.Time
|
||||
gpuStat float64
|
||||
temperatureStat []model.SensorTemperature
|
||||
)
|
||||
|
||||
var updateStatus int32
|
||||
// Maximum number of attempts to fetch device data.
|
||||
const maxDeviceDataFetchAttempts = 3
|
||||
|
||||
// Number of attempts made so far to fetch each piece of state data; the key is the HostState field name.
|
||||
var deviceDataFetchAttempts = map[string]int{
|
||||
"Temperatures": 0,
|
||||
}
|
||||
|
||||
var (
|
||||
updateGPUStatus int32
|
||||
updateTempStatus int32
|
||||
)
|
||||
|
||||
// GetHost returns the host's hardware information.
|
||||
func GetHost(agentConfig *model.AgentConfig) *model.Host {
|
||||
@ -205,17 +217,8 @@ func GetState(agentConfig *model.AgentConfig, skipConnectionCount bool, skipProc
|
||||
}
|
||||
}
|
||||
|
||||
temperatures, err := host.SensorsTemperatures()
|
||||
if err != nil {
|
||||
println("host.SensorsTemperatures error:", err)
|
||||
} else {
|
||||
for _, t := range temperatures {
|
||||
ret.Temperatures = append(ret.Temperatures, model.SensorTemperature{
|
||||
Name: t.SensorKey,
|
||||
Temperature: t.Temperature,
|
||||
})
|
||||
}
|
||||
}
|
||||
go updateTemplatureStat(&temperatureStat)
|
||||
ret.Temperatures = temperatureStat
|
||||
|
||||
go updateGPUStat(agentConfig, &gpuStat)
|
||||
ret.GPU = gpuStat
|
||||
@ -311,10 +314,10 @@ func getDiskTotalAndUsed(agentConfig *model.AgentConfig) (total uint64, used uin
|
||||
}
|
||||
|
||||
func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) {
|
||||
if !atomic.CompareAndSwapInt32(&updateStatus, 0, 1) {
|
||||
if !atomic.CompareAndSwapInt32(&updateGPUStatus, 0, 1) {
|
||||
return
|
||||
}
|
||||
defer atomic.StoreInt32(&updateStatus, 0)
|
||||
defer atomic.StoreInt32(&updateGPUStatus, 0)
|
||||
if agentConfig.GPU {
|
||||
gs, err := gpustat.GetGPUStat()
|
||||
if err != nil {
|
||||
@ -328,6 +331,30 @@ func updateGPUStat(agentConfig *model.AgentConfig, gpuStat *float64) {
|
||||
}
|
||||
}
|
||||
|
||||
func updateTemplatureStat(tempStat *[]model.SensorTemperature) {
|
||||
if !atomic.CompareAndSwapInt32(&updateTempStatus, 0, 1) {
|
||||
return
|
||||
}
|
||||
defer atomic.StoreInt32(&updateTempStatus, 0)
|
||||
if deviceDataFetchAttempts["Temperatures"] <= maxDeviceDataFetchAttempts {
|
||||
temperatures, err := host.SensorsTemperatures()
|
||||
if err != nil {
|
||||
deviceDataFetchAttempts["Temperatures"]++
|
||||
println("host.SensorsTemperatures error:", err, "attempt:", deviceDataFetchAttempts["Temperatures"])
|
||||
} else {
|
||||
deviceDataFetchAttempts["Temperatures"] = 0
|
||||
for _, t := range temperatures {
|
||||
if t.Temperature > 0 {
|
||||
*tempStat = append(*tempStat, model.SensorTemperature{
|
||||
Name: t.SensorKey,
|
||||
Temperature: t.Temperature,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func isListContainsStr(list []string, str string) bool {
|
||||
for i := 0; i < len(list); i++ {
|
||||
if strings.Contains(str, list[i]) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user