♻️ 重构部分代码,打印调试信息
This commit is contained in:
parent
d3c3e55c88
commit
6286e34af0
@ -35,11 +35,10 @@ var (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
reporting bool
|
|
||||||
client pb.NezhaServiceClient
|
client pb.NezhaServiceClient
|
||||||
ctx = context.Background()
|
ctx = context.Background()
|
||||||
delayWhenError = time.Second * 10 // Agent 重连间隔
|
delayWhenError = time.Second * 10 // Agent 重连间隔
|
||||||
updateCh = make(chan struct{}, 0) // Agent 自动更新间隔
|
updateCh = make(chan struct{}) // Agent 自动更新间隔
|
||||||
httpClient = &http.Client{
|
httpClient = &http.Client{
|
||||||
Transport: &http.Transport{
|
Transport: &http.Transport{
|
||||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||||
@ -179,12 +178,14 @@ func doTask(task *pb.Task) {
|
|||||||
start := time.Now()
|
start := time.Now()
|
||||||
resp, err := httpClient.Get(task.GetData())
|
resp, err := httpClient.Get(task.GetData())
|
||||||
if err == nil {
|
if err == nil {
|
||||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
// 检查 HTTP Response 状态
|
||||||
|
result.Delay = float32(time.Since(start).Microseconds()) / 1000.0
|
||||||
if resp.StatusCode > 399 || resp.StatusCode < 200 {
|
if resp.StatusCode > 399 || resp.StatusCode < 200 {
|
||||||
err = errors.New("\n应用错误:" + resp.Status)
|
err = errors.New("\n应用错误:" + resp.Status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
// 检查 SSL 证书信息
|
||||||
if strings.HasPrefix(task.GetData(), "https://") {
|
if strings.HasPrefix(task.GetData(), "https://") {
|
||||||
c := cert.NewCert(task.GetData()[8:])
|
c := cert.NewCert(task.GetData()[8:])
|
||||||
if c.Error != "" {
|
if c.Error != "" {
|
||||||
@ -197,6 +198,7 @@ func doTask(task *pb.Task) {
|
|||||||
result.Successful = true
|
result.Successful = true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// HTTP 请求失败
|
||||||
result.Data = err.Error()
|
result.Data = err.Error()
|
||||||
}
|
}
|
||||||
case model.TaskTypeICMPPing:
|
case model.TaskTypeICMPPing:
|
||||||
@ -219,7 +221,7 @@ func doTask(task *pb.Task) {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
conn.Write([]byte("ping\n"))
|
conn.Write([]byte("ping\n"))
|
||||||
conn.Close()
|
conn.Close()
|
||||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
result.Delay = float32(time.Since(start).Microseconds()) / 1000.0
|
||||||
result.Successful = true
|
result.Successful = true
|
||||||
} else {
|
} else {
|
||||||
result.Data = err.Error()
|
result.Data = err.Error()
|
||||||
@ -260,7 +262,7 @@ func doTask(task *pb.Task) {
|
|||||||
result.Data = string(output)
|
result.Data = string(output)
|
||||||
result.Successful = true
|
result.Successful = true
|
||||||
}
|
}
|
||||||
result.Delay = float32(time.Now().Sub(startedAt).Seconds())
|
result.Delay = float32(time.Since(startedAt).Seconds())
|
||||||
default:
|
default:
|
||||||
log.Printf("Unknown action: %v", task)
|
log.Printf("Unknown action: %v", task)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -71,7 +70,6 @@ func (oa *oauth2controller) callback(c *gin.Context) {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
gu, _, err = client.Users.Get(ctx, "")
|
gu, _, err = client.Users.Get(ctx, "")
|
||||||
}
|
}
|
||||||
log.Printf("%+v", gu)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mygin.ShowErrorPage(c, mygin.ErrInfo{
|
mygin.ShowErrorPage(c, mygin.ErrInfo{
|
||||||
Code: http.StatusBadRequest,
|
Code: http.StatusBadRequest,
|
||||||
|
@ -34,7 +34,7 @@ func tcpping() {
|
|||||||
}
|
}
|
||||||
conn.Write([]byte("ping\n"))
|
conn.Write([]byte("ping\n"))
|
||||||
conn.Close()
|
conn.Close()
|
||||||
fmt.Println(time.Now().Sub(start).Microseconds(), float32(time.Now().Sub(start).Microseconds())/1000.0)
|
fmt.Println(time.Since(start).Microseconds(), float32(time.Since(start).Microseconds())/1000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func sysinfo() {
|
func sysinfo() {
|
||||||
|
@ -43,7 +43,9 @@ func AlertSentinelStart() {
|
|||||||
checkStatus()
|
checkStatus()
|
||||||
checkCount++
|
checkCount++
|
||||||
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
|
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
|
||||||
|
if Conf.Debug {
|
||||||
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
|
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
|
||||||
|
}
|
||||||
checkCount = 0
|
checkCount = 0
|
||||||
lastPrint = startedAt
|
lastPrint = startedAt
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ func ReSortServer() {
|
|||||||
|
|
||||||
sort.SliceStable(SortedServerList, func(i, j int) bool {
|
sort.SliceStable(SortedServerList, func(i, j int) bool {
|
||||||
if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
|
if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
|
||||||
return SortedServerList[i].ID < SortedServerList[i].ID
|
return SortedServerList[i].ID < SortedServerList[j].ID
|
||||||
}
|
}
|
||||||
return SortedServerList[i].DisplayIndex > SortedServerList[j].DisplayIndex
|
return SortedServerList[i].DisplayIndex > SortedServerList[j].DisplayIndex
|
||||||
})
|
})
|
||||||
|
@ -15,7 +15,7 @@ var ServiceSentinelShared *ServiceSentinel
|
|||||||
|
|
||||||
func NewServiceSentinel() {
|
func NewServiceSentinel() {
|
||||||
ServiceSentinelShared = &ServiceSentinel{
|
ServiceSentinelShared = &ServiceSentinel{
|
||||||
serviceResponseChannel: make(chan *pb.TaskResult, 200),
|
serviceResponseChannel: make(chan ReportData, 200),
|
||||||
serviceResponseDataStoreTodaySavedIndex: make(map[uint64]int),
|
serviceResponseDataStoreTodaySavedIndex: make(map[uint64]int),
|
||||||
serviceCurrentStatusIndex: make(map[uint64]int),
|
serviceCurrentStatusIndex: make(map[uint64]int),
|
||||||
serviceCurrentStatusData: make(map[uint64][]model.MonitorHistory),
|
serviceCurrentStatusData: make(map[uint64][]model.MonitorHistory),
|
||||||
@ -26,14 +26,35 @@ func NewServiceSentinel() {
|
|||||||
serviceResponseDataStoreCurrentDown: make(map[uint64]uint64),
|
serviceResponseDataStoreCurrentDown: make(map[uint64]uint64),
|
||||||
monitors: make(map[uint64]model.Monitor),
|
monitors: make(map[uint64]model.Monitor),
|
||||||
serviceResponseDataStoreToday: make(map[uint64][]model.MonitorHistory),
|
serviceResponseDataStoreToday: make(map[uint64][]model.MonitorHistory),
|
||||||
|
sslCertCache: make(map[uint64]string),
|
||||||
}
|
}
|
||||||
ServiceSentinelShared.OnMonitorUpdate()
|
ServiceSentinelShared.OnMonitorUpdate()
|
||||||
|
|
||||||
|
year, month, day := time.Now().Date()
|
||||||
|
today := time.Date(year, month, day, 0, 0, 0, 0, time.Local)
|
||||||
|
var mhs []model.MonitorHistory
|
||||||
|
DB.Where("created_at >= ?", today).Find(&mhs)
|
||||||
|
|
||||||
|
// 加载当日记录
|
||||||
|
for i := 0; i < len(mhs); i++ {
|
||||||
|
ServiceSentinelShared.serviceResponseDataStoreToday[mhs[i].MonitorID] =
|
||||||
|
append(ServiceSentinelShared.serviceResponseDataStoreToday[mhs[i].MonitorID], mhs[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
// 更新入库时间及当日数据入库游标
|
||||||
for k := range ServiceSentinelShared.monitors {
|
for k := range ServiceSentinelShared.monitors {
|
||||||
ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
|
ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
|
||||||
|
ServiceSentinelShared.serviceResponseDataStoreTodaySavedIndex[k] = len(ServiceSentinelShared.serviceResponseDataStoreToday[k])
|
||||||
}
|
}
|
||||||
|
|
||||||
go ServiceSentinelShared.worker()
|
go ServiceSentinelShared.worker()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ReportData struct {
|
||||||
|
Data *pb.TaskResult
|
||||||
|
Reporter uint64
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
使用缓存 channel,处理上报的 Service 请求结果,然后判断是否需要报警
|
使用缓存 channel,处理上报的 Service 请求结果,然后判断是否需要报警
|
||||||
需要记录上一次的状态信息
|
需要记录上一次的状态信息
|
||||||
@ -41,7 +62,7 @@ func NewServiceSentinel() {
|
|||||||
type ServiceSentinel struct {
|
type ServiceSentinel struct {
|
||||||
serviceResponseDataStoreLock sync.RWMutex
|
serviceResponseDataStoreLock sync.RWMutex
|
||||||
monitorsLock sync.RWMutex
|
monitorsLock sync.RWMutex
|
||||||
serviceResponseChannel chan *pb.TaskResult
|
serviceResponseChannel chan ReportData
|
||||||
serviceResponseDataStoreTodaySavedIndex map[uint64]int
|
serviceResponseDataStoreTodaySavedIndex map[uint64]int
|
||||||
serviceCurrentStatusIndex map[uint64]int
|
serviceCurrentStatusIndex map[uint64]int
|
||||||
serviceCurrentStatusData map[uint64][]model.MonitorHistory
|
serviceCurrentStatusData map[uint64][]model.MonitorHistory
|
||||||
@ -52,9 +73,10 @@ type ServiceSentinel struct {
|
|||||||
serviceResponseDataStoreCurrentDown map[uint64]uint64
|
serviceResponseDataStoreCurrentDown map[uint64]uint64
|
||||||
monitors map[uint64]model.Monitor
|
monitors map[uint64]model.Monitor
|
||||||
serviceResponseDataStoreToday map[uint64][]model.MonitorHistory
|
serviceResponseDataStoreToday map[uint64][]model.MonitorHistory
|
||||||
|
sslCertCache map[uint64]string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ss *ServiceSentinel) Dispatch(r *pb.TaskResult) {
|
func (ss *ServiceSentinel) Dispatch(r ReportData) {
|
||||||
ss.serviceResponseChannel <- r
|
ss.serviceResponseChannel <- r
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,6 +116,7 @@ func (ss *ServiceSentinel) OnMonitorDelete(id uint64) {
|
|||||||
delete(ss.serviceResponseDataStoreCurrentUp, id)
|
delete(ss.serviceResponseDataStoreCurrentUp, id)
|
||||||
delete(ss.serviceResponseDataStoreCurrentDown, id)
|
delete(ss.serviceResponseDataStoreCurrentDown, id)
|
||||||
delete(ss.serviceResponseDataStoreToday, id)
|
delete(ss.serviceResponseDataStoreToday, id)
|
||||||
|
delete(ss.sslCertCache, id)
|
||||||
ss.monitorsLock.Lock()
|
ss.monitorsLock.Lock()
|
||||||
defer ss.monitorsLock.Unlock()
|
defer ss.monitorsLock.Unlock()
|
||||||
delete(ss.monitors, id)
|
delete(ss.monitors, id)
|
||||||
@ -186,10 +209,10 @@ func getStateStr(percent uint64) string {
|
|||||||
|
|
||||||
func (ss *ServiceSentinel) worker() {
|
func (ss *ServiceSentinel) worker() {
|
||||||
for r := range ss.serviceResponseChannel {
|
for r := range ss.serviceResponseChannel {
|
||||||
if ss.monitors[r.GetId()].ID == 0 {
|
if ss.monitors[r.Data.GetId()].ID == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
mh := model.PB2MonitorHistory(r)
|
mh := model.PB2MonitorHistory(r.Data)
|
||||||
ss.serviceResponseDataStoreLock.Lock()
|
ss.serviceResponseDataStoreLock.Lock()
|
||||||
// 先查看是否到下一天
|
// 先查看是否到下一天
|
||||||
nowDate := time.Now().Format("02-Jan-06")
|
nowDate := time.Now().Format("02-Jan-06")
|
||||||
@ -239,30 +262,34 @@ func (ss *ServiceSentinel) worker() {
|
|||||||
upPercent = ss.serviceResponseDataStoreCurrentUp[mh.MonitorID] * 100 / (ss.serviceResponseDataStoreCurrentDown[mh.MonitorID] + ss.serviceResponseDataStoreCurrentUp[mh.MonitorID])
|
upPercent = ss.serviceResponseDataStoreCurrentUp[mh.MonitorID] * 100 / (ss.serviceResponseDataStoreCurrentDown[mh.MonitorID] + ss.serviceResponseDataStoreCurrentUp[mh.MonitorID])
|
||||||
}
|
}
|
||||||
stateStr := getStateStr(upPercent)
|
stateStr := getStateStr(upPercent)
|
||||||
log.Println(ss.monitors[mh.MonitorID].Target, stateStr)
|
if Conf.Debug {
|
||||||
|
log.Println(ss.monitors[mh.MonitorID].Target, stateStr, "Reporter:", r.Reporter, "Successful:", mh.Successful, "Data:", mh.Data)
|
||||||
|
}
|
||||||
if stateStr == "故障" || stateStr != ss.lastStatus[mh.MonitorID] {
|
if stateStr == "故障" || stateStr != ss.lastStatus[mh.MonitorID] {
|
||||||
ss.monitorsLock.RLock()
|
ss.monitorsLock.RLock()
|
||||||
isSendNotification := (ss.lastStatus[mh.MonitorID] != "" || stateStr == "故障") && ss.monitors[mh.MonitorID].Notify
|
isSendNotification := (ss.lastStatus[mh.MonitorID] != "" || stateStr == "故障") && ss.monitors[mh.MonitorID].Notify
|
||||||
ss.lastStatus[mh.MonitorID] = stateStr
|
ss.lastStatus[mh.MonitorID] = stateStr
|
||||||
if isSendNotification {
|
if isSendNotification {
|
||||||
SendNotification(fmt.Sprintf("服务监控:%s 服务状态:%s", ss.monitors[mh.MonitorID].Name, stateStr), true)
|
go SendNotification(fmt.Sprintf("服务监控:%s 服务状态:%s", ss.monitors[mh.MonitorID].Name, stateStr), true)
|
||||||
}
|
}
|
||||||
ss.monitorsLock.RUnlock()
|
ss.monitorsLock.RUnlock()
|
||||||
}
|
}
|
||||||
ss.serviceResponseDataStoreLock.Unlock()
|
ss.serviceResponseDataStoreLock.Unlock()
|
||||||
// SSL 证书报警
|
// SSL 证书报警
|
||||||
var errMsg string
|
var errMsg string
|
||||||
if strings.HasPrefix(r.GetData(), "SSL证书错误:") {
|
if strings.HasPrefix(mh.Data, "SSL证书错误:") {
|
||||||
// 排除 i/o timeout、connection timeout、EOF 错误
|
// 排除 i/o timeout、connection timeout、EOF 错误
|
||||||
if !strings.HasSuffix(r.GetData(), "timeout") &&
|
if !strings.HasSuffix(mh.Data, "timeout") &&
|
||||||
!strings.HasSuffix(r.GetData(), "EOF") &&
|
!strings.HasSuffix(mh.Data, "EOF") &&
|
||||||
!strings.HasSuffix(r.GetData(), "timed out") {
|
!strings.HasSuffix(mh.Data, "timed out") {
|
||||||
errMsg = r.GetData()
|
errMsg = mh.Data
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
var last model.MonitorHistory
|
var newCert = strings.Split(mh.Data, "|")
|
||||||
var newCert = strings.Split(r.GetData(), "|")
|
|
||||||
if len(newCert) > 1 {
|
if len(newCert) > 1 {
|
||||||
|
if ss.sslCertCache[mh.MonitorID] == "" {
|
||||||
|
ss.sslCertCache[mh.MonitorID] = mh.Data
|
||||||
|
}
|
||||||
expiresNew, _ := time.Parse("2006-01-02 15:04:05 -0700 MST", newCert[1])
|
expiresNew, _ := time.Parse("2006-01-02 15:04:05 -0700 MST", newCert[1])
|
||||||
// 证书过期提醒
|
// 证书过期提醒
|
||||||
if expiresNew.Before(time.Now().AddDate(0, 0, 7)) {
|
if expiresNew.Before(time.Now().AddDate(0, 0, 7)) {
|
||||||
@ -271,23 +298,23 @@ func (ss *ServiceSentinel) worker() {
|
|||||||
expiresNew.Format("2006-01-02 15:04:05"))
|
expiresNew.Format("2006-01-02 15:04:05"))
|
||||||
}
|
}
|
||||||
// 证书变更提醒
|
// 证书变更提醒
|
||||||
if err := DB.Where("monitor_id = ? AND data LIKE ?", r.GetId(), "%|%").Order("id DESC").First(&last).Error; err == nil {
|
var oldCert = strings.Split(ss.sslCertCache[mh.MonitorID], "|")
|
||||||
var oldCert = strings.Split(last.Data, "|")
|
|
||||||
var expiresOld time.Time
|
var expiresOld time.Time
|
||||||
if len(oldCert) > 1 {
|
if len(oldCert) > 1 {
|
||||||
expiresOld, _ = time.Parse("2006-01-02 15:04:05 -0700 MST", oldCert[1])
|
expiresOld, _ = time.Parse("2006-01-02 15:04:05 -0700 MST", oldCert[1])
|
||||||
}
|
}
|
||||||
if last.Data != "" && oldCert[0] != newCert[0] && !expiresNew.Equal(expiresOld) {
|
if oldCert[0] != newCert[0] && !expiresNew.Equal(expiresOld) {
|
||||||
errMsg = fmt.Sprintf(
|
errMsg = fmt.Sprintf(
|
||||||
"SSL证书变更,旧:%s, %s 过期;新:%s, %s 过期。",
|
"SSL证书变更,旧:%s, %s 过期;新:%s, %s 过期。",
|
||||||
oldCert[0], expiresOld.Format("2006-01-02 15:04:05"), newCert[0], expiresNew.Format("2006-01-02 15:04:05"))
|
oldCert[0], expiresOld.Format("2006-01-02 15:04:05"), newCert[0], expiresNew.Format("2006-01-02 15:04:05"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if errMsg != "" {
|
if errMsg != "" {
|
||||||
ss.monitorsLock.RLock()
|
ss.monitorsLock.RLock()
|
||||||
SendNotification(fmt.Sprintf("服务监控:%s %s", ss.monitors[mh.MonitorID].Name, errMsg), true)
|
if ss.monitors[mh.MonitorID].Notify {
|
||||||
|
go SendNotification(fmt.Sprintf("服务监控:%s %s", ss.monitors[mh.MonitorID].Name, errMsg), true)
|
||||||
|
}
|
||||||
ss.monitorsLock.RUnlock()
|
ss.monitorsLock.RUnlock()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,7 +21,10 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if r.GetType() != model.TaskTypeCommand {
|
if r.GetType() != model.TaskTypeCommand {
|
||||||
dao.ServiceSentinelShared.Dispatch(r)
|
dao.ServiceSentinelShared.Dispatch(dao.ReportData{
|
||||||
|
Data: r,
|
||||||
|
Reporter: clientID,
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
// 处理上报的计划任务
|
// 处理上报的计划任务
|
||||||
dao.CronLock.RLock()
|
dao.CronLock.RLock()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user