添加代码注释+修正一个typo
This commit is contained in:
		
							parent
							
								
									91a1e3fe22
								
							
						
					
					
						commit
						707985e5c8
					
				@ -63,19 +63,20 @@ func initSystem() {
 | 
				
			|||||||
	loadServers() //加载服务器列表
 | 
						loadServers() //加载服务器列表
 | 
				
			||||||
	loadCrons()   //加载计划任务
 | 
						loadCrons()   //加载计划任务
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// 清理 服务请求记录 和 流量记录 的旧数据
 | 
						// 每天的3:30 对 监控记录 和 流量记录 进行清理
 | 
				
			||||||
	_, err := singleton.Cron.AddFunc("0 30 3 * * *", cleanMonitorHistory)
 | 
						_, err := singleton.Cron.AddFunc("0 30 3 * * *", cleanMonitorHistory)
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		panic(err)
 | 
							panic(err)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// 流量记录打点
 | 
						// 每小时对流量记录进行打点
 | 
				
			||||||
	_, err = singleton.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
 | 
						_, err = singleton.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		panic(err)
 | 
							panic(err)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// recordTransferHourlyUsage 对流量记录进行打点
 | 
				
			||||||
func recordTransferHourlyUsage() {
 | 
					func recordTransferHourlyUsage() {
 | 
				
			||||||
	singleton.ServerLock.Lock()
 | 
						singleton.ServerLock.Lock()
 | 
				
			||||||
	defer singleton.ServerLock.Unlock()
 | 
						defer singleton.ServerLock.Unlock()
 | 
				
			||||||
@ -102,8 +103,9 @@ func recordTransferHourlyUsage() {
 | 
				
			|||||||
	log.Println("NEZHA>> Cron 流量统计入库", len(txs), singleton.DB.Create(txs).Error)
 | 
						log.Println("NEZHA>> Cron 流量统计入库", len(txs), singleton.DB.Create(txs).Error)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// cleanMonitorHistory 清理无效或过时的 监控记录 和 流量记录
 | 
				
			||||||
func cleanMonitorHistory() {
 | 
					func cleanMonitorHistory() {
 | 
				
			||||||
	// 清理无效数据
 | 
						// 清理已被删除的服务器的监控记录与流量记录
 | 
				
			||||||
	singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30))
 | 
						singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30))
 | 
				
			||||||
	singleton.DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)")
 | 
						singleton.DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)")
 | 
				
			||||||
	// 计算可清理流量记录的时长
 | 
						// 计算可清理流量记录的时长
 | 
				
			||||||
@ -146,6 +148,7 @@ func cleanMonitorHistory() {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//loadServers 加载服务器列表并根据ID排序
 | 
				
			||||||
func loadServers() {
 | 
					func loadServers() {
 | 
				
			||||||
	var servers []model.Server
 | 
						var servers []model.Server
 | 
				
			||||||
	singleton.DB.Find(&servers)
 | 
						singleton.DB.Find(&servers)
 | 
				
			||||||
@ -159,6 +162,7 @@ func loadServers() {
 | 
				
			|||||||
	singleton.ReSortServer()
 | 
						singleton.ReSortServer()
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// loadCrons 加载计划任务
 | 
				
			||||||
func loadCrons() {
 | 
					func loadCrons() {
 | 
				
			||||||
	var crons []model.Cron
 | 
						var crons []model.Cron
 | 
				
			||||||
	singleton.DB.Find(&crons)
 | 
						singleton.DB.Find(&crons)
 | 
				
			||||||
@ -172,6 +176,7 @@ func loadCrons() {
 | 
				
			|||||||
			crIgnoreMap[cr.Servers[j]] = true
 | 
								crIgnoreMap[cr.Servers[j]] = true
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// 注册计划任务
 | 
				
			||||||
		cr.CronJobID, err = singleton.Cron.AddFunc(cr.Scheduler, singleton.CronTrigger(cr))
 | 
							cr.CronJobID, err = singleton.Cron.AddFunc(cr.Scheduler, singleton.CronTrigger(cr))
 | 
				
			||||||
		if err == nil {
 | 
							if err == nil {
 | 
				
			||||||
			singleton.Crons[cr.ID] = &cr
 | 
								singleton.Crons[cr.ID] = &cr
 | 
				
			||||||
@ -192,7 +197,7 @@ func loadCrons() {
 | 
				
			|||||||
func main() {
 | 
					func main() {
 | 
				
			||||||
	cleanMonitorHistory()
 | 
						cleanMonitorHistory()
 | 
				
			||||||
	go rpc.ServeRPC(singleton.Conf.GRPCPort)
 | 
						go rpc.ServeRPC(singleton.Conf.GRPCPort)
 | 
				
			||||||
	serviceSentinelDispatchBus := make(chan model.Monitor)
 | 
						serviceSentinelDispatchBus := make(chan model.Monitor) // 用于传递服务监控任务信息的channel
 | 
				
			||||||
	go rpc.DispatchTask(serviceSentinelDispatchBus)
 | 
						go rpc.DispatchTask(serviceSentinelDispatchBus)
 | 
				
			||||||
	go rpc.DispatchKeepalive()
 | 
						go rpc.DispatchKeepalive()
 | 
				
			||||||
	go singleton.AlertSentinelStart()
 | 
						go singleton.AlertSentinelStart()
 | 
				
			||||||
 | 
				
			|||||||
@ -43,6 +43,7 @@ func (r *AlertRule) Enabled() bool {
 | 
				
			|||||||
	return r.Enable != nil && *r.Enable
 | 
						return r.Enable != nil && *r.Enable
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Snapshot 对传入的Server进行该报警规则下所有type的检查 返回包含每项检查结果的空接口
 | 
				
			||||||
func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []interface{} {
 | 
					func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []interface{} {
 | 
				
			||||||
	var point []interface{}
 | 
						var point []interface{}
 | 
				
			||||||
	for i := 0; i < len(r.Rules); i++ {
 | 
						for i := 0; i < len(r.Rules); i++ {
 | 
				
			||||||
@ -51,9 +52,10 @@ func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Ser
 | 
				
			|||||||
	return point
 | 
						return point
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Check 传入包含当前报警规则下所有type检查结果的空接口 返回报警持续时间与是否通过报警检查(通过则返回true)
 | 
				
			||||||
func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
 | 
					func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
 | 
				
			||||||
	var max int
 | 
						var max int   // 报警持续时间
 | 
				
			||||||
	var count int
 | 
						var count int // 检查未通过的个数
 | 
				
			||||||
	for i := 0; i < len(r.Rules); i++ {
 | 
						for i := 0; i < len(r.Rules); i++ {
 | 
				
			||||||
		if r.Rules[i].IsTransferDurationRule() {
 | 
							if r.Rules[i].IsTransferDurationRule() {
 | 
				
			||||||
			// 循环区间流量报警
 | 
								// 循环区间流量报警
 | 
				
			||||||
@ -83,11 +85,13 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
 | 
				
			|||||||
					fail++
 | 
										fail++
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								// 当70%以上的采样点未通过规则判断时 才认为当前检查未通过
 | 
				
			||||||
			if fail/total > 0.7 {
 | 
								if fail/total > 0.7 {
 | 
				
			||||||
				count++
 | 
									count++
 | 
				
			||||||
				break
 | 
									break
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						// 仅当所有检查均未通过时 返回false
 | 
				
			||||||
	return max, count != len(r.Rules)
 | 
						return max, count != len(r.Rules)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -48,8 +48,9 @@ func (c *AgentConfig) Save() error {
 | 
				
			|||||||
	return ioutil.WriteFile(c.v.ConfigFileUsed(), data, os.ModePerm)
 | 
						return ioutil.WriteFile(c.v.ConfigFileUsed(), data, os.ModePerm)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Config 站点配置
 | 
				
			||||||
type Config struct {
 | 
					type Config struct {
 | 
				
			||||||
	Debug bool
 | 
						Debug bool // debug模式开关
 | 
				
			||||||
	Site  struct {
 | 
						Site  struct {
 | 
				
			||||||
		Brand        string // 站点名称
 | 
							Brand        string // 站点名称
 | 
				
			||||||
		CookieName   string // 浏览器 Cookie 名称
 | 
							CookieName   string // 浏览器 Cookie 名称
 | 
				
			||||||
@ -73,13 +74,14 @@ type Config struct {
 | 
				
			|||||||
	EnablePlainIPInNotification bool
 | 
						EnablePlainIPInNotification bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// IP变更提醒
 | 
						// IP变更提醒
 | 
				
			||||||
	Cover                 uint8  // 覆盖范围
 | 
						Cover                 uint8  // 覆盖范围(0:提醒未被 IgnoredIPNotification 包含的所有服务器; 1:仅提醒被 IgnoredIPNotification 包含的服务器;)
 | 
				
			||||||
	IgnoredIPNotification string // 特定服务器
 | 
						IgnoredIPNotification string // 特定服务器IP(多个服务器用逗号分隔)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	v                              *viper.Viper
 | 
						v                              *viper.Viper
 | 
				
			||||||
	IgnoredIPNotificationServerIDs map[uint64]bool
 | 
						IgnoredIPNotificationServerIDs map[uint64]bool // [ServerID] -> bool(值为true代表当前ServerID在特定服务器列表内)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Read 读取配置文件并应用
 | 
				
			||||||
func (c *Config) Read(path string) error {
 | 
					func (c *Config) Read(path string) error {
 | 
				
			||||||
	c.v = viper.New()
 | 
						c.v = viper.New()
 | 
				
			||||||
	c.v.SetConfigFile(path)
 | 
						c.v.SetConfigFile(path)
 | 
				
			||||||
@ -101,6 +103,7 @@ func (c *Config) Read(path string) error {
 | 
				
			|||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// updateIgnoredIPNotificationID 更新用于判断服务器ID是否属于特定服务器的map
 | 
				
			||||||
func (c *Config) updateIgnoredIPNotificationID() {
 | 
					func (c *Config) updateIgnoredIPNotificationID() {
 | 
				
			||||||
	c.IgnoredIPNotificationServerIDs = make(map[uint64]bool)
 | 
						c.IgnoredIPNotificationServerIDs = make(map[uint64]bool)
 | 
				
			||||||
	splitedIDs := strings.Split(c.IgnoredIPNotification, ",")
 | 
						splitedIDs := strings.Split(c.IgnoredIPNotification, ",")
 | 
				
			||||||
@ -112,6 +115,7 @@ func (c *Config) updateIgnoredIPNotificationID() {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Save 保存配置文件
 | 
				
			||||||
func (c *Config) Save() error {
 | 
					func (c *Config) Save() error {
 | 
				
			||||||
	c.updateIgnoredIPNotificationID()
 | 
						c.updateIgnoredIPNotificationID()
 | 
				
			||||||
	data, err := yaml.Marshal(c)
 | 
						data, err := yaml.Marshal(c)
 | 
				
			||||||
 | 
				
			|||||||
@ -22,7 +22,7 @@ type Cron struct {
 | 
				
			|||||||
	PushSuccessful bool      // 推送成功的通知
 | 
						PushSuccessful bool      // 推送成功的通知
 | 
				
			||||||
	LastExecutedAt time.Time // 最后一次执行时间
 | 
						LastExecutedAt time.Time // 最后一次执行时间
 | 
				
			||||||
	LastResult     bool      // 最后一次执行结果
 | 
						LastResult     bool      // 最后一次执行结果
 | 
				
			||||||
	Cover          uint8
 | 
						Cover          uint8     // 计划任务覆盖范围 (0:仅覆盖特定服务器 1:仅忽略特定服务器)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	CronJobID  cron.EntryID `gorm:"-"`
 | 
						CronJobID  cron.EntryID `gorm:"-"`
 | 
				
			||||||
	ServersRaw string
 | 
						ServersRaw string
 | 
				
			||||||
 | 
				
			|||||||
@ -56,6 +56,7 @@ func (m *Monitor) PB() *pb.Task {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// CronSpec 返回服务监控请求间隔对应的 cron 表达式
 | 
				
			||||||
func (m *Monitor) CronSpec() string {
 | 
					func (m *Monitor) CronSpec() string {
 | 
				
			||||||
	if m.Duration == 0 {
 | 
						if m.Duration == 0 {
 | 
				
			||||||
		// 默认间隔 30 秒
 | 
							// 默认间隔 30 秒
 | 
				
			||||||
@ -76,6 +77,7 @@ func (m *Monitor) AfterFind(tx *gorm.DB) error {
 | 
				
			|||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// IsServiceSentinelNeeded 判断该任务类型是否需要进行服务监控 需要则返回true
 | 
				
			||||||
func IsServiceSentinelNeeded(t uint64) bool {
 | 
					func IsServiceSentinelNeeded(t uint64) bool {
 | 
				
			||||||
	return t != TaskTypeCommand && t != TaskTypeTerminal && t != TaskTypeUpgrade
 | 
						return t != TaskTypeCommand && t != TaskTypeTerminal && t != TaskTypeUpgrade
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -4,6 +4,7 @@ import (
 | 
				
			|||||||
	pb "github.com/naiba/nezha/proto"
 | 
						pb "github.com/naiba/nezha/proto"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// MonitorHistory 历史监控记录
 | 
				
			||||||
type MonitorHistory struct {
 | 
					type MonitorHistory struct {
 | 
				
			||||||
	Common
 | 
						Common
 | 
				
			||||||
	MonitorID  uint64
 | 
						MonitorID  uint64
 | 
				
			||||||
 | 
				
			|||||||
@ -159,10 +159,12 @@ func (u *Rule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server,
 | 
				
			|||||||
	return nil
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// IsTransferDurationRule 判断该规则是否属于周期流量规则 属于则返回true
 | 
				
			||||||
func (rule Rule) IsTransferDurationRule() bool {
 | 
					func (rule Rule) IsTransferDurationRule() bool {
 | 
				
			||||||
	return strings.HasSuffix(rule.Type, "_cycle")
 | 
						return strings.HasSuffix(rule.Type, "_cycle")
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// GetTransferDurationStart 获取周期流量的起始时间
 | 
				
			||||||
func (rule Rule) GetTransferDurationStart() time.Time {
 | 
					func (rule Rule) GetTransferDurationStart() time.Time {
 | 
				
			||||||
	// Accept uppercase and lowercase
 | 
						// Accept uppercase and lowercase
 | 
				
			||||||
	unit := strings.ToLower(rule.CycleUnit)
 | 
						unit := strings.ToLower(rule.CycleUnit)
 | 
				
			||||||
@ -202,6 +204,7 @@ func (rule Rule) GetTransferDurationStart() time.Time {
 | 
				
			|||||||
	return startTime
 | 
						return startTime
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// GetTransferDurationEnd 获取周期流量结束时间
 | 
				
			||||||
func (rule Rule) GetTransferDurationEnd() time.Time {
 | 
					func (rule Rule) GetTransferDurationEnd() time.Time {
 | 
				
			||||||
	// Accept uppercase and lowercase
 | 
						// Accept uppercase and lowercase
 | 
				
			||||||
	unit := strings.ToLower(rule.CycleUnit)
 | 
						unit := strings.ToLower(rule.CycleUnit)
 | 
				
			||||||
 | 
				
			|||||||
@ -23,10 +23,11 @@ type NotificationHistory struct {
 | 
				
			|||||||
// 报警规则
 | 
					// 报警规则
 | 
				
			||||||
var AlertsLock sync.RWMutex
 | 
					var AlertsLock sync.RWMutex
 | 
				
			||||||
var Alerts []*model.AlertRule
 | 
					var Alerts []*model.AlertRule
 | 
				
			||||||
var alertsStore map[uint64]map[uint64][][]interface{}
 | 
					var alertsStore map[uint64]map[uint64][][]interface{}                  // [alert_id][server_id] -> 对应报警规则的检查结果
 | 
				
			||||||
var alertsPrevState map[uint64]map[uint64]uint
 | 
					var alertsPrevState map[uint64]map[uint64]uint                         // [alert_id][server_id] -> 对应报警规则的上一次报警状态
 | 
				
			||||||
var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats
 | 
					var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> 对应报警规则的周期流量统计
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// addCycleTransferStatsInfo 向AlertsCycleTransferStatsStore中添加周期流量报警统计信息
 | 
				
			||||||
func addCycleTransferStatsInfo(alert *model.AlertRule) {
 | 
					func addCycleTransferStatsInfo(alert *model.AlertRule) {
 | 
				
			||||||
	if !alert.Enabled() {
 | 
						if !alert.Enabled() {
 | 
				
			||||||
		return
 | 
							return
 | 
				
			||||||
@ -52,6 +53,7 @@ func addCycleTransferStatsInfo(alert *model.AlertRule) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// AlertSentinelStart 报警器启动
 | 
				
			||||||
func AlertSentinelStart() {
 | 
					func AlertSentinelStart() {
 | 
				
			||||||
	alertsStore = make(map[uint64]map[uint64][][]interface{})
 | 
						alertsStore = make(map[uint64]map[uint64][][]interface{})
 | 
				
			||||||
	alertsPrevState = make(map[uint64]map[uint64]uint)
 | 
						alertsPrevState = make(map[uint64]map[uint64]uint)
 | 
				
			||||||
@ -120,6 +122,7 @@ func OnDeleteAlert(id uint64) {
 | 
				
			|||||||
	delete(AlertsCycleTransferStatsStore, id)
 | 
						delete(AlertsCycleTransferStatsStore, id)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// checkStatus 检查报警规则并发送报警
 | 
				
			||||||
func checkStatus() {
 | 
					func checkStatus() {
 | 
				
			||||||
	AlertsLock.RLock()
 | 
						AlertsLock.RLock()
 | 
				
			||||||
	defer AlertsLock.RUnlock()
 | 
						defer AlertsLock.RUnlock()
 | 
				
			||||||
 | 
				
			|||||||
@ -16,6 +16,7 @@ const firstNotificationDelay = time.Minute * 15
 | 
				
			|||||||
var notifications []model.Notification
 | 
					var notifications []model.Notification
 | 
				
			||||||
var notificationsLock sync.RWMutex
 | 
					var notificationsLock sync.RWMutex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// LoadNotifications 加载通知方式到 singleton.notifications 变量
 | 
				
			||||||
func LoadNotifications() {
 | 
					func LoadNotifications() {
 | 
				
			||||||
	notificationsLock.Lock()
 | 
						notificationsLock.Lock()
 | 
				
			||||||
	if err := DB.Find(¬ifications).Error; err != nil {
 | 
						if err := DB.Find(¬ifications).Error; err != nil {
 | 
				
			||||||
 | 
				
			|||||||
@ -24,12 +24,14 @@ type ReportData struct {
 | 
				
			|||||||
	Reporter uint64
 | 
						Reporter uint64
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// _TodayStatsOfMonitor 今日监控记录
 | 
				
			||||||
type _TodayStatsOfMonitor struct {
 | 
					type _TodayStatsOfMonitor struct {
 | 
				
			||||||
	Up    int
 | 
						Up    int     // 今日在线计数
 | 
				
			||||||
	Down  int
 | 
						Down  int     // 今日离线计数
 | 
				
			||||||
	Delay float32
 | 
						Delay float32 // 今日平均延迟
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// NewServiceSentinel 创建服务监控器
 | 
				
			||||||
func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
 | 
					func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
 | 
				
			||||||
	ServiceSentinelShared = &ServiceSentinel{
 | 
						ServiceSentinelShared = &ServiceSentinel{
 | 
				
			||||||
		serviceReportChannel:                make(chan ReportData, 200),
 | 
							serviceReportChannel:                make(chan ReportData, 200),
 | 
				
			||||||
@ -46,6 +48,7 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
 | 
				
			|||||||
		monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
 | 
							monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
 | 
				
			||||||
		dispatchBus:   serviceSentinelDispatchBus,
 | 
							dispatchBus:   serviceSentinelDispatchBus,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						// 加载历史记录
 | 
				
			||||||
	ServiceSentinelShared.loadMonitorHistory()
 | 
						ServiceSentinelShared.loadMonitorHistory()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	year, month, day := time.Now().Date()
 | 
						year, month, day := time.Now().Date()
 | 
				
			||||||
@ -72,6 +75,7 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
 | 
				
			|||||||
		ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
 | 
							ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// 启动服务监控器
 | 
				
			||||||
	go ServiceSentinelShared.worker()
 | 
						go ServiceSentinelShared.worker()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// 每日将游标往后推一天
 | 
						// 每日将游标往后推一天
 | 
				
			||||||
@ -88,20 +92,20 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
 | 
				
			|||||||
type ServiceSentinel struct {
 | 
					type ServiceSentinel struct {
 | 
				
			||||||
	serviceResponseDataStoreLock        sync.RWMutex
 | 
						serviceResponseDataStoreLock        sync.RWMutex
 | 
				
			||||||
	monitorsLock                        sync.RWMutex
 | 
						monitorsLock                        sync.RWMutex
 | 
				
			||||||
	serviceReportChannel                chan ReportData
 | 
						serviceReportChannel                chan ReportData                   // 服务状态汇报管道
 | 
				
			||||||
	serviceStatusToday                  map[uint64]*_TodayStatsOfMonitor
 | 
						serviceStatusToday                  map[uint64]*_TodayStatsOfMonitor  // [monitor_id] -> _TodayStatsOfMonitor
 | 
				
			||||||
	serviceCurrentStatusIndex           map[uint64]int
 | 
						serviceCurrentStatusIndex           map[uint64]int                    // [monitor_id] -> 该监控ID对应的 serviceCurrentStatusData 的最新索引下标
 | 
				
			||||||
	serviceCurrentStatusData            map[uint64][]model.MonitorHistory
 | 
						serviceCurrentStatusData            map[uint64][]model.MonitorHistory // [monitor_id] -> []model.MonitorHistory
 | 
				
			||||||
	latestDate                          map[uint64]string
 | 
						latestDate                          map[uint64]string                 // 最近一次更新时间
 | 
				
			||||||
	lastStatus                          map[uint64]string
 | 
						lastStatus                          map[uint64]string
 | 
				
			||||||
	serviceResponseDataStoreCurrentUp   map[uint64]uint64
 | 
						serviceResponseDataStoreCurrentUp   map[uint64]uint64         // [monitor_id] -> 当前服务在线计数
 | 
				
			||||||
	serviceResponseDataStoreCurrentDown map[uint64]uint64
 | 
						serviceResponseDataStoreCurrentDown map[uint64]uint64         // [monitor_id] -> 当前服务离线计数
 | 
				
			||||||
	monitors                            map[uint64]*model.Monitor
 | 
						monitors                            map[uint64]*model.Monitor // [monitor_id] -> model.Monitor
 | 
				
			||||||
	sslCertCache                        map[uint64]string
 | 
						sslCertCache                        map[uint64]string
 | 
				
			||||||
	// 30天数据缓存
 | 
						// 30天数据缓存
 | 
				
			||||||
	monthlyStatusLock sync.Mutex
 | 
						monthlyStatusLock sync.Mutex
 | 
				
			||||||
	monthlyStatus     map[uint64]*model.ServiceItemResponse
 | 
						monthlyStatus     map[uint64]*model.ServiceItemResponse // [monitor_id] -> model.ServiceItemResponse
 | 
				
			||||||
	// 服务监控调度计划任务
 | 
						// 服务监控任务调度管道
 | 
				
			||||||
	dispatchBus chan<- model.Monitor
 | 
						dispatchBus chan<- model.Monitor
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -120,6 +124,7 @@ func (ss *ServiceSentinel) refreshMonthlyServiceStatus() {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Dispatch 将传入的 ReportData 传给 服务状态汇报管道
 | 
				
			||||||
func (ss *ServiceSentinel) Dispatch(r ReportData) {
 | 
					func (ss *ServiceSentinel) Dispatch(r ReportData) {
 | 
				
			||||||
	ss.serviceReportChannel <- r
 | 
						ss.serviceReportChannel <- r
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -137,6 +142,7 @@ func (ss *ServiceSentinel) Monitors() []*model.Monitor {
 | 
				
			|||||||
	return monitors
 | 
						return monitors
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// LoadStats 加载服务监控器的历史状态信息
 | 
				
			||||||
func (ss *ServiceSentinel) loadMonitorHistory() {
 | 
					func (ss *ServiceSentinel) loadMonitorHistory() {
 | 
				
			||||||
	var monitors []*model.Monitor
 | 
						var monitors []*model.Monitor
 | 
				
			||||||
	DB.Find(&monitors)
 | 
						DB.Find(&monitors)
 | 
				
			||||||
@ -146,6 +152,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
 | 
				
			|||||||
	ss.monitors = make(map[uint64]*model.Monitor)
 | 
						ss.monitors = make(map[uint64]*model.Monitor)
 | 
				
			||||||
	for i := 0; i < len(monitors); i++ {
 | 
						for i := 0; i < len(monitors); i++ {
 | 
				
			||||||
		task := *monitors[i]
 | 
							task := *monitors[i]
 | 
				
			||||||
 | 
							// 通过cron定时将服务监控任务传递给任务调度管道
 | 
				
			||||||
		monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
 | 
							monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
 | 
				
			||||||
			ss.dispatchBus <- task
 | 
								ss.dispatchBus <- task
 | 
				
			||||||
		})
 | 
							})
 | 
				
			||||||
@ -171,7 +178,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// 加载历史记录
 | 
						// 加载服务监控历史记录
 | 
				
			||||||
	var mhs []model.MonitorHistory
 | 
						var mhs []model.MonitorHistory
 | 
				
			||||||
	DB.Where("created_at >= ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs)
 | 
						DB.Where("created_at >= ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs)
 | 
				
			||||||
	for i := 0; i < len(mhs); i++ {
 | 
						for i := 0; i < len(mhs); i++ {
 | 
				
			||||||
@ -266,6 +273,7 @@ func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse {
 | 
				
			|||||||
	return ss.monthlyStatus
 | 
						return ss.monthlyStatus
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// getStateStr 根据服务在线率返回对应的状态字符串
 | 
				
			||||||
func getStateStr(percent uint64) string {
 | 
					func getStateStr(percent uint64) string {
 | 
				
			||||||
	if percent == 0 {
 | 
						if percent == 0 {
 | 
				
			||||||
		return "无数据"
 | 
							return "无数据"
 | 
				
			||||||
@ -279,7 +287,9 @@ func getStateStr(percent uint64) string {
 | 
				
			|||||||
	return "故障"
 | 
						return "故障"
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// worker 服务监控的实际工作流程
 | 
				
			||||||
func (ss *ServiceSentinel) worker() {
 | 
					func (ss *ServiceSentinel) worker() {
 | 
				
			||||||
 | 
						// 从服务状态汇报管道获取汇报的服务数据
 | 
				
			||||||
	for r := range ss.serviceReportChannel {
 | 
						for r := range ss.serviceReportChannel {
 | 
				
			||||||
		if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 {
 | 
							if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 {
 | 
				
			||||||
			log.Printf("NEZAH>> 错误的服务监控上报 %+v", r)
 | 
								log.Printf("NEZAH>> 错误的服务监控上报 %+v", r)
 | 
				
			||||||
@ -355,7 +365,7 @@ func (ss *ServiceSentinel) worker() {
 | 
				
			|||||||
		// SSL 证书报警
 | 
							// SSL 证书报警
 | 
				
			||||||
		var errMsg string
 | 
							var errMsg string
 | 
				
			||||||
		if strings.HasPrefix(mh.Data, "SSL证书错误:") {
 | 
							if strings.HasPrefix(mh.Data, "SSL证书错误:") {
 | 
				
			||||||
			// 排除 i/o timeont、connection timeout、EOF 错误
 | 
								// 排除 i/o timeout、connection timeout、EOF 错误
 | 
				
			||||||
			if !strings.HasSuffix(mh.Data, "timeout") &&
 | 
								if !strings.HasSuffix(mh.Data, "timeout") &&
 | 
				
			||||||
				!strings.HasSuffix(mh.Data, "EOF") &&
 | 
									!strings.HasSuffix(mh.Data, "EOF") &&
 | 
				
			||||||
				!strings.HasSuffix(mh.Data, "timed out") {
 | 
									!strings.HasSuffix(mh.Data, "timed out") {
 | 
				
			||||||
 | 
				
			|||||||
@ -23,14 +23,15 @@ var (
 | 
				
			|||||||
	DB    *gorm.DB
 | 
						DB    *gorm.DB
 | 
				
			||||||
	Loc   *time.Location
 | 
						Loc   *time.Location
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ServerList map[uint64]*model.Server
 | 
						ServerList map[uint64]*model.Server // [ServerID] -> model.Server
 | 
				
			||||||
	SecretToID map[string]uint64
 | 
						SecretToID map[string]uint64        // [ServerSecret] -> ServerID
 | 
				
			||||||
	ServerLock sync.RWMutex
 | 
						ServerLock sync.RWMutex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	SortedServerList []*model.Server
 | 
						SortedServerList []*model.Server // 用于存储服务器列表的 slice,按照服务器 ID 排序
 | 
				
			||||||
	SortedServerLock sync.RWMutex
 | 
						SortedServerLock sync.RWMutex
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Init 初始化时区为上海时区
 | 
				
			||||||
func Init() {
 | 
					func Init() {
 | 
				
			||||||
	var err error
 | 
						var err error
 | 
				
			||||||
	Loc, err = time.LoadLocation("Asia/Shanghai")
 | 
						Loc, err = time.LoadLocation("Asia/Shanghai")
 | 
				
			||||||
@ -39,6 +40,7 @@ func Init() {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// ReSortServer 根据服务器ID 对服务器列表进行排序(ID越大越靠前)
 | 
				
			||||||
func ReSortServer() {
 | 
					func ReSortServer() {
 | 
				
			||||||
	ServerLock.RLock()
 | 
						ServerLock.RLock()
 | 
				
			||||||
	defer ServerLock.RUnlock()
 | 
						defer ServerLock.RUnlock()
 | 
				
			||||||
@ -50,6 +52,7 @@ func ReSortServer() {
 | 
				
			|||||||
		SortedServerList = append(SortedServerList, s)
 | 
							SortedServerList = append(SortedServerList, s)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// 按照服务器 ID 排序的具体实现(ID越大越靠前)
 | 
				
			||||||
	sort.SliceStable(SortedServerList, func(i, j int) bool {
 | 
						sort.SliceStable(SortedServerList, func(i, j int) bool {
 | 
				
			||||||
		if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
 | 
							if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
 | 
				
			||||||
			return SortedServerList[i].ID < SortedServerList[j].ID
 | 
								return SortedServerList[i].ID < SortedServerList[j].ID
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user