package main import ( "fmt" "strconv" "strings" "time" "redisdog/model" ) const ( MONITOR_DEFAULT_LOOP_INTERVAL = 30 MONITOR_DEFAULT_MAIL_INTERVAL = 3600 ) type RedisMonitor struct { redisCfg *model.RedisCfg monitorLogger *model.MonitorLog warnLogger *model.WarnLog processLogger *model.ProcessLog logs map[int64]*model.MonitorLogRow } func NewRedisMonitor() *RedisMonitor { return &RedisMonitor{ redisCfg: model.NewRedisCfg(Db), monitorLogger: model.NewMonitorLog(Db), warnLogger: model.NewWarnLog(Db), processLogger: model.NewProcessLog(Db), logs: make(map[int64]*model.MonitorLogRow), } } func (m *RedisMonitor) Loop() { interval := MONITOR_DEFAULT_LOOP_INTERVAL for { if str, ok := SysCfg.Get("monitor_loop_interval"); ok { if num, err := strconv.Atoi(str); err == nil { interval = num } else { SYSLOG("DEBUG", fmt.Sprintf("表syscfg记录cfg_key=monitor_loop_interval的值不是数字:%s", err.Error())) } } time.Sleep(time.Second * time.Duration(interval)) m.loopStep() } } func (m *RedisMonitor) loopStep() { interval := MONITOR_DEFAULT_MAIL_INTERVAL rows, err := m.redisCfg.GetAll(0) if err != nil { SYSLOG("WARN", fmt.Sprintf("函数model.RedisCfg.GetAll(0)调用失败:%s", err.Error())) return } for _, row := range rows { now, mails := time.Now(), make([]*MailRequest, 0) ts, tstr := now.Unix(), now.String() //取配置 if str, ok := SysCfg.Get("monitor_mail_interval"); ok { if num, err := strconv.Atoi(str); err == nil { interval = num } else { SYSLOG("DEBUG", fmt.Sprintf("表syscfg记录cfg_key=monitor_mail_interval的值不是数字:%s", err.Error())) } } //初始化检测数据 if _, ok := m.logs[row.Id]; !ok { m.logs[row.Id] = &model.MonitorLogRow{RedisId: row.Id} } m.logs[row.Id].LogTime = ts //查询Redis的状态信息 info := queryRedisInfo(row) if info.Error != "" { //更新检测数据 m.logs[row.Id].QueryStatus = false m.logs[row.Id].FailedCount++ //判断状态异常检查是否连接出错次数达到上限 if m.logs[row.Id].FailedCount >= row.MaxStatusFailed { mails = append(mails, &MailRequest{ sendto: strings.Split(row.MailList, ";"), title: fmt.Sprintf("报警:Redis[%s]状态检测失败", row.Address), content: fmt.Sprintf(`

时间:%s

Redis#%d[%s]状态检测失败:%s,已连续失败:%d

`, tstr, row.Id, row.Address, info.Error, m.logs[row.Id].FailedCount), }) } //记录日志 SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态查询失败:%s,连接第%d次", row.Id, row.Address, info.Error, m.logs[row.Id].FailedCount)) } else { var usedMemory, maxMemory, systemMemory, connections, qps, evictedKeys, eviIncreased int //已用内存 if usedMemory, err = strconv.Atoi(info.Data["used_memory"]); err != nil { SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态字段used_memory值无效:%s", row.Id, row.Address, err.Error())) } //最大可分配内存 if maxMemory, err = strconv.Atoi(info.Data["maxmemory"]); err != nil { SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态字段maxmemory值无效:%s", row.Id, row.Address, err.Error())) } //系统内存 if systemMemory, err = strconv.Atoi(info.Data["total_system_memory"]); err != nil { SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态字段total_system_memory值无效:%s", row.Id, row.Address, err.Error())) } //连接数 if connections, err = strconv.Atoi(info.Data["connected_clients"]); err != nil { SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态字段connected_clients值无效:%s", row.Id, row.Address, err.Error())) } //QPS if qps, err = strconv.Atoi(info.Data["instantaneous_ops_per_sec"]); err != nil { SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态字段instantaneous_ops_per_sec值无效:%s", row.Id, row.Address, err.Error())) } //淘汰数 if evictedKeys, err = strconv.Atoi(info.Data["evicted_keys"]); err != nil { SYSLOG("WARN", fmt.Sprintf("Redis#%d[%s]状态字段evicted_keys值无效:%s", row.Id, row.Address, err.Error())) } //新增淘汰数 eviIncreased = evictedKeys - int(m.logs[row.Id].EvictedKeys) //检查连接的客户端数量是否达到上限 if connections >= int(row.MaxConnection) { mails = append(mails, &MailRequest{ sendto: strings.Split(row.MailList, ";"), title: fmt.Sprintf("报警:Redis[%s]连接数达到上限", row.Address), content: fmt.Sprintf(`

时间:%s

Redis#%d[%s]连接数已达到:%d,上限为:%d

`, tstr, row.Id, row.Address, connections, row.MaxConnection), }) } //检查QPS是否达到上限 if qps >= int(row.MaxQPS) { mails = append(mails, &MailRequest{ sendto: strings.Split(row.MailList, ";"), title: fmt.Sprintf("报警:Redis[%s]QPS达到上限", row.Address), content: fmt.Sprintf(`

时间:%s

Redis#%d[%s]QPS已达到:%d,上限为:%d

`, tstr, row.Id, row.Address, qps, row.MaxQPS), }) } //检查新增淘汰数是否达到上限 if eviIncreased >= int(row.MaxEviIncreased) { mails = append(mails, &MailRequest{ sendto: strings.Split(row.MailList, ";"), title: fmt.Sprintf("报警:Redis[%s]新增淘汰记录数达到上限", row.Address), content: fmt.Sprintf(`

时间:%s

Redis#%d[%s]新增淘汰记录数已达到:%d,上限为:%d

`, tstr, row.Id, row.Address, eviIncreased, row.MaxEviIncreased), }) } //有内存使用限制,并且允许自动扩容 if maxMemory > 0 { flag := false //检查当前使用内存是否达到了预警值 freeMemory := maxMemory - usedMemory if freeMemory < int(row.MinMemoryFree) { mails = append(mails, &MailRequest{ sendto: strings.Split(row.MailList, ";"), title: fmt.Sprintf("报警:Redis[%s]可用内存不足", row.Address), content: fmt.Sprintf(`

时间:%s

Redis#%d[%s]可用内存不足,分配:%s,剩余:%s,要求剩余:%s

`, tstr, row.Id, row.Address, info.Data["maxmemory_human"], number2size(float64(freeMemory)), number2size(float64(row.MinMemoryFree))), }) flag = true } //判断是否需要自动扩容 if flag && row.StepMemoryIncrease > 0 && maxMemory < int(row.MaxMemoryUsage) { newMaxMemory := int64(maxMemory) + row.StepMemoryIncrease if newMaxMemory > row.MaxMemoryUsage { newMaxMemory = row.MaxMemoryUsage } ret, err := resetRedisConfig(row, newMaxMemory) //判断扩容结果 if err != nil { SYSLOG("ERROR", fmt.Sprintf("Redis#%d[%s]扩容失败:%s", row.Id, row.Address, err.Error())) } else { if ret { mails = append(mails, &MailRequest{ sendto: strings.Split(row.MailList, ";"), title: fmt.Sprintf("通知:Redis[%s]已自动扩容", row.Address), content: fmt.Sprintf(`

时间:%s

Redis#%d[%s]最大可用内存限制已由%s调整为%s

`, tstr, row.Id, row.Address, info.Data["maxmemory_human"], number2size(float64(newMaxMemory))), }) if _, err = m.processLogger.Add(&model.ProcessLogRow{RedisId: row.Id, MaxMemoryBefore: int64(maxMemory), MaxMemoryAfter: newMaxMemory}); err != nil { SYSLOG("ERROR", fmt.Sprintf("Redis#%d[%s]扩容日志写入DB失败:%s", row.Id, row.Address, err.Error())) } } else { SYSLOG("ERROR", fmt.Sprintf("Redis#%d[%s]扩容失败:无改变", row.Id, row.Address)) } } } } //更新检测数据 m.logs[row.Id].QueryStatus = true m.logs[row.Id].FailedCount = 0 m.logs[row.Id].UsedMemory = int64(usedMemory) m.logs[row.Id].MaxMemory = int64(maxMemory) m.logs[row.Id].SystemMemory = int64(systemMemory) m.logs[row.Id].Connection = int64(connections) m.logs[row.Id].QPS = int64(qps) m.logs[row.Id].EvictedKeys = int64(evictedKeys) m.logs[row.Id].EviIncreased = int64(eviIncreased) } //添加任务到邮件发送队列 if len(mails) > 0 && m.logs[row.Id].LastMailTime < ts-int64(interval) { m.logs[row.Id].LastMailTime = ts for _, mail := range mails { Sender.Push(mail) if _, err = m.warnLogger.Add(&model.WarnLogRow{RedisId: row.Id, WarnMsg: mail.content}); err != nil { SYSLOG("ERROR", fmt.Sprintf("Redis#%d[%s]报警日志写入DB失败:%s", row.Id, row.Address, err.Error())) } } } //记录检查日志 if _, err = m.monitorLogger.Add(m.logs[row.Id]); err != nil { SYSLOG("ERROR", fmt.Sprintf("Redis#%d[%s]监控日志写入DB失败:%s", row.Id, row.Address, err.Error())) } } }