913 lines
36 KiB
C++
913 lines
36 KiB
C++
// +------------------------------------------------------------------+
|
|
// | ____ _ _ __ __ _ __ |
|
|
// | / ___| |__ ___ ___| | __ | \/ | |/ / |
|
|
// | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
|
|
// | | |___| | | | __/ (__| < | | | | . \ |
|
|
// | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
|
|
// | |
|
|
// | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
|
|
// +------------------------------------------------------------------+
|
|
//
|
|
// This file is part of Check_MK.
|
|
// The official homepage is at http://mathias-kettner.de/check_mk.
|
|
//
|
|
// check_mk is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation in version 2. check_mk is distributed
|
|
// in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
|
|
// out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
|
// PARTICULAR PURPOSE. See the GNU General Public License for more de-
|
|
// tails. You should have received a copy of the GNU General Public
|
|
// License along with GNU Make; see the file COPYING. If not, write
|
|
// to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
// Boston, MA 02110-1301 USA.
|
|
|
|
#include "TableStateHistory.h"
|
|
#include <cstdint>
|
|
#include <ctime>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <optional>
|
|
#include <ostream>
|
|
#include <set>
|
|
#include <stdexcept>
|
|
#include <utility>
|
|
#include <vector>
|
|
#include "Column.h"
|
|
#include "Filter.h"
|
|
#include "HostServiceState.h"
|
|
#include "LogEntry.h"
|
|
#include "Logger.h"
|
|
#include "OffsetDoubleColumn.h"
|
|
#include "OffsetIntColumn.h"
|
|
#include "OffsetSStringColumn.h"
|
|
#include "OffsetStringColumn.h"
|
|
#include "OffsetTimeColumn.h"
|
|
#include "OringFilter.h" // IWYU pragma: keep
|
|
#include "Query.h"
|
|
#include "Row.h"
|
|
#include "StringUtils.h"
|
|
#include "TableHosts.h"
|
|
#include "TableServices.h"
|
|
|
|
#ifdef CMC
|
|
// This seems to be an IWYU bug: If we remove the includes as suggested, we
|
|
// would do a member access on an incomplete type.
|
|
#include "Host.h" // IWYU pragma: keep
|
|
#include "Service.h" // IWYU pragma: keep
|
|
#include "Timeperiod.h"
|
|
#include "cmc.h"
|
|
#define STATE_OK 0
|
|
#define STATE_WARNING 1
|
|
#define STATE_CRITICAL 2
|
|
#define STATE_UNKNOWN 3
|
|
#else
|
|
#include "auth.h"
|
|
#include "nagios.h"
|
|
#endif
|
|
|
|
namespace {
|
|
constexpr unsigned classmask_statehist =
|
|
(1U << static_cast<int>(LogEntry::Class::alert)) | //
|
|
(1U << static_cast<int>(LogEntry::Class::program)) | //
|
|
(1U << static_cast<int>(LogEntry::Class::state)) | //
|
|
(1U << static_cast<int>(LogEntry::Class::text));
|
|
} // namespace
|
|
|
|
#ifndef CMC
|
|
namespace {
|
|
std::string getCustomVariable(customvariablesmember *cvm,
|
|
const std::string &name) {
|
|
for (; cvm != nullptr; cvm = cvm->next) {
|
|
if (cvm->variable_name == name) {
|
|
return cvm->variable_value == nullptr ? "" : cvm->variable_value;
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
} // namespace
|
|
#endif
|
|
|
|
// Registers all columns of the "statehist" table. Every column reads one
// member of a HostServiceState object; the columns of the hosts and services
// tables are joined in with the prefixes "current_host_" and
// "current_service_".
TableStateHistory::TableStateHistory(MonitoringCore *mc, LogCache *log_cache)
    : Table(mc), _log_cache(log_cache) {
    // One small helper per column type. All columns of this table pass -1 for
    // the three leading offset arguments and differ only in name, description
    // and the offset of the member inside HostServiceState.
    const auto time_column = [this](const char *name, const char *description,
                                    auto offset) {
        addColumn(std::make_unique<OffsetTimeColumn>(name, description, -1, -1,
                                                     -1, offset));
    };
    const auto int_column = [this](const char *name, const char *description,
                                   auto offset) {
        addColumn(std::make_unique<OffsetIntColumn>(name, description, -1, -1,
                                                    -1, offset));
    };
    const auto double_column = [this](const char *name,
                                      const char *description, auto offset) {
        addColumn(std::make_unique<OffsetDoubleColumn>(name, description, -1,
                                                       -1, -1, offset));
    };
    const auto string_column = [this](const char *name,
                                      const char *description, auto offset) {
        addColumn(std::make_unique<OffsetStringColumn>(name, description, -1,
                                                       -1, -1, offset));
    };
    const auto sstring_column = [this](const char *name,
                                       const char *description, auto offset) {
        addColumn(std::make_unique<OffsetSStringColumn>(name, description, -1,
                                                        -1, -1, offset));
    };

    // Basic information about the reported time span
    time_column("time", "Time of the log event (seconds since 1/1/1970)",
                DANGEROUS_OFFSETOF(HostServiceState, _time));
    int_column("lineno", "The number of the line in the log file",
               DANGEROUS_OFFSETOF(HostServiceState, _lineno));
    time_column("from", "Start time of state (seconds since 1/1/1970)",
                DANGEROUS_OFFSETOF(HostServiceState, _from));
    time_column("until", "End time of state (seconds since 1/1/1970)",
                DANGEROUS_OFFSETOF(HostServiceState, _until));
    int_column("duration", "Duration of state (until - from)",
               DANGEROUS_OFFSETOF(HostServiceState, _duration));
    double_column("duration_part",
                  "Duration part in regard to the query timeframe",
                  DANGEROUS_OFFSETOF(HostServiceState, _duration_part));

    // State and flags of the host or service during that time span
    int_column(
        "state",
        "The state of the host or service in question - OK(0) / WARNING(1) / CRITICAL(2) / UNKNOWN(3) / UNMONITORED(-1)",
        DANGEROUS_OFFSETOF(HostServiceState, _state));
    int_column("host_down", "Shows if the host of this service is down",
               DANGEROUS_OFFSETOF(HostServiceState, _host_down));
    int_column("in_downtime", "Shows if the host or service is in downtime",
               DANGEROUS_OFFSETOF(HostServiceState, _in_downtime));
    int_column("in_host_downtime",
               "Shows if the host of this service is in downtime",
               DANGEROUS_OFFSETOF(HostServiceState, _in_host_downtime));
    int_column("is_flapping", "Shows if the host or service is flapping",
               DANGEROUS_OFFSETOF(HostServiceState, _is_flapping));
    int_column("in_notification_period",
               "Shows if the host or service is within its notification period",
               DANGEROUS_OFFSETOF(HostServiceState, _in_notification_period));
    string_column("notification_period",
                  "The notification period of the host or service in question",
                  DANGEROUS_OFFSETOF(HostServiceState, _notification_period));
    int_column("in_service_period",
               "Shows if the host or service is within its service period",
               DANGEROUS_OFFSETOF(HostServiceState, _in_service_period));
    sstring_column("service_period",
                   "The service period of the host or service in question",
                   DANGEROUS_OFFSETOF(HostServiceState, _service_period));
    sstring_column("debug_info", "Debug information",
                   DANGEROUS_OFFSETOF(HostServiceState, _debug_info));
    sstring_column("host_name", "Host name",
                   DANGEROUS_OFFSETOF(HostServiceState, _host_name));
    sstring_column("service_description", "Description of the service",
                   DANGEROUS_OFFSETOF(HostServiceState, _service_description));
    sstring_column("log_output", "Logfile output relevant for this state",
                   DANGEROUS_OFFSETOF(HostServiceState, _log_output));

    // Durations split up by state
    int_column("duration_ok", "OK duration of state ( until - from )",
               DANGEROUS_OFFSETOF(HostServiceState, _duration_state_OK));
    double_column("duration_part_ok",
                  "OK duration part in regard to the query timeframe",
                  DANGEROUS_OFFSETOF(HostServiceState, _duration_part_OK));

    int_column("duration_warning", "WARNING duration of state (until - from)",
               DANGEROUS_OFFSETOF(HostServiceState, _duration_state_WARNING));
    double_column("duration_part_warning",
                  "WARNING duration part in regard to the query timeframe",
                  DANGEROUS_OFFSETOF(HostServiceState, _duration_part_WARNING));

    int_column("duration_critical", "CRITICAL duration of state (until - from)",
               DANGEROUS_OFFSETOF(HostServiceState, _duration_state_CRITICAL));
    double_column(
        "duration_part_critical",
        "CRITICAL duration part in regard to the query timeframe",
        DANGEROUS_OFFSETOF(HostServiceState, _duration_part_CRITICAL));

    int_column("duration_unknown", "UNKNOWN duration of state (until - from)",
               DANGEROUS_OFFSETOF(HostServiceState, _duration_state_UNKNOWN));
    double_column("duration_part_unknown",
                  "UNKNOWN duration part in regard to the query timeframe",
                  DANGEROUS_OFFSETOF(HostServiceState, _duration_part_UNKNOWN));

    int_column(
        "duration_unmonitored", "UNMONITORED duration of state (until - from)",
        DANGEROUS_OFFSETOF(HostServiceState, _duration_state_UNMONITORED));
    double_column(
        "duration_part_unmonitored",
        "UNMONITORED duration part in regard to the query timeframe",
        DANGEROUS_OFFSETOF(HostServiceState, _duration_part_UNMONITORED));

    // Join the host and service tables
    TableHosts::addColumns(this, "current_host_",
                           DANGEROUS_OFFSETOF(HostServiceState, _host), -1);
    TableServices::addColumns(this, "current_service_",
                              DANGEROUS_OFFSETOF(HostServiceState, _service),
                              false /* no hosts table */);
}
|
|
|
|
// Returns the name of this table: "statehist".
std::string TableStateHistory::name() const {
    return std::string{"statehist"};
}
|
|
|
|
// Returns the column name prefix of this table: "statehist_".
std::string TableStateHistory::namePrefix() const {
    return std::string{"statehist_"};
}
|
|
|
|
void TableStateHistory::getPreviousLogentry() {
|
|
while (_it_entries == _entries->begin()) {
|
|
// open previous logfile
|
|
if (_it_logs == _log_cache->begin()) {
|
|
return;
|
|
}
|
|
--_it_logs;
|
|
_entries = _it_logs->second->getEntriesFor(classmask_statehist);
|
|
_it_entries = _entries->end();
|
|
}
|
|
--_it_entries;
|
|
}
|
|
|
|
// Advances the entry cursor by one and returns the entry it then points to.
// When the end of the current logfile is reached, newer logfiles are opened
// until one with entries is found. Returns nullptr when there is no newer
// logfile left; in that case the cursor stays at the end of the current one.
LogEntry *TableStateHistory::getNextLogentry() {
    if (_it_entries != _entries->end()) {
        ++_it_entries;
    }

    for (;;) {
        if (_it_entries != _entries->end()) {
            return _it_entries->second.get();
        }
        // Peek at the following logfile without moving _it_logs yet, so that
        // it never ends up pointing at end().
        auto following_log = _it_logs;
        ++following_log;
        if (following_log == _log_cache->end()) {
            return nullptr;
        }
        _it_logs = following_log;
        _entries = _it_logs->second->getEntriesFor(classmask_statehist);
        _it_entries = _entries->begin();
    }
}
|
|
|
|
namespace {
|
|
class TimeperiodTransition {
|
|
public:
|
|
explicit TimeperiodTransition(const std::string &str) {
|
|
auto fields = mk::split(str, ';');
|
|
if (fields.size() != 3) {
|
|
throw std::invalid_argument("expected 3 arguments");
|
|
}
|
|
_name = fields[0];
|
|
_from = std::stoi(fields[1]);
|
|
_to = std::stoi(fields[2]);
|
|
}
|
|
|
|
[[nodiscard]] std::string name() const { return _name; }
|
|
[[nodiscard]] int from() const { return _from; }
|
|
[[nodiscard]] int to() const { return _to; }
|
|
|
|
private:
|
|
std::string _name;
|
|
int _from;
|
|
int _to;
|
|
};
|
|
} // namespace
|
|
|
|
// Create a partial filter, that contains only such filters that check
|
|
// attributes of current hosts and services
|
|
|
|
// static
|
|
std::unique_ptr<Filter> TableStateHistory::createPartialFilter(
|
|
const Query &query) {
|
|
return query.partialFilter(
|
|
"current host/service columns", [](const Column &column) {
|
|
return mk::starts_with(column.name(), "current_") ||
|
|
mk::starts_with(column.name(), "host_") ||
|
|
mk::starts_with(column.name(), "service_");
|
|
});
|
|
}
|
|
|
|
// Answers a query on the state history table.
//
// The query must contain a lower bound for the column "time"; otherwise it is
// rejected as an invalid request. The upper bound defaults to the current
// time. The log entries are then replayed from the chosen entry point up to
// the end of the timeframe. For every host/service a HostServiceState object
// tracks the current state, and whenever a tracked attribute changes inside
// the timeframe a row is emitted via process(). At the end one final row is
// emitted for every tracked object.
//
// The lock of the log cache is held for the whole duration of the query.
//
// The HostServiceState objects are owned by state_info (std::unique_ptr), so
// they are released on every way out of this function, including exceptions.
void TableStateHistory::answerQuery(Query *query) {
    auto object_filter = createPartialFilter(*query);
    std::lock_guard<std::mutex> lg(_log_cache->_lock);
    _log_cache->update();
    if (_log_cache->begin() == _log_cache->end()) {
        return;
    }

    // This flag might be set to true by the return value of processDataset(...)
    _abort_query = false;

    // Keep track of the historic state of services/hosts here. The map owns
    // the state objects; raw pointers handed out elsewhere (e.g. in
    // HostServiceState::_services) are non-owning and only valid while this
    // map is alive.
    std::map<HostServiceKey, std::unique_ptr<HostServiceState>> state_info;

    // Store hosts/services that we have filtered out here
    std::set<HostServiceKey> object_blacklist;

    // Optimize time interval for the query. In log queries there should always
    // be a time range in form of one or two filter expressions over time. We
    // use that to limit the number of logfiles we need to scan and to find the
    // optimal entry point into the logfile
    if (auto glb = query->greatestLowerBoundFor("time")) {
        _since = *glb;
    } else {
        query->invalidRequest(
            "Start of timeframe required. e.g. Filter: time > 1234567890");
        return;
    }
    _until = query->leastUpperBoundFor("time").value_or(time(nullptr)) + 1;

    _query_timeframe = _until - _since - 1;
    if (_query_timeframe == 0) {
        query->invalidRequest("Query timeframe is 0 seconds");
        return;
    }

    // Switch to last logfile (we have at least one)
    _it_logs = _log_cache->end();
    --_it_logs;
    auto newest_log = _it_logs;

    // Now find the log where 'since' starts.
    while (_it_logs != _log_cache->begin() && _it_logs->first >= _since) {
        --_it_logs;  // go back in history
    }

    // Check if 'until' is within these logfiles
    if (_it_logs->first > _until) {
        // All logfiles are too new, invalid timeframe
        // -> No data available. Return empty result.
        return;
    }

    // Determine initial logentry
    // NOTE(review): getNextLogentry() advances the cursor before it returns
    // an entry, so when the cursor is placed on begin() here the first entry
    // of that logfile is not visited by the loop below. Confirm that this is
    // intended.
    _entries = _it_logs->second->getEntriesFor(classmask_statehist);
    if (!_entries->empty() && _it_logs != newest_log) {
        _it_entries = _entries->end();
        // Check last entry. If it's younger than _since -> use this logfile too
        if (--_it_entries != _entries->begin()) {
            if (_it_entries->second->_time >= _since) {
                _it_entries = _entries->begin();
            }
        }
    } else {
        _it_entries = _entries->begin();
    }

    // From now on use getPreviousLogentry() / getNextLogentry()
    bool only_update = true;
    bool in_nagios_initial_states = false;

    while (LogEntry *entry = getNextLogentry()) {
        if (_abort_query) {
            break;
        }

        if (entry->_time >= _until) {
            getPreviousLogentry();
            break;
        }
        if (only_update && entry->_time >= _since) {
            // Reached start of query timeframe. From now on let's produce real
            // output. Update _from time of every state entry
            for (auto &it_hst : state_info) {
                it_hst.second->_from = _since;
                it_hst.second->_until = _since;
            }
            only_update = false;
        }

        if (in_nagios_initial_states &&
            !(entry->_type == LogEntryType::state_service_initial ||
              entry->_type == LogEntryType::state_host_initial)) {
            // Set still unknown hosts / services to unmonitored
            for (auto &it_hst : state_info) {
                HostServiceState *hst = it_hst.second.get();
                if (hst->_may_no_longer_exist) {
                    hst->_has_vanished = true;
                }
            }
            in_nagios_initial_states = false;
        }

        HostServiceKey key = nullptr;
        bool is_service = false;
        switch (entry->_type) {
            case LogEntryType::none:
            case LogEntryType::core_starting:
            case LogEntryType::core_stopping:
            case LogEntryType::log_version:
            case LogEntryType::acknowledge_alert_host:
            case LogEntryType::acknowledge_alert_service:
                break;
            case LogEntryType::alert_service:
            case LogEntryType::state_service:
            case LogEntryType::state_service_initial:
            case LogEntryType::downtime_alert_service:
            case LogEntryType::flapping_service:
                key = entry->_service;
                is_service = true;
                [[fallthrough]];
            case LogEntryType::alert_host:
            case LogEntryType::state_host:
            case LogEntryType::state_host_initial:
            case LogEntryType::downtime_alert_host:
            case LogEntryType::flapping_host: {
                if (!is_service) {
                    key = entry->_host;
                }

                if (key == nullptr) {
                    continue;
                }

                if (object_blacklist.find(key) != object_blacklist.end()) {
                    // Host/Service is not needed for this query and has already
                    // been filtered out.
                    continue;
                }

                // Find state object for this host/service
                HostServiceState *state;
                auto it_hst = state_info.find(key);
                if (it_hst == state_info.end()) {
                    // Create state object that we also need for filtering right
                    // now. It stays owned by new_state until it is moved into
                    // state_info, so it is released automatically if it is
                    // filtered out below.
                    auto new_state = std::make_unique<HostServiceState>();
                    state = new_state.get();
                    state->_is_host = entry->_svc_desc.empty();
                    state->_host = entry->_host;
                    state->_service = entry->_service;
#ifdef CMC
                    state->_host_name = entry->_host->name();
                    state->_service_description = entry->_service == nullptr
                                                      ? ""
                                                      : entry->_service->name();
#else
                    state->_host_name = entry->_host->name;
                    state->_service_description =
                        entry->_service == nullptr
                            ? ""
                            : entry->_service->description;
#endif

                    // No state found. Now check if this host/services is
                    // filtered out. Note: we currently do not filter out hosts
                    // since they might be needed for service states
                    if (!entry->_svc_desc.empty()) {
                        if (!object_filter->accepts(Row(state),
                                                    query->authUser(),
                                                    query->timezoneOffset())) {
                            object_blacklist.insert(key);
                            continue;
                        }
                    }

                    // Host/Service relations
                    if (state->_is_host) {
                        for (auto &it_inh : state_info) {
                            if (it_inh.second->_host == state->_host) {
                                state->_services.push_back(it_inh.second.get());
                            }
                        }
                    } else {
                        auto it_inh = state_info.find(state->_host);
                        if (it_inh != state_info.end()) {
                            it_inh->second->_services.push_back(state);
                        }
                    }

                    // Store this state object for tracking state transitions
                    state_info.emplace(key, std::move(new_state));
                    state->_from = _since;

                    // Get notification period of host/service
                    // If this host/service is no longer available in nagios ->
                    // set to ""
                    if (state->_service != nullptr) {
#ifdef CMC
                        state->_notification_period =
                            state->_service->notificationPeriod()->name();
#else
                        auto np = state->_service->notification_period;
                        state->_notification_period = np == nullptr ? "" : np;
#endif
                    } else if (state->_host != nullptr) {
#ifdef CMC
                        state->_notification_period =
                            state->_host->notificationPeriod()->name();
#else
                        auto np = state->_host->notification_period;
                        state->_notification_period = np == nullptr ? "" : np;
#endif
                    } else {
                        state->_notification_period = "";
                    }

                    // Same for service period. For Nagios this is a bit
                    // different, since this is no native field but just a
                    // custom variable
                    if (state->_service != nullptr) {
#ifdef CMC
                        state->_service_period =
                            state->_service->servicePeriod()->name();
#else
                        state->_service_period =
                            getCustomVariable(state->_service->custom_variables,
                                              "SERVICE_PERIOD");
#endif
                    } else if (state->_host != nullptr) {
#ifdef CMC
                        state->_service_period =
                            state->_host->servicePeriod()->name();
#else
                        state->_service_period = getCustomVariable(
                            state->_host->custom_variables, "SERVICE_PERIOD");
#endif
                    } else {
                        state->_service_period = "";
                    }

                    // Determine initial in_notification_period status
                    auto tmp_period =
                        _notification_periods.find(state->_notification_period);
                    if (tmp_period != _notification_periods.end()) {
                        state->_in_notification_period = tmp_period->second;
                    } else {
                        state->_in_notification_period = 1;
                    }

                    // Same for service period
                    tmp_period =
                        _notification_periods.find(state->_service_period);
                    if (tmp_period != _notification_periods.end()) {
                        state->_in_service_period = tmp_period->second;
                    } else {
                        state->_in_service_period = 1;
                    }

                    // If this key is a service try to find its host and apply
                    // its _in_host_downtime and _host_down parameters
                    if (!state->_is_host) {
                        auto my_host = state_info.find(state->_host);
                        if (my_host != state_info.end()) {
                            state->_in_host_downtime =
                                my_host->second->_in_host_downtime;
                            state->_host_down = my_host->second->_host_down;
                        }
                    }

                    // Log UNMONITORED state if this host or service just
                    // appeared within the query timeframe
                    // It gets a grace period of ten minutes (nagios startup)
                    if (!only_update && entry->_time - _since > 60 * 10) {
                        state->_debug_info = "UNMONITORED ";
                        state->_state = -1;
                    }
                } else {
                    state = it_hst->second.get();
                }

                int state_changed =
                    updateHostServiceState(query, entry, state, only_update);
                // Host downtime or state changes also affect its services
                if (entry->_type == LogEntryType::alert_host ||
                    entry->_type == LogEntryType::state_host ||
                    entry->_type == LogEntryType::downtime_alert_host) {
                    if (state_changed != 0) {
                        for (auto &_service : state->_services) {
                            updateHostServiceState(query, entry, _service,
                                                   only_update);
                        }
                    }
                }
                break;
            }
            case LogEntryType::timeperiod_transition: {
                try {
                    TimeperiodTransition tpt(entry->_options);
                    _notification_periods[tpt.name()] = tpt.to();
                    for (auto &it_hst : state_info) {
                        updateHostServiceState(query, entry,
                                               it_hst.second.get(),
                                               only_update);
                    }
                } catch (const std::logic_error &) {
                    Warning(logger())
                        << "Error: Invalid syntax of TIMEPERIOD TRANSITION: "
                        << entry->_complete;
                }
                break;
            }
            case LogEntryType::log_initial_states: {
                // This feature is only available if log_initial_states is set
                // to 1. If log_initial_states is set, each nagios startup logs
                // the initial states of all known hosts and services. Therefore
                // we can detect if a host is no longer available after a nagios
                // startup. If it still exists an INITIAL HOST/SERVICE state
                // entry will follow up shortly.
                for (auto &it_hst : state_info) {
                    if (!it_hst.second->_has_vanished) {
                        it_hst.second->_last_known_time = entry->_time;
                        it_hst.second->_may_no_longer_exist = true;
                    }
                }
                in_nagios_initial_states = true;
                break;
            }
        }
    }

    // Create final reports
    if (!_abort_query) {
        for (auto &it_hst : state_info) {
            HostServiceState *hst = it_hst.second.get();

            // No trace since the last two nagios startup -> host/service has
            // vanished
            if (hst->_may_no_longer_exist) {
                // Log last known state up to nagios restart
                hst->_time = hst->_last_known_time;
                hst->_until = hst->_last_known_time;
                process(query, hst);

                // Set absent state
                hst->_state = -1;
                hst->_debug_info = "UNMONITORED";
                hst->_log_output = "";
            }

            hst->_time = _until - 1;
            hst->_until = hst->_time;

            process(query, hst);
        }
    }

    // No explicit cleanup needed: state_info and object_blacklist release
    // everything they own when they go out of scope.
}
|
|
|
|
// Applies a single log entry to the tracked state of one host or service.
//
// Whenever the entry changes a tracked attribute, the time span that ended
// with this entry is first reported via process() (unless only_update is
// set, which suppresses all output), and then the attribute is updated.
//
// Returns 0 if the entry did not change anything for
//   - a host state entry applied to a host object,
//   - a host downtime entry,
//   - a flapping entry,
// and 1 in all other cases.
int TableStateHistory::updateHostServiceState(Query *query,
                                              const LogEntry *entry,
                                              HostServiceState *hs_state,
                                              const bool only_update) {
    const bool is_timeperiod_entry =
        entry->_type == LogEntryType::timeperiod_transition;

    // Reports the time span that ends now, unless we are still replaying
    // entries from before the query timeframe.
    const auto emit_row = [&]() {
        if (!only_update) {
            process(query, hs_state);
        }
    };

    // Last logged status of the given timeperiod. Without any information
    // about the period we assume that we are within it.
    const auto period_status = [this](const auto &period_name) {
        const auto found = _notification_periods.find(period_name);
        return found == _notification_periods.end() ? 1 : found->second;
    };

    int changed = 1;

    // Revive host / service if it was unmonitored
    if (!is_timeperiod_entry && hs_state->_has_vanished) {
        hs_state->_time = hs_state->_last_known_time;
        hs_state->_until = hs_state->_last_known_time;
        emit_row();

        hs_state->_may_no_longer_exist = false;
        hs_state->_has_vanished = false;

        // Set absent state
        hs_state->_state = -1;
        hs_state->_debug_info = "UNMONITORED";
        hs_state->_in_downtime = 0;
        hs_state->_is_flapping = 0;
        hs_state->_log_output = "";

        // Apply the latest information about the notification period and the
        // service period
        hs_state->_in_notification_period =
            period_status(hs_state->_notification_period);
        hs_state->_in_service_period = period_status(hs_state->_service_period);
    }

    // Update basic information
    hs_state->_time = entry->_time;
    hs_state->_lineno = entry->_lineno;
    hs_state->_until = entry->_time;

    // A timeperiod entry never brings an absent host or service into
    // existence..
    if (!is_timeperiod_entry) {
        hs_state->_may_no_longer_exist = false;
    }

    switch (entry->_type) {
        case LogEntryType::none:
        case LogEntryType::core_starting:
        case LogEntryType::core_stopping:
        case LogEntryType::log_version:
        case LogEntryType::log_initial_states:
        case LogEntryType::acknowledge_alert_host:
        case LogEntryType::acknowledge_alert_service:
            break;

        case LogEntryType::state_host:
        case LogEntryType::state_host_initial:
        case LogEntryType::alert_host: {
            const int host_down = static_cast<int>(entry->_state > 0);
            if (hs_state->_is_host) {
                if (hs_state->_state == entry->_state) {
                    changed = 0;
                    break;
                }
                emit_row();
                hs_state->_state = entry->_state;
                hs_state->_host_down = host_down;
                hs_state->_debug_info = "HOST STATE";
            } else if (hs_state->_host_down != host_down) {
                // A service only tracks whether its host is down
                emit_row();
                hs_state->_host_down = host_down;
                hs_state->_debug_info = "SVC HOST STATE";
            }
            break;
        }

        case LogEntryType::state_service:
        case LogEntryType::state_service_initial:
        case LogEntryType::alert_service: {
            if (hs_state->_state == entry->_state) {
                break;
            }
            emit_row();
            hs_state->_debug_info = "SVC ALERT";
            hs_state->_state = entry->_state;
            break;
        }

        case LogEntryType::downtime_alert_host: {
            const int downtime_active =
                mk::starts_with(entry->_state_type, "STARTED") ? 1 : 0;
            if (hs_state->_in_host_downtime == downtime_active) {
                changed = 0;
                break;
            }
            emit_row();
            if (hs_state->_is_host) {
                hs_state->_debug_info = "HOST DOWNTIME";
            } else {
                hs_state->_debug_info = "SVC HOST DOWNTIME";
            }
            hs_state->_in_host_downtime = downtime_active;
            if (hs_state->_is_host) {
                hs_state->_in_downtime = downtime_active;
            }
            break;
        }

        case LogEntryType::downtime_alert_service: {
            const int downtime_active =
                mk::starts_with(entry->_state_type, "STARTED") ? 1 : 0;
            if (hs_state->_in_downtime == downtime_active) {
                break;
            }
            emit_row();
            hs_state->_debug_info = "DOWNTIME SERVICE";
            hs_state->_in_downtime = downtime_active;
            break;
        }

        case LogEntryType::flapping_host:
        case LogEntryType::flapping_service: {
            const int flapping_active =
                mk::starts_with(entry->_state_type, "STARTED") ? 1 : 0;
            if (hs_state->_is_flapping == flapping_active) {
                changed = 0;
                break;
            }
            emit_row();
            hs_state->_debug_info = "FLAPPING ";
            hs_state->_is_flapping = flapping_active;
            break;
        }

        case LogEntryType::timeperiod_transition: {
            try {
                TimeperiodTransition tpt(entry->_options);
                // if no _host pointer is available the initial status of
                // _in_notification_period (1) never changes; the same holds
                // for the service period
                if (hs_state->_host != nullptr) {
                    if (tpt.name() == hs_state->_notification_period &&
                        tpt.to() != hs_state->_in_notification_period) {
                        emit_row();
                        hs_state->_debug_info = "TIMEPERIOD ";
                        hs_state->_in_notification_period = tpt.to();
                    }
                    if (tpt.name() == hs_state->_service_period &&
                        tpt.to() != hs_state->_in_service_period) {
                        emit_row();
                        hs_state->_debug_info = "TIMEPERIOD ";
                        hs_state->_in_service_period = tpt.to();
                    }
                }
            } catch (const std::logic_error &) {
                Warning(logger())
                    << "Error: Invalid syntax of TIMEPERIOD TRANSITION: "
                    << entry->_complete;
            }
            break;
        }
    }

    // Remember the check output of the entry. The output "(null)" of an
    // initial state entry is stored as an empty string.
    if (!is_timeperiod_entry) {
        const bool is_initial_state =
            entry->_type == LogEntryType::state_host_initial ||
            entry->_type == LogEntryType::state_service_initial;
        if (is_initial_state && entry->_check_output == "(null)") {
            hs_state->_log_output = "";
        } else {
            hs_state->_log_output = entry->_check_output;
        }
    }

    return changed;
}
|
|
|
|
// Reports the time span [_from, _until] of the given state object as one row
// of the query result and then starts the next time span at _until.
//
// Before the row is handed to the query, the duration and its share of the
// query timeframe are computed. Both values are additionally stored in the
// per-state fields of the current state (-1 = UNMONITORED, STATE_OK,
// STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN); the per-state fields of all
// other states are set to 0. If processDataset() returns false, _abort_query
// is set.
void TableStateHistory::process(Query *query, HostServiceState *hs_state) {
    hs_state->_duration = hs_state->_until - hs_state->_from;
    hs_state->_duration_part = static_cast<double>(hs_state->_duration) /
                               static_cast<double>(_query_timeframe);

    // Books duration and duration part on the fields of the given state if
    // that is the current state, and zeroes those fields otherwise.
    const auto book = [hs_state](int for_state, auto &duration_field,
                                 auto &part_field) {
        if (hs_state->_state == for_state) {
            duration_field = hs_state->_duration;
            part_field = hs_state->_duration_part;
        } else {
            duration_field = 0;
            part_field = 0;
        }
    };

    book(-1, hs_state->_duration_state_UNMONITORED,
         hs_state->_duration_part_UNMONITORED);
    book(STATE_OK, hs_state->_duration_state_OK, hs_state->_duration_part_OK);
    book(STATE_WARNING, hs_state->_duration_state_WARNING,
         hs_state->_duration_part_WARNING);
    book(STATE_CRITICAL, hs_state->_duration_state_CRITICAL,
         hs_state->_duration_part_CRITICAL);
    book(STATE_UNKNOWN, hs_state->_duration_state_UNKNOWN,
         hs_state->_duration_part_UNKNOWN);

    // The row is emitted unconditionally, i.e. also for a duration of 0.
    const bool keep_going = query->processDataset(Row(hs_state));
    _abort_query = !keep_going;

    hs_state->_from = hs_state->_until;
}
|
|
|
|
// Decides whether the given contact may see the given row. A row which
// refers to neither a host nor a service is never visible; otherwise the
// decision is delegated to is_authorized_for().
bool TableStateHistory::isAuthorized(Row row, const contact *ctc) const {
    auto state = rowData<HostServiceState>(row);
    host *hst = state->_host;
    service *svc = state->_service;
    if (hst == nullptr && svc == nullptr) {
        return false;
    }
    return is_authorized_for(core(), ctc, hst, svc);
}
|
|
|
|
std::shared_ptr<Column> TableStateHistory::column(std::string colname) const {
|
|
try {
|
|
// First try to find column in the usual way
|
|
return Table::column(colname);
|
|
} catch (const std::runtime_error &e) {
|
|
// Now try with prefix "current_", since our joined tables have this
|
|
// prefix in order to make clear that we access current and not historic
|
|
// data and in order to prevent mixing up historic and current fields
|
|
// with the same name.
|
|
return Table::column("current_" + colname);
|
|
}
|
|
}
|