Merge pull request #11887 from annando/maxload

Pause the worker execution when the load is too high
This commit is contained in:
Hypolite Petovan 2022-09-04 15:54:40 -04:00 committed by GitHub
commit 934a3a6721
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 105 additions and 53 deletions

View File

@ -435,6 +435,33 @@ class System
return max($load_arr[0], $load_arr[1]); return max($load_arr[0], $load_arr[1]);
} }
/**
* Fetch the load and number of processes
*
* @return array
*/
public static function getLoadAvg(): array
{
$content = file_get_contents('/proc/loadavg');
if (empty($content)) {
$content = shell_exec('cat /proc/loadavg');
}
if (empty($content)) {
return [];
}
if (!preg_match("#([.\d]+)\s([.\d]+)\s([.\d]+)\s(\d+)/(\d+)#", $content, $matches)) {
return [];
}
return [
'average1' => (float)$matches[1],
'average5' => (float)$matches[2],
'average15' => (float)$matches[3],
'runnable' => (float)$matches[4],
'scheduled' => (float)$matches[5]
];
}
/** /**
* Redirects to an external URL (fully qualified URL) * Redirects to an external URL (fully qualified URL)
* If you want to route relative to the current Friendica base, use App->internalRedirect() * If you want to route relative to the current Friendica base, use App->internalRedirect()

View File

@ -291,7 +291,7 @@ class Worker
return false; return false;
} }
$file = str_replace(getcwd() . "/", "", $file, $count); $file = str_replace(getcwd() . '/', '', $file, $count);
if ($count != 1) { if ($count != 1) {
return false; return false;
} }
@ -301,11 +301,11 @@ class Worker
} }
$valid = false; $valid = false;
if (strpos($file, "include/") === 0) { if (strpos($file, 'include/') === 0) {
$valid = true; $valid = true;
} }
if (strpos($file, "addon/") === 0) { if (strpos($file, 'addon/') === 0) {
$valid = true; $valid = true;
} }
@ -327,19 +327,19 @@ class Worker
// Quit when in maintenance // Quit when in maintenance
if (DI::config()->get('system', 'maintenance', false, true)) { if (DI::config()->get('system', 'maintenance', false, true)) {
Logger::notice("Maintenance mode - quit process", ['pid' => $mypid]); Logger::notice('Maintenance mode - quit process', ['pid' => $mypid]);
return false; return false;
} }
// Constantly check the number of parallel database processes // Constantly check the number of parallel database processes
if (DI::system()->isMaxProcessesReached()) { if (DI::system()->isMaxProcessesReached()) {
Logger::warning("Max processes reached for process", ['pid' => $mypid]); Logger::warning('Max processes reached for process', ['pid' => $mypid]);
return false; return false;
} }
// Constantly check the number of available database connections to let the frontend be accessible at any time // Constantly check the number of available database connections to let the frontend be accessible at any time
if (self::maxConnectionsReached()) { if (self::maxConnectionsReached()) {
Logger::warning("Max connection reached for process", ['pid' => $mypid]); Logger::warning('Max connection reached for process', ['pid' => $mypid]);
return false; return false;
} }
@ -363,7 +363,7 @@ class Worker
if (method_exists(sprintf('Friendica\Worker\%s', $include), 'execute')) { if (method_exists(sprintf('Friendica\Worker\%s', $include), 'execute')) {
// We constantly update the "executed" date every minute to avoid being killed too soon // We constantly update the "executed" date every minute to avoid being killed too soon
if (!isset(self::$last_update)) { if (!isset(self::$last_update)) {
self::$last_update = strtotime($queue["executed"]); self::$last_update = strtotime($queue['executed']);
} }
$age = (time() - self::$last_update) / 60; $age = (time() - self::$last_update) / 60;
@ -393,13 +393,13 @@ class Worker
// The script could be provided as full path or only with the function name // The script could be provided as full path or only with the function name
if ($include == basename($include)) { if ($include == basename($include)) {
$include = "include/".$include.".php"; $include = 'include/' . $include . '.php';
} }
if (!self::validateInclude($include)) { if (!self::validateInclude($include)) {
Logger::warning("Include file is not valid", ['file' => $argv[0]]); Logger::warning('Include file is not valid', ['file' => $argv[0]]);
$stamp = (float)microtime(true); $stamp = (float)microtime(true);
DBA::delete('workerqueue', ['id' => $queue["id"]]); DBA::delete('workerqueue', ['id' => $queue['id']]);
self::$db_duration = (microtime(true) - $stamp); self::$db_duration = (microtime(true) - $stamp);
self::$db_duration_write += (microtime(true) - $stamp); self::$db_duration_write += (microtime(true) - $stamp);
return true; return true;
@ -407,12 +407,12 @@ class Worker
require_once $include; require_once $include;
$funcname = str_replace(".php", "", basename($argv[0]))."_run"; $funcname = str_replace('.php', '', basename($argv[0])) .'_run';
if (function_exists($funcname)) { if (function_exists($funcname)) {
// We constantly update the "executed" date every minute to avoid being killed too soon // We constantly update the "executed" date every minute to avoid being killed too soon
if (!isset(self::$last_update)) { if (!isset(self::$last_update)) {
self::$last_update = strtotime($queue["executed"]); self::$last_update = strtotime($queue['executed']);
} }
$age = (time() - self::$last_update) / 60; $age = (time() - self::$last_update) / 60;
@ -428,15 +428,15 @@ class Worker
self::execFunction($queue, $funcname, $argv, false); self::execFunction($queue, $funcname, $argv, false);
$stamp = (float)microtime(true); $stamp = (float)microtime(true);
if (DBA::update('workerqueue', ['done' => true], ['id' => $queue["id"]])) { if (DBA::update('workerqueue', ['done' => true], ['id' => $queue['id']])) {
DI::config()->set('system', 'last_worker_execution', DateTimeFormat::utcNow()); DI::config()->set('system', 'last_worker_execution', DateTimeFormat::utcNow());
} }
self::$db_duration = (microtime(true) - $stamp); self::$db_duration = (microtime(true) - $stamp);
self::$db_duration_write += (microtime(true) - $stamp); self::$db_duration_write += (microtime(true) - $stamp);
} else { } else {
Logger::warning("Function does not exist", ['function' => $funcname]); Logger::warning('Function does not exist', ['function' => $funcname]);
$stamp = (float)microtime(true); $stamp = (float)microtime(true);
DBA::delete('workerqueue', ['id' => $queue["id"]]); DBA::delete('workerqueue', ['id' => $queue['id']]);
self::$db_duration = (microtime(true) - $stamp); self::$db_duration = (microtime(true) - $stamp);
self::$db_duration_write += (microtime(true) - $stamp); self::$db_duration_write += (microtime(true) - $stamp);
} }
@ -458,15 +458,32 @@ class Worker
{ {
$a = DI::app(); $a = DI::app();
$cooldown = DI::config()->get("system", "worker_cooldown", 0); $cooldown = DI::config()->get('system', 'worker_cooldown', 0);
if ($cooldown > 0) { if ($cooldown > 0) {
Logger::info('Pre execution cooldown.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'cooldown' => $cooldown]); Logger::debug('Pre execution cooldown.', ['cooldown' => $cooldown, 'id' => $queue['id'], 'priority' => $queue['priority'], 'command' => $queue['command']]);
sleep($cooldown); sleep($cooldown);
} }
$load_cooldown = DI::config()->get('system', 'worker_load_cooldown');
$processes_cooldown = DI::config()->get('system', 'worker_processes_cooldown');
while ((($load_cooldown > 0) || ($processes_cooldown > 0)) && ($load = System::getLoadAvg())) {
if (($load_cooldown > 0) && ($load['average1'] > $load_cooldown)) {
Logger::debug('Load induced pre execution cooldown.', ['max' => $load_cooldown, 'load' => $load, 'id' => $queue['id'], 'priority' => $queue['priority'], 'command' => $queue['command']]);
sleep(1);
continue;
}
if (($processes_cooldown > 0) && ($load['scheduled'] > $processes_cooldown)) {
Logger::debug('Process induced pre execution cooldown.', ['max' => $processes_cooldown, 'load' => $load, 'id' => $queue['id'], 'priority' => $queue['priority'], 'command' => $queue['command']]);
sleep(1);
continue;
}
break;
}
Logger::enableWorker($funcname); Logger::enableWorker($funcname);
Logger::info("Process start.", ['priority' => $queue['priority'], 'id' => $queue["id"]]); Logger::info('Process start.', ['priority' => $queue['priority'], 'id' => $queue['id']]);
$stamp = (float)microtime(true); $stamp = (float)microtime(true);
@ -518,21 +535,21 @@ class Worker
self::$lock_duration = 0; self::$lock_duration = 0;
if ($duration > 3600) { if ($duration > 3600) {
Logger::info('Longer than 1 hour.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'duration' => round($duration/60, 3)]); Logger::info('Longer than 1 hour.', ['priority' => $queue['priority'], 'id' => $queue['id'], 'duration' => round($duration/60, 3)]);
} elseif ($duration > 600) { } elseif ($duration > 600) {
Logger::info('Longer than 10 minutes.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'duration' => round($duration/60, 3)]); Logger::info('Longer than 10 minutes.', ['priority' => $queue['priority'], 'id' => $queue['id'], 'duration' => round($duration/60, 3)]);
} elseif ($duration > 300) { } elseif ($duration > 300) {
Logger::info('Longer than 5 minutes.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'duration' => round($duration/60, 3)]); Logger::info('Longer than 5 minutes.', ['priority' => $queue['priority'], 'id' => $queue['id'], 'duration' => round($duration/60, 3)]);
} elseif ($duration > 120) { } elseif ($duration > 120) {
Logger::info('Longer than 2 minutes.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'duration' => round($duration/60, 3)]); Logger::info('Longer than 2 minutes.', ['priority' => $queue['priority'], 'id' => $queue['id'], 'duration' => round($duration/60, 3)]);
} }
Logger::info('Process done.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'duration' => round($duration, 3)]); Logger::info('Process done.', ['priority' => $queue['priority'], 'id' => $queue['id'], 'duration' => round($duration, 3)]);
DI::profiler()->saveLog(DI::logger(), "ID " . $queue["id"] . ": " . $funcname); DI::profiler()->saveLog(DI::logger(), 'ID ' . $queue['id'] . ': ' . $funcname);
if ($cooldown > 0) { if ($cooldown > 0) {
Logger::info('Post execution cooldown.', ['priority' => $queue['priority'], 'id' => $queue["id"], 'cooldown' => $cooldown]); Logger::info('Post execution cooldown.', ['priority' => $queue['priority'], 'id' => $queue['id'], 'cooldown' => $cooldown]);
sleep($cooldown); sleep($cooldown);
} }
} }
@ -546,16 +563,16 @@ class Worker
private static function maxConnectionsReached(): bool private static function maxConnectionsReached(): bool
{ {
// Fetch the max value from the config. This is needed when the system cannot detect the correct value by itself. // Fetch the max value from the config. This is needed when the system cannot detect the correct value by itself.
$max = DI::config()->get("system", "max_connections"); $max = DI::config()->get('system', 'max_connections');
// Fetch the percentage level where the worker will get active // Fetch the percentage level where the worker will get active
$maxlevel = DI::config()->get("system", "max_connections_level", 75); $maxlevel = DI::config()->get('system', 'max_connections_level', 75);
if ($max == 0) { if ($max == 0) {
// the maximum number of possible user connections can be a system variable // the maximum number of possible user connections can be a system variable
$r = DBA::fetchFirst("SHOW VARIABLES WHERE `variable_name` = 'max_user_connections'"); $r = DBA::fetchFirst("SHOW VARIABLES WHERE `variable_name` = 'max_user_connections'");
if (DBA::isResult($r)) { if (DBA::isResult($r)) {
$max = $r["Value"]; $max = $r['Value'];
} }
// Or it can be granted. This overrides the system variable // Or it can be granted. This overrides the system variable
$stamp = (float)microtime(true); $stamp = (float)microtime(true);
@ -581,12 +598,12 @@ class Worker
$used = DBA::numRows($r); $used = DBA::numRows($r);
DBA::close($r); DBA::close($r);
Logger::info("Connection usage (user values)", ['usage' => $used, 'max' => $max]); Logger::info('Connection usage (user values)', ['usage' => $used, 'max' => $max]);
$level = ($used / $max) * 100; $level = ($used / $max) * 100;
if ($level >= $maxlevel) { if ($level >= $maxlevel) {
Logger::warning("Maximum level (".$maxlevel."%) of user connections reached: ".$used."/".$max); Logger::warning('Maximum level (' . $maxlevel . '%) of user connections reached: ' . $used .'/' . $max);
return true; return true;
} }
} }
@ -597,7 +614,7 @@ class Worker
if (!DBA::isResult($r)) { if (!DBA::isResult($r)) {
return false; return false;
} }
$max = intval($r["Value"]); $max = intval($r['Value']);
if ($max == 0) { if ($max == 0) {
return false; return false;
} }
@ -605,18 +622,18 @@ class Worker
if (!DBA::isResult($r)) { if (!DBA::isResult($r)) {
return false; return false;
} }
$used = intval($r["Value"]); $used = intval($r['Value']);
if ($used == 0) { if ($used == 0) {
return false; return false;
} }
Logger::info("Connection usage (system values)", ['used' => $used, 'max' => $max]); Logger::info('Connection usage (system values)', ['used' => $used, 'max' => $max]);
$level = $used / $max * 100; $level = $used / $max * 100;
if ($level < $maxlevel) { if ($level < $maxlevel) {
return false; return false;
} }
Logger::warning("Maximum level (".$level."%) of system connections reached: ".$used."/".$max); Logger::warning('Maximum level (' . $level . '%) of system connections reached: ' . $used . '/' . $max);
return true; return true;
} }
@ -629,7 +646,7 @@ class Worker
*/ */
private static function tooMuchWorkers(): bool private static function tooMuchWorkers(): bool
{ {
$queues = DI::config()->get("system", "worker_queues", 10); $queues = DI::config()->get('system', 'worker_queues', 10);
$maxqueues = $queues; $maxqueues = $queues;
@ -638,7 +655,7 @@ class Worker
// Decrease the number of workers at higher load // Decrease the number of workers at higher load
$load = System::currentLoad(); $load = System::currentLoad();
if ($load) { if ($load) {
$maxsysload = intval(DI::config()->get("system", "maxloadavg", 20)); $maxsysload = intval(DI::config()->get('system', 'maxloadavg', 20));
/* Default exponent 3 causes queues to rapidly decrease as load increases. /* Default exponent 3 causes queues to rapidly decrease as load increases.
* If you have 20 max queues at idle, then you get only 5 queues at 37.1% of $maxsysload. * If you have 20 max queues at idle, then you get only 5 queues at 37.1% of $maxsysload.
@ -690,8 +707,8 @@ class Worker
self::$db_duration += (microtime(true) - $stamp); self::$db_duration += (microtime(true) - $stamp);
self::$db_duration_stat += (microtime(true) - $stamp); self::$db_duration_stat += (microtime(true) - $stamp);
$idle_workers -= $running; $idle_workers -= $running;
$waiting_processes += $entry["entries"]; $waiting_processes += $entry['entries'];
$listitem[$entry['priority']] = $entry['priority'] . ":" . $running . "/" . $entry["entries"]; $listitem[$entry['priority']] = $entry['priority'] . ':' . $running . '/' . $entry['entries'];
} }
DBA::close($jobs); DBA::close($jobs);
} else { } else {
@ -702,33 +719,33 @@ class Worker
self::$db_duration_stat += (microtime(true) - $stamp); self::$db_duration_stat += (microtime(true) - $stamp);
while ($entry = DBA::fetch($jobs)) { while ($entry = DBA::fetch($jobs)) {
$idle_workers -= $entry["running"]; $idle_workers -= $entry['running'];
$listitem[$entry['priority']] = $entry['priority'].":".$entry["running"]; $listitem[$entry['priority']] = $entry['priority'] . ':' . $entry['running'];
} }
DBA::close($jobs); DBA::close($jobs);
} }
$waiting_processes -= $deferred; $waiting_processes -= $deferred;
$listitem[0] = "0:" . max(0, $idle_workers); $listitem[0] = '0:' . max(0, $idle_workers);
$processlist .= ' ('.implode(', ', $listitem).')'; $processlist .= ' ('.implode(', ', $listitem).')';
if (DI::config()->get("system", "worker_fastlane", false) && ($queues > 0) && ($active >= $queues) && self::entriesExists()) { if (DI::config()->get('system', 'worker_fastlane', false) && ($queues > 0) && ($active >= $queues) && self::entriesExists()) {
$top_priority = self::highestPriority(); $top_priority = self::highestPriority();
$high_running = self::processWithPriorityActive($top_priority); $high_running = self::processWithPriorityActive($top_priority);
if (!$high_running && ($top_priority > PRIORITY_UNDEFINED) && ($top_priority < PRIORITY_NEGLIGIBLE)) { if (!$high_running && ($top_priority > PRIORITY_UNDEFINED) && ($top_priority < PRIORITY_NEGLIGIBLE)) {
Logger::info("Jobs with a higher priority are waiting but none is executed. Open a fastlane.", ['priority' => $top_priority]); Logger::info('Jobs with a higher priority are waiting but none is executed. Open a fastlane.', ['priority' => $top_priority]);
$queues = $active + 1; $queues = $active + 1;
} }
} }
Logger::notice("Load: " . $load ."/" . $maxsysload . " - processes: " . $deferred . "/" . $active . "/" . $waiting_processes . $processlist . " - maximum: " . $queues . "/" . $maxqueues); Logger::notice('Load: ' . $load . '/' . $maxsysload . ' - processes: ' . $deferred . '/' . $active . '/' . $waiting_processes . $processlist . ' - maximum: ' . $queues . '/' . $maxqueues);
// Are there fewer workers running as possible? Then fork a new one. // Are there fewer workers running as possible? Then fork a new one.
if (!DI::config()->get("system", "worker_dont_fork", false) && ($queues > ($active + 1)) && self::entriesExists()) { if (!DI::config()->get('system', 'worker_dont_fork', false) && ($queues > ($active + 1)) && self::entriesExists()) {
Logger::info("There are fewer workers as possible, fork a new worker.", ['active' => $active, 'queues' => $queues]); Logger::info('There are fewer workers as possible, fork a new worker.', ['active' => $active, 'queues' => $queues]);
if (Worker\Daemon::isMode()) { if (Worker\Daemon::isMode()) {
Worker\IPC::SetJobState(true); Worker\IPC::SetJobState(true);
} else { } else {
@ -1117,12 +1134,12 @@ class Worker
* @param (integer|array) priority or parameter array, strings are deprecated and are ignored * @param (integer|array) priority or parameter array, strings are deprecated and are ignored
* *
* next args are passed as $cmd command line * next args are passed as $cmd command line
* or: Worker::add(PRIORITY_HIGH, "Notifier", Delivery::DELETION, $drop_id); * or: Worker::add(PRIORITY_HIGH, 'Notifier', Delivery::DELETION, $drop_id);
* or: Worker::add(array('priority' => PRIORITY_HIGH, 'dont_fork' => true), "Delivery", $post_id); * or: Worker::add(array('priority' => PRIORITY_HIGH, 'dont_fork' => true), 'Delivery', $post_id);
* *
* @return int "0" if worker queue entry already existed or there had been an error, otherwise the ID of the worker task * @return int '0' if worker queue entry already existed or there had been an error, otherwise the ID of the worker task
* @throws \Friendica\Network\HTTPException\InternalServerErrorException * @throws \Friendica\Network\HTTPException\InternalServerErrorException
* @note $cmd and string args are surrounded with "" * @note $cmd and string args are surrounded with ''
* *
* @hooks 'proc_run' * @hooks 'proc_run'
* array $arr * array $arr
@ -1136,14 +1153,14 @@ class Worker
$arr = ['args' => $args, 'run_cmd' => true]; $arr = ['args' => $args, 'run_cmd' => true];
Hook::callAll("proc_run", $arr); Hook::callAll('proc_run', $arr);
if (!$arr['run_cmd'] || !count($args)) { if (!$arr['run_cmd'] || !count($args)) {
return 1; return 1;
} }
$priority = PRIORITY_MEDIUM; $priority = PRIORITY_MEDIUM;
// Don't fork from frontend tasks by default // Don't fork from frontend tasks by default
$dont_fork = DI::config()->get("system", "worker_dont_fork", false) || !DI::mode()->isBackend(); $dont_fork = DI::config()->get('system', 'worker_dont_fork', false) || !DI::mode()->isBackend();
$created = DateTimeFormat::utcNow(); $created = DateTimeFormat::utcNow();
$delayed = DBA::NULL_DATETIME; $delayed = DBA::NULL_DATETIME;
$force_priority = false; $force_priority = false;

View File

@ -608,7 +608,7 @@ return [
'username_max_length' => 48, 'username_max_length' => 48,
// worker_cooldown (Integer) // worker_cooldown (Integer)
// Cooldown period in seconds after each worker function call. // Cooldown period in seconds before each worker function call.
'worker_cooldown' => 0, 'worker_cooldown' => 0,
// worker_debug (Boolean) // worker_debug (Boolean)
@ -632,12 +632,20 @@ return [
// List of minutes for the jobs per minute (JPM) calculation // List of minutes for the jobs per minute (JPM) calculation
'worker_jpm_range' => '1, 10, 60', 'worker_jpm_range' => '1, 10, 60',
// worker_load_cooldown (Integer)
// Maximum load that causes a cooldown before each worker function call.
'worker_load_cooldown' => 0,
// worker_load_exponent (Integer) // worker_load_exponent (Integer)
// Default 3, which allows only 25% of the maximum worker queues when server load reaches around 37% of maximum load. // Default 3, which allows only 25% of the maximum worker queues when server load reaches around 37% of maximum load.
// For a linear response where 25% of worker queues are allowed at 75% of maximum load, set this to 1. // For a linear response where 25% of worker queues are allowed at 75% of maximum load, set this to 1.
// Setting 0 would allow maximum worker queues at all times, which is not recommended. // Setting 0 would allow maximum worker queues at all times, which is not recommended.
'worker_load_exponent' => 3, 'worker_load_exponent' => 3,
// worker_processes_cooldown (Integer)
// Maximum number pro processes that causes a cooldown before each worker function call.
'worker_processes_cooldown' => 0,
// worker_multiple_fetch (Boolean) // worker_multiple_fetch (Boolean)
// When activated, the worker fetches jobs for multiple workers (not only for itself). // When activated, the worker fetches jobs for multiple workers (not only for itself).
// This is an experimental setting without knowing the performance impact. // This is an experimental setting without knowing the performance impact.