From 3716165ffdf9cba4f48d16f1f3afe927334300f4 Mon Sep 17 00:00:00 2001 From: Timotei Date: Sat, 2 Mar 2024 00:37:04 +0200 Subject: [PATCH 01/14] Cache crawler- run within a certain range of time only --- src/base.cls.php | 2 ++ src/crawler.cls.php | 33 ++++++++++++++++++++++++++++ src/lang.cls.php | 1 + tpl/crawler/settings-general.tpl.php | 19 ++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/src/base.cls.php b/src/base.cls.php index 870f9f960..ce466ab4e 100644 --- a/src/base.cls.php +++ b/src/base.cls.php @@ -232,6 +232,7 @@ class Base extends Root ## -------------- Crawler ----------------- ## ## -------------------------------------------------- ## const O_CRAWLER = 'crawler'; + const O_CRAWLER_SCHEDULE_TIME = 'crawler-schedule_time'; const O_CRAWLER_USLEEP = 'crawler-usleep'; const O_CRAWLER_RUN_DURATION = 'crawler-run_duration'; const O_CRAWLER_RUN_INTERVAL = 'crawler-run_interval'; @@ -514,6 +515,7 @@ class Base extends Root // Crawler self::O_CRAWLER => false, + self::O_CRAWLER_SCHEDULE_TIME => '00:00-23:59', self::O_CRAWLER_USLEEP => 0, self::O_CRAWLER_RUN_DURATION => 0, self::O_CRAWLER_RUN_INTERVAL => 0, diff --git a/src/crawler.cls.php b/src/crawler.cls.php index 7791d76d9..be38c8f3c 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -247,6 +247,32 @@ public static function async_handler($manually_run = false) self::start($manually_run); } + /** + * Check if crawler can run in the choosen time period + * + * @since 6.1 + */ + public static function _crawler_in_schedule_time() + { + $now = new DateTime(); + $class_settings = self::cls(); + $schedule_times = $class_settings->conf(Base::O_CRAWLER_SCHEDULE_TIME, ''); + $schedule_times = explode(',', $schedule_times); + + foreach ($schedule_times as $time) { + if ($time !== '') { + $hours = explode('-', $time); + $start = new DateTime($hours[0] . ":00"); + $end = new DateTime($hours[1] . 
":00"); + if ($now < $end && $now > $start) { + return true; + } + } + } + + return false; + } + /** * Proceed crawling * @@ -255,10 +281,17 @@ public static function async_handler($manually_run = false) */ public static function start($manually_run = false) { + $crawler_is_in_time = self::_crawler_in_schedule_time(); + if (!Router::can_crawl()) { self::debug('......crawler is NOT allowed by the server admin......'); return false; } + + if (!$manually_run && !$crawler_is_in_time) { + self::debug('......crawler is NOT allowed in this time slot......'); + return false; + } if ($manually_run) { self::debug('......crawler manually ran......'); diff --git a/src/lang.cls.php b/src/lang.cls.php index b2d2dce46..f201456b7 100644 --- a/src/lang.cls.php +++ b/src/lang.cls.php @@ -251,6 +251,7 @@ public static function title($id) self::O_CDN_CLOUDFLARE => __('Cloudflare API', 'litespeed-cache'), self::O_CRAWLER => __('Crawler', 'litespeed-cache'), + self::O_CRAWLER_SCHEDULE_TIME => __('Running time', 'litespeed-cache'), self::O_CRAWLER_USLEEP => __('Delay', 'litespeed-cache'), self::O_CRAWLER_RUN_DURATION => __('Run Duration', 'litespeed-cache'), self::O_CRAWLER_RUN_INTERVAL => __('Interval Between Runs', 'litespeed-cache'), diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index 49d2ece76..745ad81e0 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -28,6 +28,25 @@ + + + + title($id); ?> + + + build_input($id); ?> +
+ +
+ , +
+ +
+ 00:00-06:00,20:00-23:59 +
+ + + From 06e501fd58d0df993b9531381e6a556f48795cf2 Mon Sep 17 00:00:00 2001 From: Timotei Date: Mon, 4 Mar 2024 20:22:22 +0200 Subject: [PATCH 02/14] Changes to default value + instance usage --- src/base.cls.php | 2 +- src/crawler.cls.php | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/base.cls.php b/src/base.cls.php index ce466ab4e..9ecd576b6 100644 --- a/src/base.cls.php +++ b/src/base.cls.php @@ -515,7 +515,7 @@ class Base extends Root // Crawler self::O_CRAWLER => false, - self::O_CRAWLER_SCHEDULE_TIME => '00:00-23:59', + self::O_CRAWLER_SCHEDULE_TIME => '', self::O_CRAWLER_USLEEP => 0, self::O_CRAWLER_RUN_DURATION => 0, self::O_CRAWLER_RUN_INTERVAL => 0, diff --git a/src/crawler.cls.php b/src/crawler.cls.php index be38c8f3c..b27c4b88d 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -257,20 +257,25 @@ public static function _crawler_in_schedule_time() $now = new DateTime(); $class_settings = self::cls(); $schedule_times = $class_settings->conf(Base::O_CRAWLER_SCHEDULE_TIME, ''); - $schedule_times = explode(',', $schedule_times); - - foreach ($schedule_times as $time) { - if ($time !== '') { - $hours = explode('-', $time); - $start = new DateTime($hours[0] . ":00"); - $end = new DateTime($hours[1] . ":00"); - if ($now < $end && $now > $start) { - return true; + if($schedule_times!== ''){ + $schedule_times = explode(',', $schedule_times); + + foreach ($schedule_times as $time) { + if ($time !== '') { + $hours = explode('-', $time); + $start = new DateTime($hours[0] . ":00"); + $end = new DateTime($hours[1] . 
":00"); + if ($now < $end && $now > $start) { + return true; + } } } + + return false; + } + else{ + return true; } - - return false; } /** @@ -281,13 +286,12 @@ public static function _crawler_in_schedule_time() */ public static function start($manually_run = false) { - $crawler_is_in_time = self::_crawler_in_schedule_time(); - if (!Router::can_crawl()) { self::debug('......crawler is NOT allowed by the server admin......'); return false; } + $crawler_is_in_time = self::cls()->_crawler_in_schedule_time(); if (!$manually_run && !$crawler_is_in_time) { self::debug('......crawler is NOT allowed in this time slot......'); return false; From c1f8d6417f74b78fa32223d828cf14d2e491795a Mon Sep 17 00:00:00 2001 From: Timotei Date: Thu, 7 Mar 2024 02:24:04 +0200 Subject: [PATCH 03/14] Time parsing update --- src/crawler.cls.php | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/crawler.cls.php b/src/crawler.cls.php index b27c4b88d..96f9c3510 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -7,6 +7,7 @@ */ namespace LiteSpeed; +use \DateTime; defined('WPINC') || exit(); @@ -257,20 +258,39 @@ public static function _crawler_in_schedule_time() $now = new DateTime(); $class_settings = self::cls(); $schedule_times = $class_settings->conf(Base::O_CRAWLER_SCHEDULE_TIME, ''); + if($schedule_times!== ''){ $schedule_times = explode(',', $schedule_times); - foreach ($schedule_times as $time) { - if ($time !== '') { - $hours = explode('-', $time); - $start = new DateTime($hours[0] . ":00"); - $end = new DateTime($hours[1] . 
":00"); - if ($now < $end && $now > $start) { - return true; + // match only correct format time period: 0:00-23:00 OR 00:00-23:00 + $time_parse_ok = preg_match('/(\d{1,2}):(\d{1,2})\-(\d{1,2}):(\d{1,2})/', $time, $time_from_match); + if($time !== '' && $time_parse_ok){ + $count_colon = substr_count($time, ":"); + $count_minus = substr_count($time, "-"); + // Time periods must have 5 array parts, 2 colons, 1 minus sign + if (count($time_from_match) === 5 && $count_colon == 2 && $count_minus == 1){ + // Check if second hour is higher than first one + if((int)$time_from_match[3] > (int) $time_from_match[1]){ + // Create DateTime for start and end time and zerofill the number + $start = new DateTime( + str_pad((string)$time_from_match[1], 2, "0", STR_PAD_LEFT). ":" . + str_pad((string)$time_from_match[2], 2, "0", STR_PAD_LEFT) . ":00" + ); + $end = new DateTime( + str_pad((string)$time_from_match[3], 2, "0", STR_PAD_LEFT). ":" . + str_pad((string)$time_from_match[4], 2, "0", STR_PAD_LEFT) . 
":00" + ); + + // Test start < now < end + if ($now < $end && $now > $start) { + return true; + } + } } } } + self::debug('------------crawler schedule time-------------no time period found'); return false; } else{ @@ -293,7 +313,7 @@ public static function start($manually_run = false) $crawler_is_in_time = self::cls()->_crawler_in_schedule_time(); if (!$manually_run && !$crawler_is_in_time) { - self::debug('......crawler is NOT allowed in this time slot......'); + self::debug('......crawler is NOT allowed at this time......'); return false; } From 5e1b60556a29618a3b0d20400558c481382001b2 Mon Sep 17 00:00:00 2001 From: Timotei Date: Thu, 7 Mar 2024 02:26:31 +0200 Subject: [PATCH 04/14] Added "equal to" to time comparison --- src/crawler.cls.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crawler.cls.php b/src/crawler.cls.php index 96f9c3510..9cbf94cf1 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -281,8 +281,8 @@ public static function _crawler_in_schedule_time() str_pad((string)$time_from_match[4], 2, "0", STR_PAD_LEFT) . 
":00" ); - // Test start < now < end - if ($now < $end && $now > $start) { + // Test start <= now <= end + if ($now <= $end && $now >= $start) { return true; } } From 3e8bf121ef0c5e56c13d6e861d06bcad051edb39 Mon Sep 17 00:00:00 2001 From: Timotei Date: Fri, 8 Mar 2024 16:50:47 +0200 Subject: [PATCH 05/14] Crawler in schedule time - change from DateTime to strtotime --- src/crawler.cls.php | 78 +++++++++++++++++----------- tpl/crawler/settings-general.tpl.php | 2 +- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/src/crawler.cls.php b/src/crawler.cls.php index 9cbf94cf1..4ff2b47b7 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -7,6 +7,7 @@ */ namespace LiteSpeed; + use \DateTime; defined('WPINC') || exit(); @@ -253,47 +254,62 @@ public static function async_handler($manually_run = false) * * @since 6.1 */ - public static function _crawler_in_schedule_time() + public static function _crawler_in_schedule_time() { - $now = new DateTime(); $class_settings = self::cls(); $schedule_times = $class_settings->conf(Base::O_CRAWLER_SCHEDULE_TIME, ''); - if($schedule_times!== ''){ + if ($schedule_times !== '') { $schedule_times = explode(',', $schedule_times); - foreach ($schedule_times as $time) { - // match only correct format time period: 0:00-23:00 OR 00:00-23:00 - $time_parse_ok = preg_match('/(\d{1,2}):(\d{1,2})\-(\d{1,2}):(\d{1,2})/', $time, $time_from_match); - if($time !== '' && $time_parse_ok){ - $count_colon = substr_count($time, ":"); - $count_minus = substr_count($time, "-"); - // Time periods must have 5 array parts, 2 colons, 1 minus sign - if (count($time_from_match) === 5 && $count_colon == 2 && $count_minus == 1){ - // Check if second hour is higher than first one - if((int)$time_from_match[3] > (int) $time_from_match[1]){ - // Create DateTime for start and end time and zerofill the number - $start = new DateTime( - str_pad((string)$time_from_match[1], 2, "0", STR_PAD_LEFT). ":" . 
- str_pad((string)$time_from_match[2], 2, "0", STR_PAD_LEFT) . ":00" - ); - $end = new DateTime( - str_pad((string)$time_from_match[3], 2, "0", STR_PAD_LEFT). ":" . - str_pad((string)$time_from_match[4], 2, "0", STR_PAD_LEFT) . ":00" - ); - - // Test start <= now <= end - if ($now <= $end && $now >= $start) { - return true; - } + + if (count($schedule_times) > 0) { + $now = time(); + + // A single time: e.g. 1, 01, 1:1, 1:01, 1:01:1, or 1:01:01, etc. + $time_re = '(\d{1,2}(?::\d{1,2}){0,2}(?i:[AP]M)?)'; + $re = '/^' . $time_re . '[-]' . $time_re . '$/'; + + // Allow parsing times like 1-3, 1AM-3PM, 1aM-3Pm + $with_minutes = function ($time) { + $has_meridian = stripos($time, 'm'); + if (preg_match('/:\d/', $time)) { + return $time; + } else { + if ($has_meridian !== false) { + $meridian = strtoupper(substr($time, -2)); + $time_only = substr($time, 0, -2); + + return $time_only . ':00' . $meridian; + } else { + return $time . ':00'; } } + }; + + foreach ($schedule_times as $time) { + preg_match($re, $time, $matches); + + if (!$matches) { + continue; + } + + $start = strtotime($with_minutes($matches[1])); + $end = strtotime($with_minutes($matches[2])); + + if (false === $start || false === $end || $start > $end) { + continue; + } + + // Test start <= now <= end + if ($now <= $end && $now >= $start) { + return true; + } } } - + self::debug('------------crawler schedule time-------------no time period found'); return false; - } - else{ + } else { return true; } } @@ -310,7 +326,7 @@ public static function start($manually_run = false) self::debug('......crawler is NOT allowed by the server admin......'); return false; } - + $crawler_is_in_time = self::cls()->_crawler_in_schedule_time(); if (!$manually_run && !$crawler_is_in_time) { self::debug('......crawler is NOT allowed at this time......'); diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index 745ad81e0..bedb4d26a 100644 --- a/tpl/crawler/settings-general.tpl.php +++ 
b/tpl/crawler/settings-general.tpl.php @@ -42,7 +42,7 @@

- 00:00-06:00,20:00-23:59 + 00:00-06:00,20:00-23:59,1AM-15:00 From d5b15a29a0ceea52b1c63d184113cda2e7dc1ec6 Mon Sep 17 00:00:00 2001 From: Timotei Date: Fri, 8 Mar 2024 17:51:00 +0200 Subject: [PATCH 06/14] Crawler in schedule time - change example to 24h format --- tpl/crawler/settings-general.tpl.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index bedb4d26a..745ad81e0 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -42,7 +42,7 @@

- 00:00-06:00,20:00-23:59,1AM-15:00 + 00:00-06:00,20:00-23:59 From 717b1bcd65f79da5e8d7aec49b31de763e9634a1 Mon Sep 17 00:00:00 2001 From: Timotei Date: Fri, 8 Mar 2024 17:58:32 +0200 Subject: [PATCH 07/14] Fix setting translation --- tpl/crawler/settings-general.tpl.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index 745ad81e0..27d354bc4 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -38,9 +38,9 @@

- , + ,'); ?>
- + '. date('H:m') . ''); ?>
00:00-06:00,20:00-23:59
From 4e93ce58f4c06591d07f62bf2944114cb219f691 Mon Sep 17 00:00:00 2001 From: Timotei Date: Fri, 8 Mar 2024 18:06:02 +0200 Subject: [PATCH 08/14] Trim empty spaces --- src/crawler.cls.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crawler.cls.php b/src/crawler.cls.php index 4ff2b47b7..2479e4577 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -287,6 +287,7 @@ public static function _crawler_in_schedule_time() }; foreach ($schedule_times as $time) { + $time = trim($time); preg_match($re, $time, $matches); if (!$matches) { From 01930665f661237fdaa26bc02278fdb065599681 Mon Sep 17 00:00:00 2001 From: Timotei Date: Fri, 8 Mar 2024 18:14:20 +0200 Subject: [PATCH 09/14] Improve readability --- tpl/crawler/settings-general.tpl.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index 27d354bc4..d2caa83a0 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -42,7 +42,7 @@
'. date('H:m') . ''); ?>
- 00:00-06:00,20:00-23:59 + 00:00-06:00 , 20:00-23:59, 01:00-05:00 From 42855410d0bcb3af4984e9fffe53b35af193d1c8 Mon Sep 17 00:00:00 2001 From: Timotei Date: Mon, 11 Mar 2024 20:48:01 +0200 Subject: [PATCH 10/14] Text changes --- tpl/crawler/settings-general.tpl.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index d2caa83a0..ee19a52e9 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -36,9 +36,9 @@ build_input($id); ?>
- +
- ,'); ?> + HH:mm-HH:mm delimited by %s', 'litespeed-cache'), ','); ?>
'. date('H:m') . ''); ?>
From b93d92babeee6d4ff74ae6616aef165ca34eb202 Mon Sep 17 00:00:00 2001 From: Timotei Date: Mon, 11 Mar 2024 20:56:57 +0200 Subject: [PATCH 11/14] Changed text --- tpl/crawler/settings-general.tpl.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index ee19a52e9..342b1e3fa 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -36,7 +36,7 @@ build_input($id); ?>
- +
HH:mm-HH:mm delimited by %s', 'litespeed-cache'), ','); ?>
From 93648f29d32f9fa3bb28b5f5088c3e5cec69b6a7 Mon Sep 17 00:00:00 2001 From: Timotei Date: Wed, 17 Apr 2024 00:31:37 +0300 Subject: [PATCH 12/14] Review fixes --- tpl/crawler/settings-general.tpl.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index 342b1e3fa..42792c096 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -38,9 +38,9 @@

- HH:mm-HH:mm delimited by %s', 'litespeed-cache'), ','); ?> + %s delimited by %s', 'litespeed-cache'), 'HH:mm-HH:mm', ','); ?>
- '. date('H:m') . ''); ?> + '. date('H:m') . ''; ?>
00:00-06:00 , 20:00-23:59, 01:00-05:00
From a64bdd0d80671b61c7895914453c04e5d050aebc Mon Sep 17 00:00:00 2001 From: Timotei Date: Wed, 17 Apr 2024 22:41:18 +0300 Subject: [PATCH 13/14] New Review fixes --- tpl/crawler/settings-general.tpl.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpl/crawler/settings-general.tpl.php b/tpl/crawler/settings-general.tpl.php index 42792c096..a06eebf7a 100644 --- a/tpl/crawler/settings-general.tpl.php +++ b/tpl/crawler/settings-general.tpl.php @@ -38,9 +38,9 @@

- %s delimited by %s', 'litespeed-cache'), 'HH:mm-HH:mm', ','); ?> + HH:mm-HH:mm', ','); ?>
- '. date('H:m') . ''; ?> + '. date('H:i') . ''; ?>
00:00-06:00 , 20:00-23:59, 01:00-05:00
From e645104ab4a10b34d8b28766895027e362679e46 Mon Sep 17 00:00:00 2001 From: Timotei Date: Wed, 21 Aug 2024 18:18:24 +0300 Subject: [PATCH 14/14] Review update --- src/crawler.cls.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crawler.cls.php b/src/crawler.cls.php index 2479e4577..9a91dbddc 100644 --- a/src/crawler.cls.php +++ b/src/crawler.cls.php @@ -254,7 +254,7 @@ public static function async_handler($manually_run = false) * * @since 6.1 */ - public static function _crawler_in_schedule_time() + public function _crawler_in_schedule_time() { $class_settings = self::cls(); $schedule_times = $class_settings->conf(Base::O_CRAWLER_SCHEDULE_TIME, '');