Add URL tracking, bot-aware cleanup, and request URL helper
- PageViewsTable: add `url` column with migration, orphan cleanup, batched deletes
- SessionTable: add cleanOldSessionsSmart() with separate bot/real retention
- SessionInstaller: add cleanAllSmart() orchestrating all table cleanup
- SessionTrait: add mprGetRequestUrl() and mprDetectPageType() helpers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
84
src/PageViewsTable.php
Normal file → Executable file
84
src/PageViewsTable.php
Normal file → Executable file
@@ -71,6 +71,7 @@ class PageViewsTable
|
||||
`page_type` TINYINT(2) UNSIGNED NOT NULL,
|
||||
`page_id` INT(10) UNSIGNED DEFAULT NULL,
|
||||
`controller` VARCHAR(64) DEFAULT NULL,
|
||||
`url` VARCHAR(500) DEFAULT NULL,
|
||||
`date_add` DATETIME NOT NULL,
|
||||
PRIMARY KEY (`id_page_view`),
|
||||
KEY `id_session` (`id_session`),
|
||||
@@ -79,7 +80,33 @@ class PageViewsTable
|
||||
KEY `session_date` (`id_session`, `date_add`)
|
||||
) ENGINE={$engine} DEFAULT CHARSET={$charset};";
|
||||
|
||||
return \Db::getInstance()->execute($sql);
|
||||
$result = \Db::getInstance()->execute($sql);
|
||||
|
||||
// Add url column to existing tables (safe migration)
|
||||
self::migrateAddUrlColumn();
|
||||
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
 * Ensure the page-views table has a `url` column.
 *
 * Looks the column up with SHOW COLUMNS and, only when it is missing, adds it
 * as VARCHAR(500) DEFAULT NULL right after `controller`. Any exception raised
 * while doing so is swallowed on purpose, so calling this against a table that
 * does not exist yet is harmless.
 *
 * @return void
 */
public static function migrateAddUrlColumn()
{
    $tableName = self::getTableName();

    try {
        $existing = \Db::getInstance()->executeS("SHOW COLUMNS FROM `{$tableName}` LIKE 'url'");

        // Column already present: nothing to migrate.
        if (!empty($existing)) {
            return;
        }

        \Db::getInstance()->execute(
            "ALTER TABLE `{$tableName}` ADD COLUMN `url` VARCHAR(500) DEFAULT NULL AFTER `controller`"
        );
    } catch (\Exception $e) {
        // Best-effort migration: the table may not exist yet on a fresh install.
    }
}
|
||||
|
||||
/**
|
||||
@@ -109,9 +136,10 @@ class PageViewsTable
|
||||
* @param int $pageType Page type constant
|
||||
* @param int|null $pageId Page ID
|
||||
* @param string|null $controller Controller name
|
||||
* @param string|null $url Request URL
|
||||
* @return bool
|
||||
*/
|
||||
public static function trackPageView($idSession, $pageType, $pageId = null, $controller = null)
|
||||
public static function trackPageView($idSession, $pageType, $pageId = null, $controller = null, $url = null)
|
||||
{
|
||||
if (!$idSession) {
|
||||
return false;
|
||||
@@ -119,10 +147,11 @@ class PageViewsTable
|
||||
|
||||
$table = self::getTableName();
|
||||
|
||||
$sql = "INSERT INTO `{$table}` (id_session, page_type, page_id, controller, date_add)
|
||||
$sql = "INSERT INTO `{$table}` (id_session, page_type, page_id, controller, url, date_add)
|
||||
VALUES (" . (int) $idSession . ", " . (int) $pageType . ", " .
|
||||
($pageId !== null ? (int) $pageId : 'NULL') . ", " .
|
||||
($controller ? "'" . pSQL(substr($controller, 0, 64)) . "'" : 'NULL') . ", NOW())";
|
||||
($controller ? "'" . pSQL(substr($controller, 0, 64)) . "'" : 'NULL') . ", " .
|
||||
($url ? "'" . pSQL(substr($url, 0, 500)) . "'" : 'NULL') . ", NOW())";
|
||||
|
||||
return \Db::getInstance()->execute($sql);
|
||||
}
|
||||
@@ -172,18 +201,51 @@ class PageViewsTable
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean old page view records
|
||||
* Clean old page view records (batched to avoid long locks)
|
||||
*
|
||||
* @param int $hours Hours to keep
|
||||
* @return bool
|
||||
* @return int Number of rows deleted
|
||||
*/
|
||||
public static function cleanOldPageViews($hours = 24)
|
||||
public static function cleanOldPageViews($hours = 48)
|
||||
{
|
||||
$table = self::getTableName();
|
||||
$deleted = 0;
|
||||
|
||||
return \Db::getInstance()->execute(
|
||||
"DELETE FROM `{$table}`
|
||||
WHERE date_add < DATE_SUB(NOW(), INTERVAL " . (int) $hours . " HOUR)"
|
||||
);
|
||||
do {
|
||||
\Db::getInstance()->execute(
|
||||
"DELETE FROM `{$table}`
|
||||
WHERE date_add < DATE_SUB(NOW(), INTERVAL " . (int) $hours . " HOUR)
|
||||
LIMIT 10000"
|
||||
);
|
||||
$affected = (int) \Db::getInstance()->Affected_Rows();
|
||||
$deleted += $affected;
|
||||
} while ($affected >= 10000);
|
||||
|
||||
return $deleted;
|
||||
}
|
||||
|
||||
/**
 * Clean orphaned page views (rows whose session no longer exists).
 *
 * Rows are removed in batches of 10000 so a single statement never holds
 * locks for long. MySQL does not accept LIMIT on the multi-table
 * "DELETE alias FROM ... JOIN ..." form, so the orphan test is expressed as a
 * correlated NOT EXISTS on a single-table DELETE, where LIMIT is allowed.
 * Rows with a NULL id_session have no matching session and are treated as
 * orphans, exactly as a LEFT JOIN ... IS NULL would treat them.
 *
 * @return int Number of rows deleted
 */
public static function cleanOrphanedPageViews()
{
    $table = self::getTableName();
    $sessTable = SessionTable::getTableName();
    $batchSize = 10000;
    $deleted = 0;
    $db = \Db::getInstance();

    do {
        $ok = $db->execute(
            "DELETE FROM `{$table}`
            WHERE NOT EXISTS (
                SELECT 1 FROM `{$sessTable}` s
                WHERE s.id_session = `{$table}`.id_session
            )
            LIMIT " . $batchSize
        );

        // A failed statement deleted nothing; stop instead of reading a
        // row count that does not belong to this batch.
        if (!$ok) {
            break;
        }

        $affected = (int) $db->Affected_Rows();
        $deleted += $affected;
    } while ($affected >= $batchSize); // a short batch means nothing is left

    return $deleted;
}
|
||||
}
|
||||
|
||||
33
src/SessionInstaller.php
Normal file → Executable file
33
src/SessionInstaller.php
Normal file → Executable file
@@ -59,7 +59,7 @@ class SessionInstaller
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up old data from all session tables
|
||||
* Clean up old data from all session tables (simple mode)
|
||||
*
|
||||
* @param int $sessionHours Hours to keep sessions (default 24)
|
||||
* @param int $pageViewsHours Hours to keep page views (default 24)
|
||||
@@ -70,8 +70,35 @@ class SessionInstaller
|
||||
{
|
||||
$result = true;
|
||||
$result = $result && SessionTable::cleanOldSessions($sessionHours);
|
||||
$result = $result && PageViewsTable::cleanOldPageViews($pageViewsHours);
|
||||
$result = $result && CartActionsTable::cleanOldCartActions($cartActionsHours);
|
||||
PageViewsTable::cleanOldPageViews($pageViewsHours);
|
||||
CartActionsTable::cleanOldCartActions($cartActionsHours);
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
 * Run the bot-aware cleanup across every session-related table.
 *
 * Order of work: sessions first (bot-aware retention), then page views whose
 * session is gone, then page views past their retention window, and finally
 * old cart actions. Intended to be called from an hourly cron.
 *
 * @param int $botSessionHours Hours to keep bot sessions (default 24)
 * @param int $realSessionHours Hours to keep real sessions (default 72)
 * @param int $pageViewsHours Hours to keep page views (default 48)
 * @param int $cartActionsHours Hours to keep cart actions (default 168 / 7 days)
 * @return array Cleanup statistics keyed by 'sessions', 'orphaned_page_views',
 *               'old_page_views' and 'cart_actions'
 */
public static function cleanAllSmart($botSessionHours = 24, $realSessionHours = 72, $pageViewsHours = 48, $cartActionsHours = 168)
{
    // Array elements are evaluated top to bottom, so the cleanup order is the
    // same as the key order below.
    return [
        'sessions' => SessionTable::cleanOldSessionsSmart($botSessionHours, $realSessionHours),
        'orphaned_page_views' => PageViewsTable::cleanOrphanedPageViews(),
        'old_page_views' => PageViewsTable::cleanOldPageViews($pageViewsHours),
        'cart_actions' => CartActionsTable::cleanOldCartActions($cartActionsHours),
    ];
}
|
||||
}
|
||||
|
||||
162
src/SessionTable.php
Normal file → Executable file
162
src/SessionTable.php
Normal file → Executable file
@@ -29,16 +29,6 @@ if (!defined('_PS_VERSION_')) {
|
||||
|
||||
class SessionTable
|
||||
{
|
||||
/**
|
||||
* List of modules that use the shared mpr_sessions table.
|
||||
* Add your module name here when integrating.
|
||||
*/
|
||||
private const MODULES_USING_SESSION = [
|
||||
'mprexpresscheckout',
|
||||
'mprtotaldefender',
|
||||
'mprtradeaccount',
|
||||
];
|
||||
|
||||
/**
|
||||
* Table name without prefix
|
||||
*/
|
||||
@@ -91,7 +81,7 @@ class SessionTable
|
||||
`os` VARCHAR(32) DEFAULT NULL,
|
||||
`is_bot` TINYINT(1) UNSIGNED NOT NULL DEFAULT 0,
|
||||
|
||||
-- Attribution tracking (mprexpresscheckout)
|
||||
-- Attribution tracking (mprcheckoutrevolution)
|
||||
`source_type` TINYINT(3) UNSIGNED NOT NULL DEFAULT 0,
|
||||
`source_detail` VARCHAR(500) DEFAULT NULL,
|
||||
`utm_source` VARCHAR(128) DEFAULT NULL,
|
||||
@@ -104,7 +94,7 @@ class SessionTable
|
||||
`msclkid` VARCHAR(255) DEFAULT NULL,
|
||||
`ttclid` VARCHAR(255) DEFAULT NULL,
|
||||
|
||||
-- Landing page tracking (mprexpresscheckout)
|
||||
-- Landing page tracking (mprcheckoutrevolution)
|
||||
`landing_page_type` TINYINT(2) UNSIGNED DEFAULT NULL,
|
||||
`landing_page_id` INT(10) UNSIGNED DEFAULT NULL,
|
||||
`landing_url` VARCHAR(500) DEFAULT NULL,
|
||||
@@ -148,29 +138,44 @@ class SessionTable
|
||||
*/
|
||||
public static function uninstall($currentModule)
|
||||
{
|
||||
// Check if any other module using sessions is still installed
|
||||
foreach (self::MODULES_USING_SESSION as $moduleName) {
|
||||
if ($moduleName === $currentModule) {
|
||||
continue;
|
||||
}
|
||||
if (\Module::isInstalled($moduleName)) {
|
||||
// Another module still needs the table
|
||||
return true;
|
||||
}
|
||||
if (self::isAnotherModuleInstalled($currentModule)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// No other module needs it - drop the table
|
||||
return \Db::getInstance()->execute("DROP TABLE IF EXISTS `" . self::getTableName() . "`");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get list of modules that use the session table
|
||||
* Discover which installed modules bundle the prestashop-session package.
|
||||
* Uses runtime filesystem check instead of a hardcoded list.
|
||||
*
|
||||
* @return array
|
||||
* @return array Module names
|
||||
*/
|
||||
public static function getModulesUsingSession()
|
||||
{
|
||||
return self::MODULES_USING_SESSION;
|
||||
if (!defined('_PS_MODULE_DIR_') || !_PS_MODULE_DIR_) {
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
$rows = \Db::getInstance()->executeS(
|
||||
'SELECT name FROM ' . _DB_PREFIX_ . 'module'
|
||||
);
|
||||
} catch (\Exception $e) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$modules = [];
|
||||
if ($rows) {
|
||||
foreach ($rows as $row) {
|
||||
$path = _PS_MODULE_DIR_ . $row['name'] . '/vendor/myprestarocks/prestashop-session/';
|
||||
if (is_dir($path)) {
|
||||
$modules[] = $row['name'];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $modules;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -181,11 +186,8 @@ class SessionTable
|
||||
*/
|
||||
public static function isAnotherModuleInstalled($excludeModule)
|
||||
{
|
||||
foreach (self::MODULES_USING_SESSION as $moduleName) {
|
||||
if ($moduleName === $excludeModule) {
|
||||
continue;
|
||||
}
|
||||
if (\Module::isInstalled($moduleName)) {
|
||||
foreach (self::getModulesUsingSession() as $moduleName) {
|
||||
if ($moduleName !== $excludeModule) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -237,6 +239,63 @@ class SessionTable
|
||||
return $result ? (int) $result : null;
|
||||
}
|
||||
|
||||
/**
 * Get the most recent active session for a guest, regardless of hash.
 *
 * Enables cross-module session deduplication: if one module already created
 * a session for this guest (possibly with different hash params), another
 * module can find and reuse it instead of creating a duplicate.
 *
 * The query carries its own LIMIT 1, so it is run through executeS() rather
 * than getValue(): PrestaShop's getValue()/getRow() append " LIMIT 1" to the
 * SQL they receive, which would produce an invalid "LIMIT 1 LIMIT 1".
 * The Db query cache is bypassed because the result depends on NOW().
 *
 * @param int $idGuest Guest ID
 * @param int $timeoutMinutes Session timeout in minutes
 * @return int|null Session ID or null when no active session is found
 */
public static function getActiveSessionByGuest($idGuest, $timeoutMinutes = 60)
{
    if (!$idGuest) {
        return null;
    }

    $table = self::getTableName();

    $sql = "SELECT id_session FROM `{$table}`
        WHERE id_guest = " . (int) $idGuest . "
        AND date_last_activity > DATE_SUB(NOW(), INTERVAL " . (int) $timeoutMinutes . " MINUTE)
        ORDER BY date_last_activity DESC
        LIMIT 1";

    // Third argument disables the Db query cache for this time-dependent query.
    $rows = \Db::getInstance()->executeS($sql, true, false);

    return !empty($rows[0]['id_session']) ? (int) $rows[0]['id_session'] : null;
}
|
||||
|
||||
/**
 * Get the most recent active non-bot session for an IP address.
 *
 * Fallback for when id_guest is not available (PS 9.x anonymous visitors).
 *
 * @param string $ipAddress
 * @param int $timeoutMinutes Session timeout in minutes
 * @return int|null Session ID or null when no active session is found
 */
public static function getActiveSessionByIP($ipAddress, $timeoutMinutes = 60)
{
    if (!$ipAddress) {
        return null;
    }

    $table = self::getTableName();

    $sql = "SELECT id_session FROM `{$table}`
        WHERE ip_address = '" . pSQL($ipAddress) . "'
        AND is_bot = 0
        AND date_last_activity > DATE_SUB(NOW(), INTERVAL " . (int) $timeoutMinutes . " MINUTE)
        ORDER BY date_last_activity DESC
        LIMIT 1";

    // executeS() is used instead of getValue() because getValue() appends its
    // own " LIMIT 1" to the SQL. executeS() caches results by default too, so
    // the cache is switched off explicitly (third argument): the result
    // depends on NOW() and must not be served stale.
    $rows = \Db::getInstance()->executeS($sql, true, false);

    return !empty($rows[0]['id_session']) ? (int) $rows[0]['id_session'] : null;
}
|
||||
|
||||
/**
|
||||
* Link guest sessions to customer (on login/registration)
|
||||
*
|
||||
@@ -258,7 +317,7 @@ class SessionTable
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean old sessions
|
||||
* Clean old sessions (simple, non-bot-aware)
|
||||
*
|
||||
* @param int $hours Hours to keep (default 24)
|
||||
* @return bool
|
||||
@@ -273,6 +332,47 @@ class SessionTable
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Bot-aware session cleanup (batched to avoid long locks).
 *
 * Bot sessions accumulate fast and have no analytics value beyond short-term
 * monitoring, so they get their own (normally shorter) retention window.
 *
 * Phase 1 removes rows with is_bot = 1 older than $botHours. Phase 2 removes
 * every other row older than $realHours. Phase 2 explicitly skips is_bot = 1
 * rows so that bot retention is governed only by $botHours (even when it is
 * larger than $realHours) and 'deleted_real' never counts bot rows.
 *
 * Each phase deletes at most 10000 rows per statement and repeats until a
 * batch comes back short; a failed statement ends that phase.
 *
 * @param int $botHours Hours to keep bot sessions (default 24)
 * @param int $realHours Hours to keep real sessions (default 72)
 * @return array ['deleted_bots' => int, 'deleted_real' => int]
 */
public static function cleanOldSessionsSmart($botHours = 24, $realHours = 72)
{
    $table = self::getTableName();
    $db = \Db::getInstance();
    $batchSize = 10000;

    // Runs "DELETE ... WHERE <condition> LIMIT <batch>" until fewer rows than
    // a full batch are affected, and returns the total number removed.
    $purge = static function ($where) use ($db, $table, $batchSize) {
        $total = 0;

        do {
            $ok = $db->execute(
                "DELETE FROM `{$table}`
                WHERE {$where}
                LIMIT {$batchSize}"
            );

            // Nothing was deleted by a failed statement; do not loop on it.
            if (!$ok) {
                break;
            }

            $affected = (int) $db->Affected_Rows();
            $total += $affected;
        } while ($affected >= $batchSize);

        return $total;
    };

    // Phase 1: bot sessions (aggressive retention)
    $deletedBots = $purge(
        'is_bot = 1 AND date_last_activity < DATE_SUB(NOW(), INTERVAL ' . (int) $botHours . ' HOUR)'
    );

    // Phase 2: all remaining (non-bot) sessions
    $deletedReal = $purge(
        'is_bot <> 1 AND date_last_activity < DATE_SUB(NOW(), INTERVAL ' . (int) $realHours . ' HOUR)'
    );

    return ['deleted_bots' => $deletedBots, 'deleted_real' => $deletedReal];
}
|
||||
|
||||
/**
|
||||
* Get active session count
|
||||
*
|
||||
|
||||
54
src/SessionTrait.php
Normal file → Executable file
54
src/SessionTrait.php
Normal file → Executable file
@@ -68,12 +68,20 @@ trait SessionTrait
|
||||
$userAgent = $_SERVER['HTTP_USER_AGENT'];
|
||||
}
|
||||
|
||||
$userAgent = strtolower($userAgent);
|
||||
$userAgentLower = strtolower($userAgent);
|
||||
|
||||
// Phase 1: Known bot UA patterns (fast string matching)
|
||||
$botPatterns = [
|
||||
// Search engine crawlers
|
||||
'googlebot', 'bingbot', 'slurp', 'duckduckbot', 'baiduspider',
|
||||
'yandexbot', 'sogou', 'exabot',
|
||||
// Google rendering / Lighthouse / other crawlers
|
||||
'chrome-lighthouse', 'lighthouse', 'pagespeed', 'google-inspectiontool',
|
||||
'googleother',
|
||||
// AI crawlers
|
||||
'gptbot', 'claude-web', 'bytespider', 'ccbot', 'amazonbot',
|
||||
'meta-externalagent', 'cohere-ai', 'google-extended',
|
||||
'chatgpt-user', 'oai-searchbot', 'perplexitybot',
|
||||
// Social media bots
|
||||
'facebot', 'facebookexternalhit', 'twitterbot', 'linkedinbot',
|
||||
'whatsapp', 'telegrambot', 'slackbot', 'discordbot', 'pinterestbot',
|
||||
@@ -82,6 +90,8 @@ trait SessionTrait
|
||||
'rogerbot', 'petalbot', 'screaming frog',
|
||||
// Archive bots
|
||||
'ia_archiver', 'archive.org_bot',
|
||||
// Internal tools
|
||||
'mprsitemapbuilder',
|
||||
// Generic patterns
|
||||
'crawler', 'spider', 'bot.htm', 'bot.php', 'crawl',
|
||||
// HTTP clients
|
||||
@@ -101,7 +111,31 @@ trait SessionTrait
|
||||
];
|
||||
|
||||
foreach ($botPatterns as $pattern) {
|
||||
if (strpos($userAgent, $pattern) !== false) {
|
||||
if (strpos($userAgentLower, $pattern) !== false) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: Heuristic checks on Chrome UA
|
||||
if (preg_match('/Chrome\/(\d+)\.(\d+)\.(\d+)\.(\d+)/', $userAgent, $m)) {
|
||||
$major = (int) $m[1];
|
||||
$minor = (int) $m[2];
|
||||
$build = (int) $m[3];
|
||||
$patch = (int) $m[4];
|
||||
|
||||
// Chrome version higher than any released version = fake UA
|
||||
// Current stable: ~136 as of early 2026; 150 gives headroom
|
||||
if ($major > 150) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Chrome < 80 is 6+ years old — no real user runs this
|
||||
if ($major < 80) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Bots often use X.0.0.0 pattern (real Chrome has non-zero build numbers)
|
||||
if ($minor === 0 && $build === 0 && $patch === 0 && $major < 120) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -256,6 +290,20 @@ trait SessionTrait
|
||||
return substr($_SERVER['HTTP_USER_AGENT'], 0, $maxLength);
|
||||
}
|
||||
|
||||
/**
 * Return the current request URI, truncated to a maximum length.
 *
 * The value is read from $_SERVER['REQUEST_URI'] and cut with substr(), so
 * the limit is counted in bytes.
 *
 * @param int $maxLength Maximum length to return
 * @return string|null The (possibly truncated) request URI, or null when
 *                     REQUEST_URI is not set
 */
protected static function mprGetRequestUrl($maxLength = 500)
{
    $requestUri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : null;

    return $requestUri === null ? null : substr($requestUri, 0, $maxLength);
}
|
||||
|
||||
/**
|
||||
* Detect current page type from controller
|
||||
*
|
||||
@@ -287,7 +335,7 @@ trait SessionTrait
|
||||
case 'order':
|
||||
case 'orderopc':
|
||||
case 'checkout':
|
||||
case 'mprexpresscheckoutcheckout':
|
||||
case 'mprcheckoutrevolutioncheckout':
|
||||
return [self::$PAGE_TYPES['CHECKOUT'], null];
|
||||
|
||||
case 'orderconfirmation':
|
||||
|
||||
Reference in New Issue
Block a user