Add URL tracking, bot-aware cleanup, and request URL helper

- PageViewsTable: add `url` column with migration, orphan cleanup, batched deletes
- SessionTable: add cleanOldSessionsSmart() with separate bot/real retention
- SessionInstaller: add cleanAllSmart() orchestrating all table cleanup
- SessionTrait: add mprGetRequestUrl() and mprDetectPageType() helpers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 11:33:18 +00:00
parent 0feba3038f
commit 15f436ff47
4 changed files with 285 additions and 48 deletions

84
src/PageViewsTable.php Normal file → Executable file
View File

@@ -71,6 +71,7 @@ class PageViewsTable
`page_type` TINYINT(2) UNSIGNED NOT NULL,
`page_id` INT(10) UNSIGNED DEFAULT NULL,
`controller` VARCHAR(64) DEFAULT NULL,
`url` VARCHAR(500) DEFAULT NULL,
`date_add` DATETIME NOT NULL,
PRIMARY KEY (`id_page_view`),
KEY `id_session` (`id_session`),
@@ -79,7 +80,33 @@ class PageViewsTable
KEY `session_date` (`id_session`, `date_add`)
) ENGINE={$engine} DEFAULT CHARSET={$charset};";
return \Db::getInstance()->execute($sql);
$result = \Db::getInstance()->execute($sql);
// Add url column to existing tables (safe migration)
self::migrateAddUrlColumn();
return $result;
}
/**
 * Make sure the page views table has a `url` column.
 *
 * Runs SHOW COLUMNS for `url`; when the lookup comes back empty the column
 * is appended after `controller` via ALTER TABLE. Any exception raised while
 * doing so is swallowed on purpose, so a missing table during a fresh
 * install never aborts the caller.
 *
 * @return void
 */
public static function migrateAddUrlColumn()
{
    $pageViewsTable = self::getTableName();

    try {
        $existing = \Db::getInstance()->executeS("SHOW COLUMNS FROM `{$pageViewsTable}` LIKE 'url'");

        // Column already present: nothing to migrate.
        if (!empty($existing)) {
            return;
        }

        \Db::getInstance()->execute(
            "ALTER TABLE `{$pageViewsTable}` ADD COLUMN `url` VARCHAR(500) DEFAULT NULL AFTER `controller`"
        );
    } catch (\Exception $e) {
        // Best-effort migration: the table may not exist yet on a fresh install.
    }
}
/**
@@ -109,9 +136,10 @@ class PageViewsTable
* @param int $pageType Page type constant
* @param int|null $pageId Page ID
* @param string|null $controller Controller name
* @param string|null $url Request URL
* @return bool
*/
public static function trackPageView($idSession, $pageType, $pageId = null, $controller = null)
public static function trackPageView($idSession, $pageType, $pageId = null, $controller = null, $url = null)
{
if (!$idSession) {
return false;
@@ -119,10 +147,11 @@ class PageViewsTable
$table = self::getTableName();
$sql = "INSERT INTO `{$table}` (id_session, page_type, page_id, controller, date_add)
$sql = "INSERT INTO `{$table}` (id_session, page_type, page_id, controller, url, date_add)
VALUES (" . (int) $idSession . ", " . (int) $pageType . ", " .
($pageId !== null ? (int) $pageId : 'NULL') . ", " .
($controller ? "'" . pSQL(substr($controller, 0, 64)) . "'" : 'NULL') . ", NOW())";
($controller ? "'" . pSQL(substr($controller, 0, 64)) . "'" : 'NULL') . ", " .
($url ? "'" . pSQL(substr($url, 0, 500)) . "'" : 'NULL') . ", NOW())";
return \Db::getInstance()->execute($sql);
}
@@ -172,18 +201,51 @@ class PageViewsTable
}
/**
* Clean old page view records
* Clean old page view records (batched to avoid long locks)
*
* @param int $hours Hours to keep
* @return bool
* @return int Number of rows deleted
*/
public static function cleanOldPageViews($hours = 24)
public static function cleanOldPageViews($hours = 48)
{
$table = self::getTableName();
$deleted = 0;
return \Db::getInstance()->execute(
"DELETE FROM `{$table}`
WHERE date_add < DATE_SUB(NOW(), INTERVAL " . (int) $hours . " HOUR)"
);
do {
\Db::getInstance()->execute(
"DELETE FROM `{$table}`
WHERE date_add < DATE_SUB(NOW(), INTERVAL " . (int) $hours . " HOUR)
LIMIT 10000"
);
$affected = (int) \Db::getInstance()->Affected_Rows();
$deleted += $affected;
} while ($affected >= 10000);
return $deleted;
}
/**
 * Clean orphaned page views (session no longer exists), in batches.
 *
 * MySQL does not accept LIMIT (or ORDER BY) on a multi-table DELETE
 * (`DELETE pv FROM ... JOIN ...`), so the orphan test is expressed as a
 * correlated NOT EXISTS on a single-table DELETE, which does accept LIMIT.
 * The outer table is referenced by its full name rather than an alias so the
 * statement also works on servers that do not allow aliases in single-table
 * DELETE.
 *
 * @return int Number of rows deleted
 */
public static function cleanOrphanedPageViews()
{
    $table = self::getTableName();
    $sessTable = SessionTable::getTableName();
    $db = \Db::getInstance();
    $batchSize = 10000;
    $deleted = 0;

    do {
        $ok = $db->execute(
            "DELETE FROM `{$table}`
            WHERE NOT EXISTS (
                SELECT 1 FROM `{$sessTable}` s
                WHERE s.id_session = `{$table}`.id_session
            )
            LIMIT " . $batchSize
        );

        // Stop on a failed statement instead of trusting a stale/negative row count.
        if (!$ok) {
            break;
        }

        $affected = max(0, (int) $db->Affected_Rows());
        $deleted += $affected;
        // A full batch means more orphans may remain; a short batch means we are done.
    } while ($affected >= $batchSize);

    return $deleted;
}
}

33
src/SessionInstaller.php Normal file → Executable file
View File

@@ -59,7 +59,7 @@ class SessionInstaller
}
/**
* Clean up old data from all session tables
* Clean up old data from all session tables (simple mode)
*
* @param int $sessionHours Hours to keep sessions (default 24)
* @param int $pageViewsHours Hours to keep page views (default 24)
@@ -70,8 +70,35 @@ class SessionInstaller
{
$result = true;
$result = $result && SessionTable::cleanOldSessions($sessionHours);
$result = $result && PageViewsTable::cleanOldPageViews($pageViewsHours);
$result = $result && CartActionsTable::cleanOldCartActions($cartActionsHours);
PageViewsTable::cleanOldPageViews($pageViewsHours);
CartActionsTable::cleanOldCartActions($cartActionsHours);
return $result;
}
/**
 * Run the bot-aware cleanup across every session-related table.
 * Intended to be called from an hourly cron job.
 *
 * Steps run in this order: sessions, orphaned page views, expired page
 * views, cart actions. Sessions are purged first, so page views orphaned by
 * that purge are removed by the orphan pass of the same run.
 *
 * @param int $botSessionHours Hours to keep bot sessions (default 24)
 * @param int $realSessionHours Hours to keep real sessions (default 72)
 * @param int $pageViewsHours Hours to keep page views (default 48)
 * @param int $cartActionsHours Hours to keep cart actions (default 168 / 7 days)
 * @return array Cleanup statistics keyed by 'sessions', 'orphaned_page_views',
 *               'old_page_views' and 'cart_actions'; each value is whatever the
 *               corresponding cleanup method returned
 */
public static function cleanAllSmart($botSessionHours = 24, $realSessionHours = 72, $pageViewsHours = 48, $cartActionsHours = 168)
{
    // Bot-aware session purge.
    $sessionStats = SessionTable::cleanOldSessionsSmart($botSessionHours, $realSessionHours);

    // Page views: first those whose session is gone, then those past retention.
    $orphanedPageViews = PageViewsTable::cleanOrphanedPageViews();
    $expiredPageViews = PageViewsTable::cleanOldPageViews($pageViewsHours);

    // Cart actions past retention.
    $cartActions = CartActionsTable::cleanOldCartActions($cartActionsHours);

    return [
        'sessions' => $sessionStats,
        'orphaned_page_views' => $orphanedPageViews,
        'old_page_views' => $expiredPageViews,
        'cart_actions' => $cartActions,
    ];
}
}

162
src/SessionTable.php Normal file → Executable file
View File

@@ -29,16 +29,6 @@ if (!defined('_PS_VERSION_')) {
class SessionTable
{
/**
* List of modules that use the shared mpr_sessions table.
* Add your module name here when integrating.
*/
private const MODULES_USING_SESSION = [
'mprexpresscheckout',
'mprtotaldefender',
'mprtradeaccount',
];
/**
* Table name without prefix
*/
@@ -91,7 +81,7 @@ class SessionTable
`os` VARCHAR(32) DEFAULT NULL,
`is_bot` TINYINT(1) UNSIGNED NOT NULL DEFAULT 0,
-- Attribution tracking (mprexpresscheckout)
-- Attribution tracking (mprcheckoutrevolution)
`source_type` TINYINT(3) UNSIGNED NOT NULL DEFAULT 0,
`source_detail` VARCHAR(500) DEFAULT NULL,
`utm_source` VARCHAR(128) DEFAULT NULL,
@@ -104,7 +94,7 @@ class SessionTable
`msclkid` VARCHAR(255) DEFAULT NULL,
`ttclid` VARCHAR(255) DEFAULT NULL,
-- Landing page tracking (mprexpresscheckout)
-- Landing page tracking (mprcheckoutrevolution)
`landing_page_type` TINYINT(2) UNSIGNED DEFAULT NULL,
`landing_page_id` INT(10) UNSIGNED DEFAULT NULL,
`landing_url` VARCHAR(500) DEFAULT NULL,
@@ -148,29 +138,44 @@ class SessionTable
*/
public static function uninstall($currentModule)
{
// Check if any other module using sessions is still installed
foreach (self::MODULES_USING_SESSION as $moduleName) {
if ($moduleName === $currentModule) {
continue;
}
if (\Module::isInstalled($moduleName)) {
// Another module still needs the table
return true;
}
if (self::isAnotherModuleInstalled($currentModule)) {
return true;
}
// No other module needs it - drop the table
return \Db::getInstance()->execute("DROP TABLE IF EXISTS `" . self::getTableName() . "`");
}
/**
* Get list of modules that use the session table
* Discover which installed modules bundle the prestashop-session package.
* Uses runtime filesystem check instead of a hardcoded list.
*
* @return array
* @return array Module names
*/
public static function getModulesUsingSession()
{
return self::MODULES_USING_SESSION;
if (!defined('_PS_MODULE_DIR_') || !_PS_MODULE_DIR_) {
return [];
}
try {
$rows = \Db::getInstance()->executeS(
'SELECT name FROM ' . _DB_PREFIX_ . 'module'
);
} catch (\Exception $e) {
return [];
}
$modules = [];
if ($rows) {
foreach ($rows as $row) {
$path = _PS_MODULE_DIR_ . $row['name'] . '/vendor/myprestarocks/prestashop-session/';
if (is_dir($path)) {
$modules[] = $row['name'];
}
}
}
return $modules;
}
/**
@@ -181,11 +186,8 @@ class SessionTable
*/
public static function isAnotherModuleInstalled($excludeModule)
{
foreach (self::MODULES_USING_SESSION as $moduleName) {
if ($moduleName === $excludeModule) {
continue;
}
if (\Module::isInstalled($moduleName)) {
foreach (self::getModulesUsingSession() as $moduleName) {
if ($moduleName !== $excludeModule) {
return true;
}
}
@@ -237,6 +239,63 @@ class SessionTable
return $result ? (int) $result : null;
}
/**
 * Get the most recent active session for a guest, regardless of hash.
 * Enables cross-module session deduplication — if one module already
 * created a session for this guest (possibly with different hash params),
 * another module can find and reuse it instead of creating a duplicate.
 *
 * The query is time-relative (NOW()/DATE_SUB()), so the Db query cache is
 * bypassed to avoid returning a session that has since timed out.
 *
 * @param int $idGuest Guest ID; a falsy value short-circuits to null
 * @param int $timeoutMinutes Session timeout in minutes
 * @return int|null Session ID or null when no active session is found
 */
public static function getActiveSessionByGuest($idGuest, $timeoutMinutes = 60)
{
    if (!$idGuest) {
        return null;
    }

    $table = self::getTableName();

    $sql = "SELECT id_session FROM `{$table}`
        WHERE id_guest = " . (int) $idGuest . "
        AND date_last_activity > DATE_SUB(NOW(), INTERVAL " . (int) $timeoutMinutes . " MINUTE)
        ORDER BY date_last_activity DESC
        LIMIT 1";

    // Second argument is $use_cache: disabled because the result depends on NOW().
    $result = \Db::getInstance()->getValue($sql, false);

    return $result ? (int) $result : null;
}
/**
 * Get the most recent active non-bot session for an IP address.
 * Fallback for when id_guest is not available (PS 9.x anonymous visitors).
 *
 * The query is time-relative (NOW()/DATE_SUB()), so the Db query cache is
 * bypassed to avoid returning a session that has since timed out.
 *
 * @param string $ipAddress IP address; a falsy value short-circuits to null
 * @param int $timeoutMinutes Session timeout in minutes
 * @return int|null Session ID or null when no active session is found
 */
public static function getActiveSessionByIP($ipAddress, $timeoutMinutes = 60)
{
    if (!$ipAddress) {
        return null;
    }

    $table = self::getTableName();

    $sql = "SELECT id_session FROM `{$table}`
        WHERE ip_address = '" . pSQL($ipAddress) . "'
        AND is_bot = 0
        AND date_last_activity > DATE_SUB(NOW(), INTERVAL " . (int) $timeoutMinutes . " MINUTE)
        ORDER BY date_last_activity DESC
        LIMIT 1";

    // executeS() caches by default just like getValue(), so switching methods alone
    // does not avoid stale results; pass $use_cache = false (third argument) explicitly.
    $rows = \Db::getInstance()->executeS($sql, true, false);

    return !empty($rows[0]['id_session']) ? (int) $rows[0]['id_session'] : null;
}
/**
* Link guest sessions to customer (on login/registration)
*
@@ -258,7 +317,7 @@ class SessionTable
}
/**
* Clean old sessions
* Clean old sessions (simple, non-bot-aware)
*
* @param int $hours Hours to keep (default 24)
* @return bool
@@ -273,6 +332,47 @@ class SessionTable
);
}
/**
 * Bot-aware session cleanup (batched to avoid long locks).
 * Bot sessions accumulate fast and have no analytics value beyond short-term
 * monitoring, so they're cleaned more aggressively than real sessions.
 *
 * The two phases are mutually exclusive: phase 1 only touches rows with
 * is_bot = 1 and phase 2 only touches the remaining rows. This keeps
 * 'deleted_real' from counting bot sessions and lets each retention window
 * apply independently, even when $botHours is larger than $realHours.
 *
 * @param int $botHours Hours to keep bot sessions (default 24)
 * @param int $realHours Hours to keep real sessions (default 72)
 * @return array ['deleted_bots' => int, 'deleted_real' => int]
 */
public static function cleanOldSessionsSmart($botHours = 24, $realHours = 72)
{
    $table = self::getTableName();
    $db = \Db::getInstance();
    $batchSize = 10000;

    // Result key => WHERE clause for that phase.
    $phases = [
        // Phase 1: bot sessions (aggressive retention)
        'deleted_bots' => 'is_bot = 1'
            . ' AND date_last_activity < DATE_SUB(NOW(), INTERVAL ' . (int) $botHours . ' HOUR)',
        // Phase 2: everything phase 1 does not cover (real sessions)
        'deleted_real' => 'is_bot <> 1'
            . ' AND date_last_activity < DATE_SUB(NOW(), INTERVAL ' . (int) $realHours . ' HOUR)',
    ];

    $stats = ['deleted_bots' => 0, 'deleted_real' => 0];

    foreach ($phases as $key => $where) {
        do {
            $ok = $db->execute(
                "DELETE FROM `{$table}`
                WHERE {$where}
                LIMIT {$batchSize}"
            );

            // A failed statement ends this phase; do not trust the row count after it.
            if (!$ok) {
                break;
            }

            $affected = max(0, (int) $db->Affected_Rows());
            $stats[$key] += $affected;
            // A full batch means more matching rows may remain.
        } while ($affected >= $batchSize);
    }

    return $stats;
}
/**
* Get active session count
*

54
src/SessionTrait.php Normal file → Executable file
View File

@@ -68,12 +68,20 @@ trait SessionTrait
$userAgent = $_SERVER['HTTP_USER_AGENT'];
}
$userAgent = strtolower($userAgent);
$userAgentLower = strtolower($userAgent);
// Phase 1: Known bot UA patterns (fast string matching)
$botPatterns = [
// Search engine crawlers
'googlebot', 'bingbot', 'slurp', 'duckduckbot', 'baiduspider',
'yandexbot', 'sogou', 'exabot',
// Google rendering / Lighthouse / other crawlers
'chrome-lighthouse', 'lighthouse', 'pagespeed', 'google-inspectiontool',
'googleother',
// AI crawlers
'gptbot', 'claude-web', 'bytespider', 'ccbot', 'amazonbot',
'meta-externalagent', 'cohere-ai', 'google-extended',
'chatgpt-user', 'oai-searchbot', 'perplexitybot',
// Social media bots
'facebot', 'facebookexternalhit', 'twitterbot', 'linkedinbot',
'whatsapp', 'telegrambot', 'slackbot', 'discordbot', 'pinterestbot',
@@ -82,6 +90,8 @@ trait SessionTrait
'rogerbot', 'petalbot', 'screaming frog',
// Archive bots
'ia_archiver', 'archive.org_bot',
// Internal tools
'mprsitemapbuilder',
// Generic patterns
'crawler', 'spider', 'bot.htm', 'bot.php', 'crawl',
// HTTP clients
@@ -101,7 +111,31 @@ trait SessionTrait
];
foreach ($botPatterns as $pattern) {
if (strpos($userAgent, $pattern) !== false) {
if (strpos($userAgentLower, $pattern) !== false) {
return true;
}
}
// Phase 2: Heuristic checks on Chrome UA
if (preg_match('/Chrome\/(\d+)\.(\d+)\.(\d+)\.(\d+)/', $userAgent, $m)) {
$major = (int) $m[1];
$minor = (int) $m[2];
$build = (int) $m[3];
$patch = (int) $m[4];
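// Chrome version higher than any released version = fake UA
// Current stable: ~136 as of early 2026; 150 gives headroom
// NOTE(review): Chrome ships a new major version roughly every four weeks, so this fixed
// ceiling must be raised periodically or genuine browsers will eventually be flagged as bots.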
if ($major > 150) {
return true;
}
// Chrome < 80 is 6+ years old — no real user runs this
if ($major < 80) {
return true;
}
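// Bots often use the X.0.0.0 pattern. NOTE(review): since Chrome's User-Agent reduction
// (Chrome 101+), genuine Chrome also reports MAJOR.0.0.0, so the `$major < 120` bound below
// can flag real Chrome 101–119 visitors — confirm whether the bound should be `< 101`.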
if ($minor === 0 && $build === 0 && $patch === 0 && $major < 120) {
return true;
}
}
@@ -256,6 +290,20 @@ trait SessionTrait
return substr($_SERVER['HTTP_USER_AGENT'], 0, $maxLength);
}
/**
 * Return the current request URI, truncated to a maximum length.
 *
 * Reads $_SERVER['REQUEST_URI'] as-is and cuts it with substr(), so the
 * limit is counted in bytes.
 *
 * @param int $maxLength Maximum length to return
 * @return string|null The truncated request URI, or null when REQUEST_URI is not set
 */
protected static function mprGetRequestUrl($maxLength = 500)
{
    return isset($_SERVER['REQUEST_URI'])
        ? substr($_SERVER['REQUEST_URI'], 0, $maxLength)
        : null;
}
/**
* Detect current page type from controller
*
@@ -287,7 +335,7 @@ trait SessionTrait
case 'order':
case 'orderopc':
case 'checkout':
case 'mprexpresscheckoutcheckout':
case 'mprcheckoutrevolutioncheckout':
return [self::$PAGE_TYPES['CHECKOUT'], null];
case 'orderconfirmation':