Cross posting to mobile-l. 

---------- Forwarded message ----------
From: Sean Pringle <springle@wikimedia.org>
Date: Wednesday, June 24, 2015
Subject: [Analytics] analtyics-store eventlogging UNION queries
To: "A mailing list for the Analytics Team at WMF and everybody who has an interest in Wikipedia and analytics." <analytics@lists.wikimedia.org>


Below are a couple examples of recent slow queries running on
analytics-store. They both use subqueries and UNION. Neither push the
WHERE conditions down to the subqueries, which means indexes are not
used. If you include WHERE clauses in both inner and outer queries you
will achieve index usage, especially for these sorts of eventlogging
queries on event_action or wiki.

MySQL / MariaDB query optimizer does do constant propagation, but
UNION tends to trip it up in all sorts of exciting ways. Be verbose,
abandon DRY principle, and make your life easier :-)

Current query:

SELECT Month.Date, COALESCE(Web.Web, 0) AS Web FROM (
    SELECT DATE_FORMAT( ADDDATE(CURDATE() - INTERVAL 30 - 1 DAY,
@num:=@num+1), '%Y-%m-%d' ) AS Date
        FROM seq_1_to_100, (SELECT @num:=-1) num LIMIT 30 ) AS Month LEFT JOIN (
        SELECT DATE(timestamp) AS Date,
        SUM(1) AS Web FROM (
            SELECT timestamp, wiki, event_username, event_action,
event_namespace, event_userEditCount FROM MobileWebEditing_5644223
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                FROM MobileWebEditing_6077315
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                from MobileWebEditing_6637866
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                from MobileWebEditing_7675117
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                from MobileWebEditing_8599025
        ) as MobileWebEditing
    WHERE event_namespace != 0 AND event_action = 'success' AND wiki
!= 'testwiki'
    GROUP BY Date
) AS Web ON Month.Date = Web.Date;

Version with WHERE clauses pushed down to subqueries:

SELECT Month.Date, COALESCE(Web.Web, 0) AS Web FROM (
    SELECT DATE_FORMAT( ADDDATE(CURDATE() - INTERVAL 30 - 1 DAY,
@num:=@num+1), '%Y-%m-%d' ) AS Date
        FROM seq_1_to_100, (SELECT @num:=-1) num LIMIT 30 ) AS Month LEFT JOIN (
        SELECT DATE(timestamp) AS Date,
        SUM(1) AS Web FROM (
            SELECT timestamp, wiki, event_username, event_action,
event_namespace, event_userEditCount FROM MobileWebEditing_5644223
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                FROM MobileWebEditing_6077315 WHERE event_namespace !=
0 AND event_action = 'success' AND wiki != 'testwiki'
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                from MobileWebEditing_6637866 WHERE event_namespace !=
0 AND event_action = 'success' AND wiki != 'testwiki'
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                from MobileWebEditing_7675117 WHERE event_namespace !=
0 AND event_action = 'success' AND wiki != 'testwiki'
            UNION SELECT timestamp, wiki, event_username,
event_action, event_namespace, event_userEditCount
                from MobileWebEditing_8599025 WHERE event_namespace !=
0 AND event_action = 'success' AND wiki != 'testwiki'
        ) as MobileWebEditing
    WHERE event_namespace != 0 AND event_action = 'success' AND wiki
!= 'testwiki'
    GROUP BY Date
) AS Web ON Month.Date = Web.Date;

Current query (hours):

SELECT day,
`Attempted saves` / `Loads` AS "All wikis",
`Attempted saves - dewiki` / `Loads - dewiki` AS "dewiki",
`Attempted saves - enwiki` / `Loads - enwiki` AS "enwiki",
`Attempted saves - eswiki` / `Loads - eswiki` AS "eswiki",
`Attempted saves - frwiki` / `Loads - frwiki` AS "frwiki",
`Attempted saves - ptwiki` / `Loads - plwiki` AS "plwiki",
`Attempted saves - plwiki` / `Loads - ptwiki` AS "ptwiki" FROM (
    SELECT DATE( timestamp ) AS day,
    SUM( IF( event_action = 'ready', 1, 0 ) ) AS "Loads",
    SUM( IF( event_action = 'ready' AND wiki = 'dewiki', 1, 0 ) ) AS
"Loads - dewiki",
    SUM( IF( event_action = 'ready' AND wiki = 'enwiki', 1, 0 ) ) AS
"Loads - enwiki",
    SUM( IF( event_action = 'ready' AND wiki = 'eswiki', 1, 0 ) ) AS
"Loads - eswiki",
    SUM( IF( event_action = 'ready' AND wiki = 'frwiki', 1, 0 ) ) AS
"Loads - frwiki",
    SUM( IF( event_action = 'ready' AND wiki = 'plwiki', 1, 0 ) ) AS
"Loads - plwiki",
    SUM( IF( event_action = 'ready' AND wiki = 'ptwiki', 1, 0 ) ) AS
"Loads - ptwiki",
    SUM( IF( event_action = 'saveAttempt', 1, 0 ) ) AS "Attempted saves",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'dewiki', 1, 0 )
) AS "Attempted saves - dewiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'enwiki', 1, 0 )
) AS "Attempted saves - enwiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'eswiki', 1, 0 )
) AS "Attempted saves - eswiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'frwiki', 1, 0 )
) AS "Attempted saves - frwiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'plwiki', 1, 0 )
) AS "Attempted saves - plwiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'ptwiki', 1, 0 )
) AS "Attempted saves - ptwiki" FROM (
        SELECT event_action, wiki, timestamp
        FROM Edit_10604157
        UNION ALL
        SELECT event_action, wiki, timestamp
        FROM Edit_10676603
    ) AS valid_edit_schema_revisions
    GROUP BY day
    ORDER BY day
) as internalQuery;

Version with WHERE clauses pushed down to subqueries:

SELECT day,
`Attempted saves` / `Loads` AS "All wikis",
`Attempted saves - dewiki` / `Loads - dewiki` AS "dewiki",
`Attempted saves - enwiki` / `Loads - enwiki` AS "enwiki",
`Attempted saves - eswiki` / `Loads - eswiki` AS "eswiki",
`Attempted saves - frwiki` / `Loads - frwiki` AS "frwiki",
`Attempted saves - ptwiki` / `Loads - plwiki` AS "plwiki",
`Attempted saves - plwiki` / `Loads - ptwiki` AS "ptwiki" FROM (
    SELECT DATE( timestamp ) AS day,
    SUM( IF( event_action = 'ready', 1, 0 ) ) AS "Loads",
    SUM( IF( event_action = 'ready' AND wiki = 'dewiki', 1, 0 ) ) AS
"Loads - dewiki",
    SUM( IF( event_action = 'ready' AND wiki = 'enwiki', 1, 0 ) ) AS
"Loads - enwiki",
    SUM( IF( event_action = 'ready' AND wiki = 'eswiki', 1, 0 ) ) AS
"Loads - eswiki",
    SUM( IF( event_action = 'ready' AND wiki = 'frwiki', 1, 0 ) ) AS
"Loads - frwiki",
    SUM( IF( event_action = 'ready' AND wiki = 'plwiki', 1, 0 ) ) AS
"Loads - plwiki",
    SUM( IF( event_action = 'ready' AND wiki = 'ptwiki', 1, 0 ) ) AS
"Loads - ptwiki",
    SUM( IF( event_action = 'saveAttempt', 1, 0 ) ) AS "Attempted saves",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'dewiki', 1, 0 )
) AS "Attempted saves - dewiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'enwiki', 1, 0 )
) AS "Attempted saves - enwiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'eswiki', 1, 0 )
) AS "Attempted saves - eswiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'frwiki', 1, 0 )
) AS "Attempted saves - frwiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'plwiki', 1, 0 )
) AS "Attempted saves - plwiki",
    SUM( IF( event_action = 'saveAttempt' AND wiki = 'ptwiki', 1, 0 )
) AS "Attempted saves - ptwiki" FROM (
        SELECT event_action, wiki, timestamp
        FROM Edit_10604157
        WHERE event_action in ('saveAttempt', 'ready')
        UNION ALL
        SELECT event_action, wiki, timestamp
        FROM Edit_10676603
        WHERE event_action in ('saveAttempt', 'ready')
    ) AS valid_edit_schema_revisions
    GROUP BY day
    ORDER BY day
) as internalQuery;

--
DBA @ WMF

_______________________________________________
Analytics mailing list
Analytics@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/analytics